cache.py 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. """Cache Management
  2. """
  3. import errno
  4. import hashlib
  5. import logging
  6. import os
  7. from pip._vendor.packaging.utils import canonicalize_name
  8. from pip._internal import index
  9. from pip._internal.compat import expanduser
  10. from pip._internal.download import path_to_url
  11. from pip._internal.utils.temp_dir import TempDirectory
  12. from pip._internal.wheel import InvalidWheelFilename, Wheel
  13. logger = logging.getLogger(__name__)
  14. class Cache(object):
  15. """An abstract class - provides cache directories for data from links
  16. :param cache_dir: The root of the cache.
  17. :param format_control: A pip.index.FormatControl object to limit
  18. binaries being read from the cache.
  19. :param allowed_formats: which formats of files the cache should store.
  20. ('binary' and 'source' are the only allowed values)
  21. """
  22. def __init__(self, cache_dir, format_control, allowed_formats):
  23. super(Cache, self).__init__()
  24. self.cache_dir = expanduser(cache_dir) if cache_dir else None
  25. self.format_control = format_control
  26. self.allowed_formats = allowed_formats
  27. _valid_formats = {"source", "binary"}
  28. assert self.allowed_formats.union(_valid_formats) == _valid_formats
  29. def _get_cache_path_parts(self, link):
  30. """Get parts of part that must be os.path.joined with cache_dir
  31. """
  32. # We want to generate an url to use as our cache key, we don't want to
  33. # just re-use the URL because it might have other items in the fragment
  34. # and we don't care about those.
  35. key_parts = [link.url_without_fragment]
  36. if link.hash_name is not None and link.hash is not None:
  37. key_parts.append("=".join([link.hash_name, link.hash]))
  38. key_url = "#".join(key_parts)
  39. # Encode our key url with sha224, we'll use this because it has similar
  40. # security properties to sha256, but with a shorter total output (and
  41. # thus less secure). However the differences don't make a lot of
  42. # difference for our use case here.
  43. hashed = hashlib.sha224(key_url.encode()).hexdigest()
  44. # We want to nest the directories some to prevent having a ton of top
  45. # level directories where we might run out of sub directories on some
  46. # FS.
  47. parts = [hashed[:2], hashed[2:4], hashed[4:6], hashed[6:]]
  48. return parts
  49. def _get_candidates(self, link, package_name):
  50. can_not_cache = (
  51. not self.cache_dir or
  52. not package_name or
  53. not link
  54. )
  55. if can_not_cache:
  56. return []
  57. canonical_name = canonicalize_name(package_name)
  58. formats = index.fmt_ctl_formats(
  59. self.format_control, canonical_name
  60. )
  61. if not self.allowed_formats.intersection(formats):
  62. return []
  63. root = self.get_path_for_link(link)
  64. try:
  65. return os.listdir(root)
  66. except OSError as err:
  67. if err.errno in {errno.ENOENT, errno.ENOTDIR}:
  68. return []
  69. raise
  70. def get_path_for_link(self, link):
  71. """Return a directory to store cached items in for link.
  72. """
  73. raise NotImplementedError()
  74. def get(self, link, package_name):
  75. """Returns a link to a cached item if it exists, otherwise returns the
  76. passed link.
  77. """
  78. raise NotImplementedError()
  79. def _link_for_candidate(self, link, candidate):
  80. root = self.get_path_for_link(link)
  81. path = os.path.join(root, candidate)
  82. return index.Link(path_to_url(path))
  83. def cleanup(self):
  84. pass
  85. class SimpleWheelCache(Cache):
  86. """A cache of wheels for future installs.
  87. """
  88. def __init__(self, cache_dir, format_control):
  89. super(SimpleWheelCache, self).__init__(
  90. cache_dir, format_control, {"binary"}
  91. )
  92. def get_path_for_link(self, link):
  93. """Return a directory to store cached wheels for link
  94. Because there are M wheels for any one sdist, we provide a directory
  95. to cache them in, and then consult that directory when looking up
  96. cache hits.
  97. We only insert things into the cache if they have plausible version
  98. numbers, so that we don't contaminate the cache with things that were
  99. not unique. E.g. ./package might have dozens of installs done for it
  100. and build a version of 0.0...and if we built and cached a wheel, we'd
  101. end up using the same wheel even if the source has been edited.
  102. :param link: The link of the sdist for which this will cache wheels.
  103. """
  104. parts = self._get_cache_path_parts(link)
  105. # Store wheels within the root cache_dir
  106. return os.path.join(self.cache_dir, "wheels", *parts)
  107. def get(self, link, package_name):
  108. candidates = []
  109. for wheel_name in self._get_candidates(link, package_name):
  110. try:
  111. wheel = Wheel(wheel_name)
  112. except InvalidWheelFilename:
  113. continue
  114. if not wheel.supported():
  115. # Built for a different python/arch/etc
  116. continue
  117. candidates.append((wheel.support_index_min(), wheel_name))
  118. if not candidates:
  119. return link
  120. return self._link_for_candidate(link, min(candidates)[1])
  121. class EphemWheelCache(SimpleWheelCache):
  122. """A SimpleWheelCache that creates it's own temporary cache directory
  123. """
  124. def __init__(self, format_control):
  125. self._temp_dir = TempDirectory(kind="ephem-wheel-cache")
  126. self._temp_dir.create()
  127. super(EphemWheelCache, self).__init__(
  128. self._temp_dir.path, format_control
  129. )
  130. def cleanup(self):
  131. self._temp_dir.cleanup()
  132. class WheelCache(Cache):
  133. """Wraps EphemWheelCache and SimpleWheelCache into a single Cache
  134. This Cache allows for gracefully degradation, using the ephem wheel cache
  135. when a certain link is not found in the simple wheel cache first.
  136. """
  137. def __init__(self, cache_dir, format_control):
  138. super(WheelCache, self).__init__(
  139. cache_dir, format_control, {'binary'}
  140. )
  141. self._wheel_cache = SimpleWheelCache(cache_dir, format_control)
  142. self._ephem_cache = EphemWheelCache(format_control)
  143. def get_path_for_link(self, link):
  144. return self._wheel_cache.get_path_for_link(link)
  145. def get_ephem_path_for_link(self, link):
  146. return self._ephem_cache.get_path_for_link(link)
  147. def get(self, link, package_name):
  148. retval = self._wheel_cache.get(link, package_name)
  149. if retval is link:
  150. retval = self._ephem_cache.get(link, package_name)
  151. return retval
  152. def cleanup(self):
  153. self._wheel_cache.cleanup()
  154. self._ephem_cache.cleanup()