8 from pathlib
import Path
9 from typing
import Any
, Dict
, List
, Optional
11 from pip
._vendor
.packaging
.tags
import Tag
, interpreter_name
, interpreter_version
12 from pip
._vendor
.packaging
.utils
import canonicalize_name
14 from pip
._internal
.exceptions
import InvalidWheelFilename
15 from pip
._internal
.models
.direct_url
import DirectUrl
16 from pip
._internal
.models
.link
import Link
17 from pip
._internal
.models
.wheel
import Wheel
18 from pip
._internal
.utils
.temp_dir
import TempDirectory
, tempdir_kinds
19 from pip
._internal
.utils
.urls
import path_to_url
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Name of the JSON file, stored next to a cache entry, that records the
# DirectUrl the cached item was downloaded from.
ORIGIN_JSON_NAME = "origin.json"
def _hash_dict(d: Dict[str, str]) -> str:
    """Return a stable sha224 of a dictionary.

    The dictionary is serialized to compact JSON with sorted keys and
    ASCII-only output, so two equal dictionaries always produce the same
    digest regardless of key insertion order.

    :param d: The mapping to hash.
    :return: The hexadecimal sha224 digest of the serialized mapping.
    """
    s = json.dumps(d, sort_keys=True, separators=(",", ":"), ensure_ascii=True)
    # ensure_ascii=True guarantees that the ASCII encoding cannot fail.
    return hashlib.sha224(s.encode("ascii")).hexdigest()
33 """An abstract class - provides cache directories for data from links
35 :param cache_dir: The root of the cache.
def __init__(self, cache_dir: str) -> None:
    """Remember the root directory of the cache.

    :param cache_dir: The root of the cache. Must be an absolute path when
        non-empty; an empty value is stored as ``None``.
    """
    # A relative cache root is rejected; an empty value is allowed.
    assert not cache_dir or os.path.isabs(cache_dir)
    # Normalize any falsy value (e.g. "") to None.
    self.cache_dir = cache_dir or None
def _get_cache_path_parts(self, link: Link) -> List[str]:
    """Get parts of path that must be os.path.joined with cache_dir

    :param link: The link from which the cache key is derived.
    :return: Four path components cut from the sha224 hex digest of the
        cache key: three two-character prefixes and the remainder.
    """

    # We want to generate an url to use as our cache key, we don't want to
    # just re-use the URL because it might have other items in the fragment
    # and we don't care about those.
    key_parts = {"url": link.url_without_fragment}
    if link.hash_name is not None and link.hash is not None:
        key_parts[link.hash_name] = link.hash
    if link.subdirectory_fragment:
        key_parts["subdirectory"] = link.subdirectory_fragment

    # Include interpreter name, major and minor version in cache key
    # to cope with ill-behaved sdists that build a different wheel
    # depending on the python version their setup.py is being run on,
    # and don't encode the difference in compatibility tags.
    # https://github.com/pypa/pip/issues/7296
    key_parts["interpreter_name"] = interpreter_name()
    key_parts["interpreter_version"] = interpreter_version()

    # Encode our key url with sha224, we'll use this because it has similar
    # security properties to sha256, but with a shorter total output (and
    # thus less secure). However the differences don't make a lot of
    # difference for our use case here.
    hashed = _hash_dict(key_parts)

    # We want to nest the directories some to prevent having a ton of top
    # level directories where we might run out of sub directories on some
    # filesystems.
    parts = [hashed[:2], hashed[2:4], hashed[4:6], hashed[6:]]

    return parts
def _get_candidates(self, link: Link, canonical_package_name: str) -> List[Any]:
    """List the entries found in the cache directory for ``link``.

    :param link: The link whose cache directory is inspected.
    :param canonical_package_name: The package name being looked up; when
        empty, nothing is looked up.
    :return: A list of ``(entry_name, directory)`` tuples, one per entry of
        the directory returned by ``get_path_for_link``. The list is empty
        when there is no cache directory configured, no package name, no
        link, or the directory does not exist.
    """
    can_not_cache = not self.cache_dir or not canonical_package_name or not link
    if can_not_cache:
        return []

    path = self.get_path_for_link(link)
    if not os.path.isdir(path):
        return []
    return [(candidate, path) for candidate in os.listdir(path)]
def get_path_for_link(self, link: Link) -> str:
    """Return a directory to store cached items in for link.

    Not implemented here.

    :param link: The link for which a cache directory is requested.
    :raises NotImplementedError: Always.
    """
    raise NotImplementedError()
95 package_name
: Optional
[str],
96 supported_tags
: List
[Tag
],
98 """Returns a link to a cached item if it exists, otherwise returns the
101 raise NotImplementedError()
104 class SimpleWheelCache(Cache
):
105 """A cache of wheels for future installs."""
def __init__(self, cache_dir: str) -> None:
    """Initialize the cache by delegating to the parent initializer.

    :param cache_dir: Passed unchanged to the parent ``__init__``.
    """
    super().__init__(cache_dir)
def get_path_for_link(self, link: Link) -> str:
    """Return a directory to store cached wheels for link

    Because there are M wheels for any one sdist, we provide a directory
    to cache them in, and then consult that directory when looking up
    cached wheels.

    We only insert things into the cache if they have plausible version
    numbers, so that we don't contaminate the cache with things that were
    not unique. E.g. ./package might have dozens of installs done for it
    and build a version of 0.0...and if we built and cached a wheel, we'd
    end up using the same wheel even if the source has been edited.

    :param link: The link of the sdist for which this will cache wheels.
    :return: ``cache_dir`` joined with ``"wheels"`` and the components
        returned by ``_get_cache_path_parts``.
    """
    parts = self._get_cache_path_parts(link)
    # A cache root is required to build the path.
    assert self.cache_dir
    # Store wheels within the root cache_dir
    return os.path.join(self.cache_dir, "wheels", *parts)
133 package_name
: Optional
[str],
134 supported_tags
: List
[Tag
],
141 canonical_package_name
= canonicalize_name(package_name
)
142 for wheel_name
, wheel_dir
in self
._get
_candidates
(link
, canonical_package_name
):
144 wheel
= Wheel(wheel_name
)
145 except InvalidWheelFilename
:
147 if canonicalize_name(wheel
.name
) != canonical_package_name
:
149 "Ignoring cached wheel %s for %s as it "
150 "does not match the expected distribution name %s.",
156 if not wheel
.supported(supported_tags
):
157 # Built for a different python/arch/etc
161 wheel
.support_index_min(supported_tags
),
170 _
, wheel_name
, wheel_dir
= min(candidates
)
171 return Link(path_to_url(os
.path
.join(wheel_dir
, wheel_name
)))
class EphemWheelCache(SimpleWheelCache):
    """A SimpleWheelCache that creates its own temporary cache directory"""

    def __init__(self) -> None:
        """Create the temporary directory and use it as the cache root."""
        # Keep a reference to the TempDirectory object on the instance.
        self._temp_dir = TempDirectory(
            kind=tempdir_kinds.EPHEM_WHEEL_CACHE,
            globally_managed=True,
        )

        super().__init__(self._temp_dir.path)
193 self
.persistent
= persistent
194 self
.origin
: Optional
[DirectUrl
] = None
195 origin_direct_url_path
= Path(self
.link
.file_path
).parent
/ ORIGIN_JSON_NAME
196 if origin_direct_url_path
.exists():
198 self
.origin
= DirectUrl
.from_json(
199 origin_direct_url_path
.read_text(encoding
="utf-8")
201 except Exception as e
:
203 "Ignoring invalid cache entry origin file %s for %s (%s)",
204 origin_direct_url_path
,
210 class WheelCache(Cache
):
211 """Wraps EphemWheelCache and SimpleWheelCache into a single Cache
213 This Cache allows for graceful degradation, using the ephem wheel cache
214 when a certain link is not found in the simple wheel cache first.
def __init__(self, cache_dir: str) -> None:
    """Set up the two wrapped wheel caches.

    :param cache_dir: The cache root, passed to the parent initializer and
        to the wrapped ``SimpleWheelCache``.
    """
    super().__init__(cache_dir)
    # Wheel cache rooted at cache_dir.
    self._wheel_cache = SimpleWheelCache(cache_dir)
    # Wheel cache that creates its own directory (takes no cache_dir).
    self._ephem_cache = EphemWheelCache()
def get_path_for_link(self, link: Link) -> str:
    """Return the directory for ``link`` in the wrapped ``_wheel_cache``.

    :param link: The link for which a cache directory is requested.
    :return: The result of ``_wheel_cache.get_path_for_link(link)``.
    """
    return self._wheel_cache.get_path_for_link(link)
def get_ephem_path_for_link(self, link: Link) -> str:
    """Return the directory for ``link`` in the wrapped ``_ephem_cache``.

    :param link: The link for which a cache directory is requested.
    :return: The result of ``_ephem_cache.get_path_for_link(link)``.
    """
    return self._ephem_cache.get_path_for_link(link)
231 package_name
: Optional
[str],
232 supported_tags
: List
[Tag
],
234 cache_entry
= self
.get_cache_entry(link
, package_name
, supported_tags
)
235 if cache_entry
is None:
237 return cache_entry
.link
242 package_name
: Optional
[str],
243 supported_tags
: List
[Tag
],
244 ) -> Optional
[CacheEntry
]:
245 """Returns a CacheEntry with a link to a cached item if it exists or
246 None. The cache entry indicates if the item was found in the persistent
249 retval
= self
._wheel
_cache
.get(
251 package_name
=package_name
,
252 supported_tags
=supported_tags
,
254 if retval
is not link
:
255 return CacheEntry(retval
, persistent
=True)
257 retval
= self
._ephem
_cache
.get(
259 package_name
=package_name
,
260 supported_tags
=supported_tags
,
262 if retval
is not link
:
263 return CacheEntry(retval
, persistent
=False)
def record_download_origin(cache_dir: str, download_info: DirectUrl) -> None:
    """Write ``download_info`` to the origin file of a cache entry.

    If an origin file already exists it is read first. A file that cannot be
    read, or whose URL differs from the new download URL, is reported with a
    warning. In every case the file is then (over)written with
    ``download_info``.

    :param cache_dir: Directory of the cache entry; the origin file is
        ``ORIGIN_JSON_NAME`` inside it.
    :param download_info: The download information to serialize as JSON.
    """
    origin_path = Path(cache_dir) / ORIGIN_JSON_NAME
    if origin_path.exists():
        try:
            origin = DirectUrl.from_json(origin_path.read_text(encoding="utf-8"))
        except Exception as e:
            # Best effort: an unreadable origin file is simply replaced below.
            logger.warning(
                "Could not read origin file %s in cache entry (%s). "
                "Will attempt to overwrite it.",
                origin_path,
                e,
            )
        else:
            # TODO: use DirectUrl.equivalent when
            # https://github.com/pypa/pip/pull/10564 is merged.
            if origin.url != download_info.url:
                logger.warning(
                    "Origin URL %s in cache entry %s does not match download URL "
                    "%s. This is likely a pip bug or a cache corruption issue. "
                    "Will overwrite it with the new value.",
                    origin.url,
                    cache_dir,
                    download_info.url,
                )
    origin_path.write_text(download_info.to_json(), encoding="utf-8")