]> jfr.im git - dlqueue.git/blob - venv/lib/python3.11/site-packages/pip/_internal/cache.py
init: venv and flask
[dlqueue.git] / venv / lib / python3.11 / site-packages / pip / _internal / cache.py
1 """Cache Management
2 """
3
4 import hashlib
5 import json
6 import logging
7 import os
8 from pathlib import Path
9 from typing import Any, Dict, List, Optional
10
11 from pip._vendor.packaging.tags import Tag, interpreter_name, interpreter_version
12 from pip._vendor.packaging.utils import canonicalize_name
13
14 from pip._internal.exceptions import InvalidWheelFilename
15 from pip._internal.models.direct_url import DirectUrl
16 from pip._internal.models.link import Link
17 from pip._internal.models.wheel import Wheel
18 from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds
19 from pip._internal.utils.urls import path_to_url
20
21 logger = logging.getLogger(__name__)
22
23 ORIGIN_JSON_NAME = "origin.json"
24
25
def _hash_dict(d: Dict[str, str]) -> str:
    """Return a stable sha224 hex digest of a dictionary.

    The dictionary is serialized to compact JSON with sorted keys, so the
    digest does not depend on the insertion order of the keys.
    """
    canonical = json.dumps(
        d,
        sort_keys=True,
        separators=(",", ":"),
        ensure_ascii=True,
    )
    hasher = hashlib.sha224()
    hasher.update(canonical.encode("ascii"))
    return hasher.hexdigest()
30
31
class Cache:
    """Abstract base class providing cache directories for data from links.

    Subclasses must implement ``get_path_for_link`` and ``get``.

    :param cache_dir: The root of the cache. Must be an absolute path, or
        empty to disable caching.
    """

    def __init__(self, cache_dir: str) -> None:
        super().__init__()
        assert not cache_dir or os.path.isabs(cache_dir)
        # An empty cache_dir is normalized to None ("no cache available").
        self.cache_dir = cache_dir if cache_dir else None

    def _get_cache_path_parts(self, link: Link) -> List[str]:
        """Get the path components that must be joined onto cache_dir."""

        # Build a dedicated cache key instead of re-using the raw URL: the
        # fragment may carry other items that are irrelevant here.
        key_parts = {"url": link.url_without_fragment}
        has_hash = link.hash_name is not None and link.hash is not None
        if has_hash:
            key_parts[link.hash_name] = link.hash
        if link.subdirectory_fragment:
            key_parts["subdirectory"] = link.subdirectory_fragment

        # The interpreter name and version are part of the key to cope with
        # ill-behaved sdists that build a different wheel depending on the
        # python version their setup.py is being run on, and don't encode
        # the difference in compatibility tags.
        # https://github.com/pypa/pip/issues/7296
        key_parts["interpreter_name"] = interpreter_name()
        key_parts["interpreter_version"] = interpreter_version()

        # The key is hashed with sha224 (see _hash_dict), which yields a
        # shorter digest than sha256; that difference does not matter much
        # for this use case.
        digest = _hash_dict(key_parts)

        # Nest the directories a few levels deep so that no single directory
        # ends up with a huge number of sub directories, which some file
        # systems cannot handle.
        nested = [digest[start : start + 2] for start in (0, 2, 4)]
        nested.append(digest[6:])
        return nested

    def _get_candidates(self, link: Link, canonical_package_name: str) -> List[Any]:
        """List ``(entry_name, directory)`` pairs found in the link's cache dir.

        Returns an empty list when there is no cache directory, no package
        name, no link, or when the cache directory for the link does not
        exist.
        """
        if not (self.cache_dir and canonical_package_name and link):
            return []

        directory = self.get_path_for_link(link)
        if not os.path.isdir(directory):
            return []
        return [(entry, directory) for entry in os.listdir(directory)]

    def get_path_for_link(self, link: Link) -> str:
        """Return a directory to store cached items in for link."""
        raise NotImplementedError()

    def get(
        self,
        link: Link,
        package_name: Optional[str],
        supported_tags: List[Tag],
    ) -> Link:
        """Return a link to a cached item if it exists, otherwise return the
        passed link.
        """
        raise NotImplementedError()
102
103
class SimpleWheelCache(Cache):
    """A cache of wheels for future installs."""

    def __init__(self, cache_dir: str) -> None:
        super().__init__(cache_dir)

    def get_path_for_link(self, link: Link) -> str:
        """Return a directory to store cached wheels for link.

        Because there are M wheels for any one sdist, we provide a directory
        to cache them in, and then consult that directory when looking up
        cache hits.

        We only insert things into the cache if they have plausible version
        numbers, so that we don't contaminate the cache with things that were
        not unique. E.g. ./package might have dozens of installs done for it
        and build a version of 0.0...and if we built and cached a wheel, we'd
        end up using the same wheel even if the source has been edited.

        :param link: The link of the sdist for which this will cache wheels.
        """
        nested_parts = self._get_cache_path_parts(link)
        assert self.cache_dir
        # Wheels live under the "wheels" directory of the root cache_dir.
        wheels_root = os.path.join(self.cache_dir, "wheels")
        return os.path.join(wheels_root, *nested_parts)

    def get(
        self,
        link: Link,
        package_name: Optional[str],
        supported_tags: List[Tag],
    ) -> Link:
        """Return a link to the best matching cached wheel, or ``link`` itself.

        Cached files are skipped when their name is not a valid wheel
        filename, when the distribution name does not match ``package_name``,
        or when the wheel does not support any of ``supported_tags``. Among
        the remaining wheels, the smallest
        ``(support_index_min, wheel_name, wheel_dir)`` tuple wins.
        """
        if not package_name:
            return link

        canonical_package_name = canonicalize_name(package_name)
        # Smallest (support index, wheel name, wheel dir) tuple seen so far.
        best = None
        for wheel_name, wheel_dir in self._get_candidates(link, canonical_package_name):
            try:
                wheel = Wheel(wheel_name)
            except InvalidWheelFilename:
                continue

            if canonicalize_name(wheel.name) != canonical_package_name:
                logger.debug(
                    "Ignoring cached wheel %s for %s as it "
                    "does not match the expected distribution name %s.",
                    wheel_name,
                    link,
                    package_name,
                )
                continue

            if not wheel.supported(supported_tags):
                # Built for a different python/arch/etc
                continue

            ranked = (
                wheel.support_index_min(supported_tags),
                wheel_name,
                wheel_dir,
            )
            if best is None or ranked < best:
                best = ranked

        if best is None:
            return link

        _, best_name, best_dir = best
        return Link(path_to_url(os.path.join(best_dir, best_name)))
172
173
class EphemWheelCache(SimpleWheelCache):
    """A SimpleWheelCache that creates its own temporary cache directory."""

    def __init__(self) -> None:
        # Keep a reference to the TempDirectory object on the instance and
        # use its path as the root of this cache.
        ephem_dir = TempDirectory(
            kind=tempdir_kinds.EPHEM_WHEEL_CACHE,
            globally_managed=True,
        )
        self._temp_dir = ephem_dir

        super().__init__(ephem_dir.path)
184
185
class CacheEntry:
    """A cached link together with its persistence flag and recorded origin.

    On construction, an origin file (named by ORIGIN_JSON_NAME) is looked up
    in the directory containing the linked file. If it exists and can be
    parsed, ``origin`` holds the resulting DirectUrl; otherwise ``origin``
    stays ``None``.
    """

    def __init__(
        self,
        link: Link,
        persistent: bool,
    ):
        self.link = link
        self.persistent = persistent
        self.origin: Optional[DirectUrl] = None

        origin_file = Path(self.link.file_path).parent / ORIGIN_JSON_NAME
        if not origin_file.exists():
            return

        # Best effort: an unreadable or invalid origin file is reported with
        # a warning and otherwise ignored.
        try:
            self.origin = DirectUrl.from_json(
                origin_file.read_text(encoding="utf-8")
            )
        except Exception as exc:
            logger.warning(
                "Ignoring invalid cache entry origin file %s for %s (%s)",
                origin_file,
                link.filename,
                exc,
            )
208
209
class WheelCache(Cache):
    """Wraps EphemWheelCache and SimpleWheelCache into a single Cache.

    This Cache allows for graceful degradation, using the ephem wheel cache
    when a certain link is not found in the simple wheel cache first.
    """

    def __init__(self, cache_dir: str) -> None:
        super().__init__(cache_dir)
        self._wheel_cache = SimpleWheelCache(cache_dir)
        self._ephem_cache = EphemWheelCache()

    def get_path_for_link(self, link: Link) -> str:
        """Return the simple (persistent) wheel cache directory for link."""
        return self._wheel_cache.get_path_for_link(link)

    def get_ephem_path_for_link(self, link: Link) -> str:
        """Return the ephemeral wheel cache directory for link."""
        return self._ephem_cache.get_path_for_link(link)

    def get(
        self,
        link: Link,
        package_name: Optional[str],
        supported_tags: List[Tag],
    ) -> Link:
        """Return the cached link for ``link`` if there is one, else ``link``."""
        entry = self.get_cache_entry(link, package_name, supported_tags)
        return link if entry is None else entry.link

    def get_cache_entry(
        self,
        link: Link,
        package_name: Optional[str],
        supported_tags: List[Tag],
    ) -> Optional[CacheEntry]:
        """Returns a CacheEntry with a link to a cached item if it exists or
        None. The cache entry indicates if the item was found in the persistent
        or ephemeral cache.
        """
        # The simple wheel cache is consulted first (persistent=True); the
        # ephemeral cache is only queried when that lookup misses.
        lookup_order = (
            (self._wheel_cache, True),
            (self._ephem_cache, False),
        )
        for backend, is_persistent in lookup_order:
            found = backend.get(
                link=link,
                package_name=package_name,
                supported_tags=supported_tags,
            )
            # A miss is signalled by getting the very same link object back.
            if found is not link:
                return CacheEntry(found, persistent=is_persistent)

        return None

    @staticmethod
    def record_download_origin(cache_dir: str, download_info: DirectUrl) -> None:
        """Write ``download_info`` as the origin file inside ``cache_dir``.

        If an origin file already exists, it is read first; a warning is
        logged when it cannot be read or when its URL differs from
        ``download_info.url``. The file is (over)written in every case.
        """
        origin_file = Path(cache_dir) / ORIGIN_JSON_NAME
        if origin_file.exists():
            try:
                existing_text = origin_file.read_text(encoding="utf-8")
                previous = DirectUrl.from_json(existing_text)
            except Exception as exc:
                logger.warning(
                    "Could not read origin file %s in cache entry (%s). "
                    "Will attempt to overwrite it.",
                    origin_file,
                    exc,
                )
            else:
                # TODO: use DirectUrl.equivalent when
                # https://github.com/pypa/pip/pull/10564 is merged.
                urls_differ = previous.url != download_info.url
                if urls_differ:
                    logger.warning(
                        "Origin URL %s in cache entry %s does not match download URL "
                        "%s. This is likely a pip bug or a cache corruption issue. "
                        "Will overwrite it with the new value.",
                        previous.url,
                        cache_dir,
                        download_info.url,
                    )
        serialized = download_info.to_json()
        origin_file.write_text(serialized, encoding="utf-8")