]> jfr.im git - dlqueue.git/blob - venv/lib/python3.11/site-packages/setuptools/package_index.py
init: venv and flask
[dlqueue.git] / venv / lib / python3.11 / site-packages / setuptools / package_index.py
1 """PyPI and direct package downloading."""
2
3 import sys
4 import os
5 import re
6 import io
7 import shutil
8 import socket
9 import base64
10 import hashlib
11 import itertools
12 import configparser
13 import html
14 import http.client
15 import urllib.parse
16 import urllib.request
17 import urllib.error
18 from functools import wraps
19
20 import setuptools
21 from pkg_resources import (
22 CHECKOUT_DIST,
23 Distribution,
24 BINARY_DIST,
25 normalize_path,
26 SOURCE_DIST,
27 Environment,
28 find_distributions,
29 safe_name,
30 safe_version,
31 to_filename,
32 Requirement,
33 DEVELOP_DIST,
34 EGG_DIST,
35 parse_version,
36 )
37 from distutils import log
38 from distutils.errors import DistutilsError
39 from fnmatch import translate
40 from setuptools.wheel import Wheel
41 from setuptools.extern.more_itertools import unique_everseen
42
43
# Matches an "egg=name-version" URL fragment used to identify a download.
EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.+!]+)$')
# Extracts the target of an href attribute from an HTML tag.
HREF = re.compile(r"""href\s*=\s*['"]?([^'"> ]+)""", re.I)
# Matches legacy PyPI "md5" annotation links next to download anchors.
PYPI_MD5 = re.compile(
    r'<a href="([^"#]+)">([^<]+)</a>\n\s+\(<a (?:title="MD5 hash"\n\s+)'
    r'href="[^?]+\?:action=show_md5&amp;digest=([0-9a-f]{32})">md5</a>\)'
)
# Returns a match object when a string begins with a URL scheme ("http:", ...).
URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):', re.I).match
# Recognized source-distribution archive extensions, in match-priority order.
EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split()

__all__ = [
    'PackageIndex',
    'distros_for_url',
    'parse_bdist_wininst',
    'interpret_distro_name',
]

# Default socket timeout (seconds) applied to index/network operations.
_SOCKET_TIMEOUT = 15

# User-Agent sent with every index request, e.g. "setuptools/65.0 Python-urllib/3.11".
_tmpl = "setuptools/{setuptools.__version__} Python-urllib/{py_major}"
user_agent = _tmpl.format(
    py_major='{}.{}'.format(*sys.version_info), setuptools=setuptools
)
66
67
def parse_requirement_arg(spec):
    """Parse *spec* as a requirement string, raising DistutilsError on failure."""
    try:
        return Requirement.parse(spec)
    except ValueError as exc:
        raise DistutilsError(
            "Not a URL, existing file, or requirement spec: %r" % (spec,)
        ) from exc
75
76
def parse_bdist_wininst(name):
    """Return (base,pyversion) or (None,None) for possible .exe name"""
    lower = name.lower()
    base = py_ver = plat = None

    if not lower.endswith('.exe'):
        return base, py_ver, plat

    # Four recognized bdist_wininst filename shapes; the "-pyX.Y" variants
    # embed a Python version in the last 7 chars before ".exe".
    if lower.endswith('.win32.exe'):
        base, plat = name[:-10], 'win32'
    elif lower.startswith('.win32-py', -16):
        base, py_ver, plat = name[:-16], name[-7:-4], 'win32'
    elif lower.endswith('.win-amd64.exe'):
        base, plat = name[:-14], 'win-amd64'
    elif lower.startswith('.win-amd64-py', -20):
        base, py_ver, plat = name[:-20], name[-7:-4], 'win-amd64'

    return base, py_ver, plat
99
100
def egg_info_for_url(url):
    """Return (basename, fragment) extracted from a download URL."""
    parsed = urllib.parse.urlparse(url)
    scheme, server, path, parameters, query, fragment = parsed
    segments = path.split('/')
    base = urllib.parse.unquote(segments[-1])
    if server == 'sourceforge.net' and base == 'download':  # XXX Yuck
        # SourceForge URLs end in "/download"; the real name is one step up.
        base = urllib.parse.unquote(segments[-2])
    if '#' in base:
        # A percent-encoded '#' survives unquoting; split it off as a fragment.
        base, fragment = base.split('#', 1)
    return base, fragment
110
111
def distros_for_url(url, metadata=None):
    """Yield egg or source distribution objects that might be found at a URL"""
    base, fragment = egg_info_for_url(url)
    yield from distros_for_location(url, base, metadata)
    if not fragment:
        return
    match = EGG_FRAGMENT.match(fragment)
    if not match:
        return
    # An explicit "#egg=name-version" fragment marks a checkout link.
    yield from interpret_distro_name(
        url, match.group(1), metadata, precedence=CHECKOUT_DIST
    )
124
125
def distros_for_location(location, basename, metadata=None):
    """Yield egg or source distribution objects based on basename"""
    if basename.endswith('.egg.zip'):
        basename = basename[:-4]  # strip the .zip
    if basename.endswith('.egg') and '-' in basename:
        # only one, unambiguous interpretation
        return [Distribution.from_location(location, basename, metadata)]
    if basename.endswith('.whl') and '-' in basename:
        whl = Wheel(basename)
        if not whl.is_compatible():
            return []
        dist = Distribution(
            location=location,
            project_name=whl.project_name,
            version=whl.version,
            # Increase priority over eggs.
            precedence=EGG_DIST + 1,
        )
        return [dist]
    if basename.endswith('.exe'):
        win_base, py_ver, platform = parse_bdist_wininst(basename)
        if win_base is not None:
            return interpret_distro_name(
                location, win_base, metadata, py_ver, BINARY_DIST, platform
            )
    # Try source distro extensions (.zip, .tgz, etc.)
    for ext in EXTENSIONS:
        if basename.endswith(ext):
            stem = basename[: -len(ext)]
            return interpret_distro_name(location, stem, metadata)
    return []  # no extension matched
159
160
def distros_for_filename(filename, metadata=None):
    """Yield possible egg or source distribution objects based on a filename"""
    location = normalize_path(filename)
    basename = os.path.basename(filename)
    return distros_for_location(location, basename, metadata)
166
167
def interpret_distro_name(
    location, basename, metadata, py_version=None, precedence=SOURCE_DIST, platform=None
):
    """Generate the interpretation of a source distro name

    Note: if `location` is a filesystem filename, you should call
    ``pkg_resources.normalize_path()`` on it before passing it to this
    routine!
    """
    parts = basename.split('-')

    # A "pyX.Y" tag past the first two parts marks a bdist_dumb, not an
    # sdist -- bail out.
    if not py_version and any(re.match(r'py\d\.\d$', p) for p in parts[2:]):
        return

    # Pivot between name and version: the first part starting with a digit
    # is taken to begin the version; otherwise everything is the name.
    pivot = next(
        (idx for idx, chunk in enumerate(parts) if chunk[:1].isdigit()),
        len(parts),
    )

    yield Distribution(
        location,
        metadata,
        '-'.join(parts[:pivot]),
        '-'.join(parts[pivot:]),
        py_version=py_version,
        precedence=precedence,
        platform=platform,
    )
200
201
def unique_values(func):
    """
    Decorator: wrap a function returning an iterable so the resulting
    iterable never yields the same item twice.
    """

    @wraps(func)
    def deduped(*args, **kwargs):
        return unique_everseen(func(*args, **kwargs))

    return deduped
213
214
REL = re.compile(r"""<([^>]*\srel\s{0,10}=\s{0,10}['"]?([^'" >]+)[^>]*)>""", re.I)
"""
Regex for an HTML tag with 'rel="val"' attributes.
Group 1 is the whole tag interior; group 2 is the rel value.
"""
219
220
@unique_values
def find_external_links(url, page):
    """Find rel="homepage" and rel="download" links in `page`, yielding URLs"""

    # Tags carrying a rel attribute that names homepage and/or download.
    for rel_match in REL.finditer(page):
        tag, rel = rel_match.groups()
        rels = {token.strip() for token in rel.lower().split(',')}
        if rels & {'homepage', 'download'}:
            for href in HREF.finditer(tag):
                yield urllib.parse.urljoin(url, htmldecode(href.group(1)))

    # Legacy PyPI table rows labeling the project's home/download URLs.
    for marker in ("<th>Home Page", "<th>Download URL"):
        pos = page.find(marker)
        if pos == -1:
            continue
        href = HREF.search(page, pos)
        if href:
            yield urllib.parse.urljoin(url, htmldecode(href.group(1)))
238
239
class ContentChecker:
    """
    Null content checker: defines the validation interface while
    accepting everything. Real checkers (e.g. hash-based) override it.
    """

    def feed(self, block):
        """Consume a block of downloaded data (no-op for the null checker)."""

    def is_valid(self):
        """Report whether the content validated; the null checker always says yes."""
        return True

    def report(self, reporter, template):
        """Describe this checker via *reporter* (no-op for the null checker)."""
263
264
class HashChecker(ContentChecker):
    """Validate downloaded content against a ``name=hexdigest`` URL fragment."""

    # Recognizes "<algorithm>=<hexdigest>" inside a URL fragment.
    pattern = re.compile(
        r'(?P<hash_name>sha1|sha224|sha384|sha256|sha512|md5)='
        r'(?P<expected>[a-f0-9]+)'
    )

    def __init__(self, hash_name, expected):
        self.hash_name = hash_name
        self.hash = hashlib.new(hash_name)
        self.expected = expected

    @classmethod
    def from_url(cls, url):
        """Construct a (possibly null) ContentChecker from a URL."""
        fragment = urllib.parse.urlparse(url)[-1]
        match = fragment and cls.pattern.search(fragment)
        if not match:
            # No usable digest in the fragment: accept anything.
            return ContentChecker()
        return cls(**match.groupdict())

    def feed(self, block):
        """Fold *block* into the running digest."""
        self.hash.update(block)

    def is_valid(self):
        """True when the accumulated digest equals the expected hex digest."""
        return self.hash.hexdigest() == self.expected

    def report(self, reporter, template):
        """Call *reporter* with *template* interpolated with the hash name."""
        return reporter(template % self.hash_name)
296
297
class PackageIndex(Environment):
    """A distribution index that scans web pages for download URLs"""

    def __init__(
        self,
        index_url="https://pypi.org/simple/",
        hosts=('*',),
        ca_bundle=None,
        verify_ssl=True,
        *args,
        **kw,
    ):
        # NOTE(review): ca_bundle and verify_ssl are accepted but never read
        # in this class -- presumably retained for backward compatibility.
        super().__init__(*args, **kw)
        # Normalize the index URL to always end with exactly one '/'.
        self.index_url = index_url + "/"[: not index_url.endswith('/')]
        self.scanned_urls = {}  # URLs already examined, whatever the outcome
        self.fetched_urls = {}  # URLs whose pages were read (or failed)
        self.package_pages = {}  # project key -> {package-page URL: True}
        # Compile the glob-style host patterns into a single regex matcher.
        self.allows = re.compile('|'.join(map(translate, hosts))).match
        self.to_scan = []  # deferred find-links URLs; None once prescanned
        self.opener = urllib.request.urlopen

    def add(self, dist):
        """Add `dist` to the index, skipping dists with unparseable versions."""
        # ignore invalid versions
        try:
            parse_version(dist.version)
        except Exception:
            return
        return super().add(dist)

    # FIXME: 'PackageIndex.process_url' is too complex (14)
    def process_url(self, url, retrieve=False):  # noqa: C901
        """Evaluate a URL as a possible download, and maybe retrieve it"""
        if url in self.scanned_urls and not retrieve:
            return
        self.scanned_urls[url] = True
        if not URL_SCHEME(url):
            # No scheme: treat as a local file or directory.
            self.process_filename(url)
            return
        else:
            dists = list(distros_for_url(url))
            if dists:
                if not self.url_ok(url):
                    return
                self.debug("Found link: %s", url)

        if dists or not retrieve or url in self.fetched_urls:
            list(map(self.add, dists))
            return  # don't need the actual page

        if not self.url_ok(url):
            self.fetched_urls[url] = True
            return

        self.info("Reading %s", url)
        self.fetched_urls[url] = True  # prevent multiple fetch attempts
        tmpl = "Download error on %s: %%s -- Some packages may not be found!"
        f = self.open_url(url, tmpl % url)
        if f is None:
            return
        if isinstance(f, urllib.error.HTTPError) and f.code == 401:
            self.info("Authentication error: %s" % f.msg)
        self.fetched_urls[f.url] = True
        if 'html' not in f.headers.get('content-type', '').lower():
            f.close()  # not html, we can't process it
            return

        base = f.url  # handle redirects
        page = f.read()
        if not isinstance(page, str):
            # In Python 3 and got bytes but want str.
            if isinstance(f, urllib.error.HTTPError):
                # Errors have no charset, assume latin1:
                charset = 'latin-1'
            else:
                charset = f.headers.get_param('charset') or 'latin-1'
            page = page.decode(charset, "ignore")
        f.close()
        # Recursively evaluate every link found on this page.
        for match in HREF.finditer(page):
            link = urllib.parse.urljoin(base, htmldecode(match.group(1)))
            self.process_url(link)
        if url.startswith(self.index_url) and getattr(f, 'code', None) != 404:
            page = self.process_index(url, page)

    def process_filename(self, fn, nested=False):
        """Index a local file, or (non-recursively) each entry of a directory."""
        # process filenames or directories
        if not os.path.exists(fn):
            self.warn("Not found: %s", fn)
            return

        if os.path.isdir(fn) and not nested:
            path = os.path.realpath(fn)
            for item in os.listdir(path):
                self.process_filename(os.path.join(path, item), True)

        dists = distros_for_filename(fn)
        if dists:
            self.debug("Found: %s", fn)
            list(map(self.add, dists))

    def url_ok(self, url, fatal=False):
        """Return True if `url`'s host is allowed; warn or raise otherwise."""
        s = URL_SCHEME(url)
        is_file = s and s.group(1).lower() == 'file'
        if is_file or self.allows(urllib.parse.urlparse(url)[1]):
            return True
        msg = (
            "\nNote: Bypassing %s (disallowed host; see "
            "https://setuptools.pypa.io/en/latest/deprecated/"
            "easy_install.html#restricting-downloads-with-allow-hosts for details).\n"
        )
        if fatal:
            raise DistutilsError(msg % url)
        else:
            self.warn(msg, url)

    def scan_egg_links(self, search_path):
        """Scan every ``*.egg-link`` file found in the given directories."""
        dirs = filter(os.path.isdir, search_path)
        egg_links = (
            (path, entry)
            for path in dirs
            for entry in os.listdir(path)
            if entry.endswith('.egg-link')
        )
        list(itertools.starmap(self.scan_egg_link, egg_links))

    def scan_egg_link(self, path, entry):
        """Register the development distribution referenced by one egg-link."""
        with open(os.path.join(path, entry)) as raw_lines:
            # filter non-empty lines
            lines = list(filter(None, map(str.strip, raw_lines)))

        if len(lines) != 2:
            # format is not recognized; punt
            return

        # An egg-link holds two lines: the egg path and the setup path.
        egg_path, setup_path = lines

        for dist in find_distributions(os.path.join(path, egg_path)):
            dist.location = os.path.join(path, *lines)
            dist.precedence = SOURCE_DIST
            self.add(dist)

    def _scan(self, link):
        # Process a URL to see if it's for a package page
        NO_MATCH_SENTINEL = None, None
        if not link.startswith(self.index_url):
            return NO_MATCH_SENTINEL

        # Index page URLs look like <index_url>/<project>/<version>.
        parts = list(map(urllib.parse.unquote, link[len(self.index_url) :].split('/')))
        if len(parts) != 2 or '#' in parts[1]:
            return NO_MATCH_SENTINEL

        # it's a package page, sanitize and index it
        pkg = safe_name(parts[0])
        ver = safe_version(parts[1])
        self.package_pages.setdefault(pkg.lower(), {})[link] = True
        return to_filename(pkg), to_filename(ver)

    def process_index(self, url, page):
        """Process the contents of a PyPI page"""

        # process an index page into the package-page index
        for match in HREF.finditer(page):
            try:
                self._scan(urllib.parse.urljoin(url, htmldecode(match.group(1))))
            except ValueError:
                pass

        pkg, ver = self._scan(url)  # ensure this page is in the page index
        if not pkg:
            return ""  # no sense double-scanning non-package pages

        # process individual package page
        for new_url in find_external_links(url, page):
            # Process the found URL
            base, frag = egg_info_for_url(new_url)
            if base.endswith('.py') and not frag:
                # Bare .py links need an #egg fragment to be identifiable.
                if ver:
                    new_url += '#egg=%s-%s' % (pkg, ver)
                else:
                    self.need_version_info(url)
            self.scan_url(new_url)

        # Rewrite legacy md5 annotation links into #md5= fragments.
        return PYPI_MD5.sub(
            lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1, 3, 2), page
        )

    def need_version_info(self, url):
        """Trigger a full index scan because a page had unversioned .py links."""
        self.scan_all(
            "Page at %s links to .py file(s) without version info; an index "
            "scan is required.",
            url,
        )

    def scan_all(self, msg=None, *args):
        """Scan the entire index root, at most once per instance."""
        if self.index_url not in self.fetched_urls:
            if msg:
                self.warn(msg, *args)
            self.info("Scanning index of all packages (this may take a while)")
            self.scan_url(self.index_url)

    def find_packages(self, requirement):
        """Scan the index pages that could satisfy `requirement`."""
        self.scan_url(self.index_url + requirement.unsafe_name + '/')

        if not self.package_pages.get(requirement.key):
            # Fall back to safe version of the name
            self.scan_url(self.index_url + requirement.project_name + '/')

        if not self.package_pages.get(requirement.key):
            # We couldn't find the target package, so search the index page too
            self.not_found_in_index(requirement)

        for url in list(self.package_pages.get(requirement.key, ())):
            # scan each page that might be related to the desired package
            self.scan_url(url)

    def obtain(self, requirement, installer=None):
        """Return a distribution satisfying `requirement`, scanning if needed."""
        self.prescan()
        self.find_packages(requirement)
        for dist in self[requirement.key]:
            if dist in requirement:
                return dist
            self.debug("%s does not match %s", requirement, dist)
        return super(PackageIndex, self).obtain(requirement, installer)

    def check_hash(self, checker, filename, tfp):
        """
        checker is a ContentChecker
        """
        checker.report(self.debug, "Validating %%s checksum for %s" % filename)
        if not checker.is_valid():
            # Remove the corrupt download before raising.
            tfp.close()
            os.unlink(filename)
            raise DistutilsError(
                "%s validation failed for %s; "
                "possible download problem?"
                % (checker.hash.name, os.path.basename(filename))
            )

    def add_find_links(self, urls):
        """Add `urls` to the list that will be prescanned for searches"""
        for url in urls:
            if (
                self.to_scan is None  # if we have already "gone online"
                or not URL_SCHEME(url)  # or it's a local file/directory
                or url.startswith('file:')
                or list(distros_for_url(url))  # or a direct package link
            ):
                # then go ahead and process it now
                self.scan_url(url)
            else:
                # otherwise, defer retrieval till later
                self.to_scan.append(url)

    def prescan(self):
        """Scan urls scheduled for prescanning (e.g. --find-links)"""
        if self.to_scan:
            list(map(self.scan_url, self.to_scan))
        self.to_scan = None  # from now on, go ahead and process immediately

    def not_found_in_index(self, requirement):
        """Log an appropriate message and fall back to a full index scan."""
        if self[requirement.key]:  # we've seen at least one distro
            meth, msg = self.info, "Couldn't retrieve index page for %r"
        else:  # no distros seen for this name, might be misspelled
            meth, msg = (
                self.warn,
                "Couldn't find index page for %r (maybe misspelled?)",
            )
        meth(msg, requirement.unsafe_name)
        self.scan_all()

    def download(self, spec, tmpdir):
        """Locate and/or download `spec` to `tmpdir`, returning a local path

        `spec` may be a ``Requirement`` object, or a string containing a URL,
        an existing local filename, or a project/version requirement spec
        (i.e. the string form of a ``Requirement`` object). If it is the URL
        of a .py file with an unambiguous ``#egg=name-version`` tag (i.e., one
        that escapes ``-`` as ``_`` throughout), a trivial ``setup.py`` is
        automatically created alongside the downloaded file.

        If `spec` is a ``Requirement`` object or a string containing a
        project/version requirement spec, this method returns the location of
        a matching distribution (possibly after downloading it to `tmpdir`).
        If `spec` is a locally existing file or directory name, it is simply
        returned unchanged. If `spec` is a URL, it is downloaded to a subpath
        of `tmpdir`, and the local filename is returned. Various errors may be
        raised if a problem occurs during downloading.
        """
        if not isinstance(spec, Requirement):
            scheme = URL_SCHEME(spec)
            if scheme:
                # It's a url, download it to tmpdir
                found = self._download_url(scheme.group(1), spec, tmpdir)
                base, fragment = egg_info_for_url(spec)
                if base.endswith('.py'):
                    found = self.gen_setup(found, fragment, tmpdir)
                return found
            elif os.path.exists(spec):
                # Existing file or directory, just return it
                return spec
            else:
                spec = parse_requirement_arg(spec)
        return getattr(self.fetch_distribution(spec, tmpdir), 'location', None)

    def fetch_distribution(  # noqa: C901 # is too complex (14)  # FIXME
        self,
        requirement,
        tmpdir,
        force_scan=False,
        source=False,
        develop_ok=False,
        local_index=None,
    ):
        """Obtain a distribution suitable for fulfilling `requirement`

        `requirement` must be a ``pkg_resources.Requirement`` instance.
        If necessary, or if the `force_scan` flag is set, the requirement is
        searched for in the (online) package index as well as the locally
        installed packages. If a distribution matching `requirement` is found,
        the returned distribution's ``location`` is the value you would have
        gotten from calling the ``download()`` method with the matching
        distribution's URL or filename. If no matching distribution is found,
        ``None`` is returned.

        If the `source` flag is set, only source distributions and source
        checkout links will be considered. Unless the `develop_ok` flag is
        set, development and system eggs (i.e., those using the ``.egg-info``
        format) will be ignored.
        """
        # process a Requirement
        self.info("Searching for %s", requirement)
        skipped = {}
        dist = None

        def find(req, env=None):
            if env is None:
                env = self
            # Find a matching distribution; may be called more than once

            for dist in env[req.key]:
                if dist.precedence == DEVELOP_DIST and not develop_ok:
                    if dist not in skipped:
                        # Warn only once per skipped distribution.
                        self.warn(
                            "Skipping development or system egg: %s",
                            dist,
                        )
                        skipped[dist] = 1
                    continue

                test = dist in req and (dist.precedence <= SOURCE_DIST or not source)
                if test:
                    loc = self.download(dist.location, tmpdir)
                    dist.download_location = loc
                    if os.path.exists(dist.download_location):
                        return dist

        if force_scan:
            self.prescan()
            self.find_packages(requirement)
            dist = find(requirement)

        if not dist and local_index is not None:
            dist = find(requirement, local_index)

        if dist is None:
            if self.to_scan is not None:
                self.prescan()
            dist = find(requirement)

        if dist is None and not force_scan:
            self.find_packages(requirement)
            dist = find(requirement)

        if dist is None:
            self.warn(
                "No local packages or working download links found for %s%s",
                (source and "a source distribution of " or ""),
                requirement,
            )
        else:
            self.info("Best match: %s", dist)
            return dist.clone(location=dist.download_location)

    def fetch(self, requirement, tmpdir, force_scan=False, source=False):
        """Obtain a file suitable for fulfilling `requirement`

        DEPRECATED; use the ``fetch_distribution()`` method now instead. For
        backward compatibility, this routine is identical but returns the
        ``location`` of the downloaded distribution instead of a distribution
        object.
        """
        dist = self.fetch_distribution(requirement, tmpdir, force_scan, source)
        if dist is not None:
            return dist.location
        return None

    def gen_setup(self, filename, fragment, tmpdir):
        """Create a trivial setup.py for a bare .py download, using its
        unambiguous ``#egg=name-version`` fragment; raise otherwise."""
        match = EGG_FRAGMENT.match(fragment)
        dists = (
            match
            and [
                d
                for d in interpret_distro_name(filename, match.group(1), None)
                if d.version
            ]
            or []
        )

        if len(dists) == 1:  # unambiguous ``#egg`` fragment
            basename = os.path.basename(filename)

            # Make sure the file has been downloaded to the temp dir.
            if os.path.dirname(filename) != tmpdir:
                dst = os.path.join(tmpdir, basename)
                if not (os.path.exists(dst) and os.path.samefile(filename, dst)):
                    shutil.copy2(filename, dst)
                    filename = dst

            with open(os.path.join(tmpdir, 'setup.py'), 'w') as file:
                file.write(
                    "from setuptools import setup\n"
                    "setup(name=%r, version=%r, py_modules=[%r])\n"
                    % (
                        dists[0].project_name,
                        dists[0].version,
                        os.path.splitext(basename)[0],
                    )
                )
            return filename

        elif match:
            raise DistutilsError(
                "Can't unambiguously interpret project/version identifier %r; "
                "any dashes in the name or version should be escaped using "
                "underscores. %r" % (fragment, dists)
            )
        else:
            raise DistutilsError(
                "Can't process plain .py files without an '#egg=name-version'"
                " suffix to enable automatic setup script generation."
            )

    # Chunk size (bytes) used when streaming downloads to disk.
    dl_blocksize = 8192

    def _download_to(self, url, filename):
        """Stream `url` into `filename`, verifying any hash fragment;
        return the response headers."""
        self.info("Downloading %s", url)
        # Download the file
        fp = None
        try:
            checker = HashChecker.from_url(url)
            fp = self.open_url(url)
            if isinstance(fp, urllib.error.HTTPError):
                raise DistutilsError(
                    "Can't download %s: %s %s" % (url, fp.code, fp.msg)
                )
            headers = fp.info()
            blocknum = 0
            bs = self.dl_blocksize
            size = -1
            if "content-length" in headers:
                # Some servers return multiple Content-Length headers :(
                sizes = headers.get_all('Content-Length')
                size = max(map(int, sizes))
                self.reporthook(url, filename, blocknum, bs, size)
            with open(filename, 'wb') as tfp:
                while True:
                    block = fp.read(bs)
                    if block:
                        checker.feed(block)
                        tfp.write(block)
                        blocknum += 1
                        self.reporthook(url, filename, blocknum, bs, size)
                    else:
                        break
                self.check_hash(checker, filename, tfp)
                return headers
        finally:
            if fp:
                fp.close()

    def reporthook(self, url, filename, blocknum, blksize, size):
        """Progress callback invoked per downloaded block; override to report."""
        pass  # no-op

    # FIXME:
    def open_url(self, url, warning=None):  # noqa: C901  # is too complex (12)
        """Open `url`, returning a file-like object; on error either warn
        (when `warning` is given) or raise DistutilsError."""
        if url.startswith('file:'):
            return local_open(url)
        try:
            return open_with_auth(url, self.opener)
        except (ValueError, http.client.InvalidURL) as v:
            msg = ' '.join([str(arg) for arg in v.args])
            if warning:
                self.warn(warning, msg)
            else:
                raise DistutilsError('%s %s' % (url, msg)) from v
        except urllib.error.HTTPError as v:
            # HTTP errors are returned, not raised, so callers can inspect them.
            return v
        except urllib.error.URLError as v:
            if warning:
                self.warn(warning, v.reason)
            else:
                raise DistutilsError(
                    "Download error for %s: %s" % (url, v.reason)
                ) from v
        except http.client.BadStatusLine as v:
            if warning:
                self.warn(warning, v.line)
            else:
                raise DistutilsError(
                    '%s returned a bad status line. The server might be '
                    'down, %s' % (url, v.line)
                ) from v
        except (http.client.HTTPException, socket.error) as v:
            if warning:
                self.warn(warning, v)
            else:
                raise DistutilsError("Download error for %s: %s" % (url, v)) from v

    def _download_url(self, scheme, url, tmpdir):
        """Dispatch a download by URL scheme; return the local path."""
        # Determine download filename
        #
        name, fragment = egg_info_for_url(url)
        if name:
            # Sanitize path-traversal and backslash characters from the name.
            while '..' in name:
                name = name.replace('..', '.').replace('\\', '_')
        else:
            name = "__downloaded__"  # default if URL has no path contents

        if name.endswith('.egg.zip'):
            name = name[:-4]  # strip the extra .zip before download

        filename = os.path.join(tmpdir, name)

        # Download the file
        #
        if scheme == 'svn' or scheme.startswith('svn+'):
            return self._download_svn(url, filename)
        elif scheme == 'git' or scheme.startswith('git+'):
            return self._download_git(url, filename)
        elif scheme.startswith('hg+'):
            return self._download_hg(url, filename)
        elif scheme == 'file':
            return urllib.request.url2pathname(urllib.parse.urlparse(url)[2])
        else:
            self.url_ok(url, True)  # raises error if not allowed
            return self._attempt_download(url, filename)

    def scan_url(self, url):
        """Scan `url` for download links, fetching its page if necessary."""
        self.process_url(url, True)

    def _attempt_download(self, url, filename):
        """Download `url`; reject responses that turn out to be HTML pages."""
        headers = self._download_to(url, filename)
        if 'html' in headers.get('content-type', '').lower():
            return self._invalid_download_html(url, headers, filename)
        else:
            return filename

    def _invalid_download_html(self, url, headers, filename):
        """Delete an HTML response masquerading as a download and raise."""
        os.unlink(filename)
        raise DistutilsError(f"Unexpected HTML page found at {url}")

    def _download_svn(self, url, _filename):
        """Subversion downloads are no longer supported; always raises."""
        raise DistutilsError(f"Invalid config, SVN download is not supported: {url}")

    @staticmethod
    def _vcs_split_rev_from_url(url, pop_prefix=False):
        """Split a VCS URL into (clean URL, revision-or-None)."""
        scheme, netloc, path, query, frag = urllib.parse.urlsplit(url)

        # Drop the VCS prefix, e.g. "git+https" -> "https".
        scheme = scheme.split('+', 1)[-1]

        # Some fragment identification fails
        path = path.split('#', 1)[0]

        rev = None
        if '@' in path:
            path, rev = path.rsplit('@', 1)

        # Also, discard fragment
        url = urllib.parse.urlunsplit((scheme, netloc, path, query, ''))

        return url, rev

    def _download_git(self, url, filename):
        """Clone a git URL (optionally checking out @rev); return the clone dir."""
        filename = filename.split('#', 1)[0]
        url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)

        self.info("Doing git clone from %s to %s", url, filename)
        os.system("git clone --quiet %s %s" % (url, filename))

        if rev is not None:
            self.info("Checking out %s", rev)
            os.system(
                "git -C %s checkout --quiet %s"
                % (
                    filename,
                    rev,
                )
            )

        return filename

    def _download_hg(self, url, filename):
        """Clone a mercurial URL (optionally updating to @rev); return the dir."""
        filename = filename.split('#', 1)[0]
        url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)

        self.info("Doing hg clone from %s to %s", url, filename)
        os.system("hg clone --quiet %s %s" % (url, filename))

        if rev is not None:
            self.info("Updating to %s", rev)
            os.system(
                "hg --cwd %s up -C -r %s -q"
                % (
                    filename,
                    rev,
                )
            )

        return filename

    def debug(self, msg, *args):
        """Log at debug level; subclasses may override for custom reporting."""
        log.debug(msg, *args)

    def info(self, msg, *args):
        """Log at info level; subclasses may override for custom reporting."""
        log.info(msg, *args)

    def warn(self, msg, *args):
        """Log at warning level; subclasses may override for custom reporting."""
        log.warn(msg, *args)
925
926
# This pattern matches a character entity reference (a decimal numeric
# reference, a hexadecimal numeric reference, or a named reference).
entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub
930
931
def decode_entity(match):
    """Resolve one HTML entity reference matched by ``entity_sub``."""
    return html.unescape(match.group(0))
935
936
def htmldecode(text):
    """
    Decode HTML entities in the given text.

    Only entity references matched by ``entity_sub`` are rewritten; all
    other characters pass through unchanged.

    >>> htmldecode(
    ...     'https://../package_name-0.1.2.tar.gz'
    ...     '?tokena=A&amp;tokenb=B">package_name-0.1.2.tar.gz')
    'https://../package_name-0.1.2.tar.gz?tokena=A&tokenb=B">package_name-0.1.2.tar.gz'
    """
    return entity_sub(decode_entity, text)
947
948
def socket_timeout(timeout=15):
    """
    Decorator factory: run the wrapped callable with the global socket
    default timeout set to *timeout* seconds, restoring the previous
    value afterwards (even when the call raises).
    """

    def _socket_timeout(func):
        # Fix: the original wrapper did not use functools.wraps, so the
        # decorated function lost its name/docstring; `wraps` is already
        # imported at the top of this module.
        @wraps(func)
        def _socket_timeout(*args, **kwargs):
            old_timeout = socket.getdefaulttimeout()
            socket.setdefaulttimeout(timeout)
            try:
                return func(*args, **kwargs)
            finally:
                # Always restore the previous global timeout.
                socket.setdefaulttimeout(old_timeout)

        return _socket_timeout

    return _socket_timeout
962
963
964 def _encode_auth(auth):
965 """
966 Encode auth from a URL suitable for an HTTP header.
967 >>> str(_encode_auth('username%3Apassword'))
968 'dXNlcm5hbWU6cGFzc3dvcmQ='
969
970 Long auth strings should not cause a newline to be inserted.
971 >>> long_auth = 'username:' + 'password'*10
972 >>> chr(10) in str(_encode_auth(long_auth))
973 False
974 """
975 auth_s = urllib.parse.unquote(auth)
976 # convert to bytes
977 auth_bytes = auth_s.encode()
978 encoded_bytes = base64.b64encode(auth_bytes)
979 # convert back to a string
980 encoded = encoded_bytes.decode()
981 # strip the trailing carriage return
982 return encoded.replace('\n', '')
983
984
class Credential:
    """
    A username/password pair. Use like a namedtuple.
    """

    def __init__(self, username, password):
        self.username = username
        self.password = password

    def __iter__(self):
        # Unpacks as (username, password), matching the namedtuple analogy.
        return iter((self.username, self.password))

    def __str__(self):
        return '%(username)s:%(password)s' % vars(self)
1000
1001
class PyPIConfig(configparser.RawConfigParser):
    """Reader for per-repository credentials stored in ``~/.pypirc``."""

    def __init__(self):
        """
        Load from ~/.pypirc
        """
        # Empty-string defaults so .get() never raises for a missing option.
        defaults = dict.fromkeys(['username', 'password', 'repository'], '')
        super().__init__(defaults)

        rc = os.path.join(os.path.expanduser('~'), '.pypirc')
        if os.path.exists(rc):
            self.read(rc)

    @property
    def creds_by_repository(self):
        """Map repository URL -> Credential for each section naming a repository."""
        sections_with_repositories = [
            section
            for section in self.sections()
            if self.get(section, 'repository').strip()
        ]

        return dict(map(self._get_repo_cred, sections_with_repositories))

    def _get_repo_cred(self, section):
        """Return the (repository URL, Credential) pair for *section*."""
        repo = self.get(section, 'repository').strip()
        return repo, Credential(
            self.get(section, 'username').strip(),
            self.get(section, 'password').strip(),
        )

    def find_credential(self, url):
        """
        If the URL indicated appears to be a repository defined in this
        config, return the credential for that repository.

        Returns None when no configured repository is a prefix of *url*.
        """
        for repository, cred in self.creds_by_repository.items():
            if url.startswith(repository):
                return cred
1039
1040
def open_with_auth(url, opener=urllib.request.urlopen):
    """Open a urllib2 request, handling HTTP authentication"""

    parsed = urllib.parse.urlparse(url)
    scheme, netloc, path, params, query, frag = parsed

    # Double scheme does not raise on macOS as revealed by a
    # failing test. We would expect "nonnumeric port". Refs #20.
    if netloc.endswith(':'):
        raise http.client.InvalidURL("nonnumeric port: ''")

    if scheme in ('http', 'https'):
        # Credentials may be embedded in the netloc as "user:pass@host".
        auth, address = _splituser(netloc)
    else:
        auth = None

    if not auth:
        # Fall back to credentials from ~/.pypirc that match this URL.
        cred = PyPIConfig().find_credential(url)
        if cred:
            auth = str(cred)
            info = cred.username, url
            log.info('Authenticating as %s for %s (from .pypirc)', *info)

    if auth:
        # Strip user:pass from the URL and send it as a Basic auth header.
        auth = "Basic " + _encode_auth(auth)
        parts = scheme, address, path, params, query, frag
        new_url = urllib.parse.urlunparse(parts)
        request = urllib.request.Request(new_url)
        request.add_header("Authorization", auth)
    else:
        request = urllib.request.Request(url)

    request.add_header('User-Agent', user_agent)
    fp = opener(request)

    if auth:
        # Put authentication info back into request URL if same host,
        # so that links found on the page will work
        s2, h2, path2, param2, query2, frag2 = urllib.parse.urlparse(fp.url)
        if s2 == scheme and h2 == address:
            parts = s2, netloc, path2, param2, query2, frag2
            fp.url = urllib.parse.urlunparse(parts)

    return fp
1085
1086
1087 # copy of urllib.parse._splituser from Python 3.8
1088 def _splituser(host):
1089 """splituser('user[:passwd]@host[:port]')
1090 --> 'user[:passwd]', 'host[:port]'."""
1091 user, delim, host = host.rpartition('@')
1092 return (user if delim else None), host
1093
1094
# Re-bind open_with_auth with the module-wide socket timeout so that a
# hung index server cannot freeze package_index indefinitely.
open_with_auth = socket_timeout(_SOCKET_TIMEOUT)(open_with_auth)
1097
1098
def fix_sf_url(url):
    """No-op retained for backward compatibility; returns *url* unchanged."""
    return url
1101
1102
def local_open(url):
    """Read a local path, with special support for directories"""
    scheme, server, path, param, query, frag = urllib.parse.urlparse(url)
    filename = urllib.request.url2pathname(path)
    if os.path.isfile(filename):
        return urllib.request.urlopen(url)
    elif path.endswith('/') and os.path.isdir(filename):
        files = []
        for f in os.listdir(filename):
            filepath = os.path.join(filename, f)
            if f == 'index.html':
                # A real index page wins: serve its contents verbatim.
                with open(filepath, 'r') as fp:
                    body = fp.read()
                break
            elif os.path.isdir(filepath):
                # Mark directories with a trailing slash in the listing.
                f += '/'
            files.append('<a href="{name}">{name}</a>'.format(name=f))
        else:
            # No index.html found: synthesize a simple directory listing.
            tmpl = (
                "<html><head><title>{url}</title>" "</head><body>{files}</body></html>"
            )
            body = tmpl.format(url=url, files='\n'.join(files))

        status, message = 200, "OK"
    else:
        status, message, body = 404, "Path not found", "Not found"

    # Wrap everything (including success) in an HTTPError, which doubles
    # as a file-like response object for callers.
    headers = {'content-type': 'text/html'}
    body_stream = io.StringIO(body)
    return urllib.error.HTTPError(url, status, message, headers, body_stream)