]> jfr.im git - dlqueue.git/blob - venv/lib/python3.11/site-packages/setuptools/package_index.py
init: venv and flask
[dlqueue.git] / venv / lib / python3.11 / site-packages / setuptools / package_index.py
1 """PyPI and direct package downloading."""
2
3 import sys
4 import os
5 import re
6 import io
7 import shutil
8 import socket
9 import base64
10 import hashlib
11 import itertools
12 import configparser
13 import html
14 import http.client
15 import urllib.parse
16 import urllib.request
17 import urllib.error
18 from functools import wraps
19
20 import setuptools
21 from pkg_resources import (
22 CHECKOUT_DIST,
23 Distribution,
24 BINARY_DIST,
25 normalize_path,
26 SOURCE_DIST,
27 Environment,
28 find_distributions,
29 safe_name,
30 safe_version,
31 to_filename,
32 Requirement,
33 DEVELOP_DIST,
34 EGG_DIST,
35 parse_version,
36 )
37 from distutils import log
38 from distutils.errors import DistutilsError
39 from fnmatch import translate
40 from setuptools.wheel import Wheel
41 from setuptools.extern.more_itertools import unique_everseen
42
43
# Matches an "egg=name-version" URL fragment used to identify a download.
EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.+!]+)$')
# Extracts the target of an href attribute from an HTML tag.
HREF = re.compile(r"""href\s*=\s*['"]?([^'"> ]+)""", re.I)
# Matches legacy PyPI "md5" annotation links next to download anchors.
PYPI_MD5 = re.compile(
    r'<a href="([^"#]+)">([^<]+)</a>\n\s+\(<a (?:title="MD5 hash"\n\s+)'
    r'href="[^?]+\?:action=show_md5&amp;digest=([0-9a-f]{32})">md5</a>\)'
)
# Returns a match object when a string begins with a URL scheme ("http:", ...).
URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):', re.I).match
# Recognized source-distribution archive extensions, in match-priority order.
EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split()

__all__ = [
    'PackageIndex',
    'distros_for_url',
    'parse_bdist_wininst',
    'interpret_distro_name',
]

# Default socket timeout (seconds) applied to index/network operations.
_SOCKET_TIMEOUT = 15

# User-Agent sent with every index request, e.g. "setuptools/65.0 Python-urllib/3.11".
_tmpl = "setuptools/{setuptools.__version__} Python-urllib/{py_major}"
user_agent = _tmpl.format(
    py_major='{}.{}'.format(*sys.version_info), setuptools=setuptools
)
66
67
def parse_requirement_arg(spec):
    """Parse *spec* as a requirement string, raising DistutilsError on failure."""
    try:
        return Requirement.parse(spec)
    except ValueError as exc:
        raise DistutilsError(
            "Not a URL, existing file, or requirement spec: %r" % (spec,)
        ) from exc
75
76
def parse_bdist_wininst(name):
    """Return (base,pyversion) or (None,None) for possible .exe name"""
    lower = name.lower()
    base = py_ver = plat = None

    if not lower.endswith('.exe'):
        return base, py_ver, plat

    # Four recognized bdist_wininst filename shapes; the "-pyX.Y" variants
    # embed a Python version in the last 7 chars before ".exe".
    if lower.endswith('.win32.exe'):
        base, plat = name[:-10], 'win32'
    elif lower.startswith('.win32-py', -16):
        base, py_ver, plat = name[:-16], name[-7:-4], 'win32'
    elif lower.endswith('.win-amd64.exe'):
        base, plat = name[:-14], 'win-amd64'
    elif lower.startswith('.win-amd64-py', -20):
        base, py_ver, plat = name[:-20], name[-7:-4], 'win-amd64'

    return base, py_ver, plat
99
100
def egg_info_for_url(url):
    """Return (basename, fragment) extracted from a download URL."""
    parsed = urllib.parse.urlparse(url)
    scheme, server, path, parameters, query, fragment = parsed
    segments = path.split('/')
    base = urllib.parse.unquote(segments[-1])
    if server == 'sourceforge.net' and base == 'download':  # XXX Yuck
        # SourceForge URLs end in "/download"; the real name is one step up.
        base = urllib.parse.unquote(segments[-2])
    if '#' in base:
        # A percent-encoded '#' survives unquoting; split it off as a fragment.
        base, fragment = base.split('#', 1)
    return base, fragment
110
111
def distros_for_url(url, metadata=None):
    """Yield egg or source distribution objects that might be found at a URL"""
    base, fragment = egg_info_for_url(url)
    yield from distros_for_location(url, base, metadata)
    if not fragment:
        return
    match = EGG_FRAGMENT.match(fragment)
    if not match:
        return
    # An explicit "#egg=name-version" fragment marks a checkout link.
    yield from interpret_distro_name(
        url, match.group(1), metadata, precedence=CHECKOUT_DIST
    )
124
125
def distros_for_location(location, basename, metadata=None):
    """Yield egg or source distribution objects based on basename"""
    if basename.endswith('.egg.zip'):
        basename = basename[:-4]  # strip the .zip
    if basename.endswith('.egg') and '-' in basename:
        # only one, unambiguous interpretation
        return [Distribution.from_location(location, basename, metadata)]
    if basename.endswith('.whl') and '-' in basename:
        whl = Wheel(basename)
        if not whl.is_compatible():
            return []
        dist = Distribution(
            location=location,
            project_name=whl.project_name,
            version=whl.version,
            # Increase priority over eggs.
            precedence=EGG_DIST + 1,
        )
        return [dist]
    if basename.endswith('.exe'):
        win_base, py_ver, platform = parse_bdist_wininst(basename)
        if win_base is not None:
            return interpret_distro_name(
                location, win_base, metadata, py_ver, BINARY_DIST, platform
            )
    # Try source distro extensions (.zip, .tgz, etc.)
    for ext in EXTENSIONS:
        if basename.endswith(ext):
            stem = basename[: -len(ext)]
            return interpret_distro_name(location, stem, metadata)
    return []  # no extension matched
159
160
def distros_for_filename(filename, metadata=None):
    """Yield possible egg or source distribution objects based on a filename"""
    location = normalize_path(filename)
    basename = os.path.basename(filename)
    return distros_for_location(location, basename, metadata)
166
167
def interpret_distro_name(
    location, basename, metadata, py_version=None, precedence=SOURCE_DIST, platform=None
):
    """Generate the interpretation of a source distro name

    Note: if `location` is a filesystem filename, you should call
    ``pkg_resources.normalize_path()`` on it before passing it to this
    routine!
    """
    parts = basename.split('-')

    # A "pyX.Y" tag past the first two parts marks a bdist_dumb, not an
    # sdist -- bail out.
    if not py_version and any(re.match(r'py\d\.\d$', p) for p in parts[2:]):
        return

    # Pivot between name and version: the first part starting with a digit
    # is taken to begin the version; otherwise everything is the name.
    pivot = next(
        (idx for idx, chunk in enumerate(parts) if chunk[:1].isdigit()),
        len(parts),
    )

    yield Distribution(
        location,
        metadata,
        '-'.join(parts[:pivot]),
        '-'.join(parts[pivot:]),
        py_version=py_version,
        precedence=precedence,
        platform=platform,
    )
200
201
def unique_values(func):
    """
    Decorator: wrap a function returning an iterable so the resulting
    iterable never yields the same item twice.
    """

    @wraps(func)
    def deduped(*args, **kwargs):
        return unique_everseen(func(*args, **kwargs))

    return deduped
213
214
REL = re.compile(r"""<([^>]*\srel\s{0,10}=\s{0,10}['"]?([^'" >]+)[^>]*)>""", re.I)
"""
Regex for an HTML tag with 'rel="val"' attributes.
Group 1 is the whole tag interior; group 2 is the rel value.
"""
219
220
@unique_values
def find_external_links(url, page):
    """Find rel="homepage" and rel="download" links in `page`, yielding URLs"""

    # Tags carrying a rel attribute that names homepage and/or download.
    for rel_match in REL.finditer(page):
        tag, rel = rel_match.groups()
        rels = {token.strip() for token in rel.lower().split(',')}
        if rels & {'homepage', 'download'}:
            for href in HREF.finditer(tag):
                yield urllib.parse.urljoin(url, htmldecode(href.group(1)))

    # Legacy PyPI table rows labeling the project's home/download URLs.
    for marker in ("<th>Home Page", "<th>Download URL"):
        pos = page.find(marker)
        if pos == -1:
            continue
        href = HREF.search(page, pos)
        if href:
            yield urllib.parse.urljoin(url, htmldecode(href.group(1)))
238
239
class ContentChecker:
    """
    Null content checker: defines the validation interface while
    accepting everything. Real checkers (e.g. hash-based) override it.
    """

    def feed(self, block):
        """Consume a block of downloaded data (no-op for the null checker)."""

    def is_valid(self):
        """Report whether the content validated; the null checker always says yes."""
        return True

    def report(self, reporter, template):
        """Describe this checker via *reporter* (no-op for the null checker)."""
263
264
class HashChecker(ContentChecker):
    """Validate downloaded content against a ``name=hexdigest`` URL fragment."""

    # Recognizes "<algorithm>=<hexdigest>" inside a URL fragment.
    pattern = re.compile(
        r'(?P<hash_name>sha1|sha224|sha384|sha256|sha512|md5)='
        r'(?P<expected>[a-f0-9]+)'
    )

    def __init__(self, hash_name, expected):
        self.hash_name = hash_name
        self.hash = hashlib.new(hash_name)
        self.expected = expected

    @classmethod
    def from_url(cls, url):
        """Construct a (possibly null) ContentChecker from a URL."""
        fragment = urllib.parse.urlparse(url)[-1]
        match = fragment and cls.pattern.search(fragment)
        if not match:
            # No usable digest in the fragment: accept anything.
            return ContentChecker()
        return cls(**match.groupdict())

    def feed(self, block):
        """Fold *block* into the running digest."""
        self.hash.update(block)

    def is_valid(self):
        """True when the accumulated digest equals the expected hex digest."""
        return self.hash.hexdigest() == self.expected

    def report(self, reporter, template):
        """Call *reporter* with *template* interpolated with the hash name."""
        return reporter(template % self.hash_name)
296
297
class PackageIndex(Environment):
    """A distribution index that scans web pages for download URLs"""

    def __init__(
        self,
        index_url="https://pypi.org/simple/",
        hosts=('*',),
        ca_bundle=None,
        verify_ssl=True,
        *args,
        **kw,
    ):
        # NOTE(review): ca_bundle and verify_ssl are accepted but never read
        # in this class -- presumably retained for backward compatibility.
        super().__init__(*args, **kw)
        # Normalize the index URL to always end with exactly one '/'.
        self.index_url = index_url + "/"[: not index_url.endswith('/')]
        self.scanned_urls = {}  # URLs already examined, whatever the outcome
        self.fetched_urls = {}  # URLs whose pages were read (or failed)
        self.package_pages = {}  # project key -> {package-page URL: True}
        # Compile the glob-style host patterns into a single regex matcher.
        self.allows = re.compile('|'.join(map(translate, hosts))).match
        self.to_scan = []  # deferred find-links URLs; None once prescanned
        self.opener = urllib.request.urlopen

    def add(self, dist):
        """Add `dist` to the index, skipping dists with unparseable versions."""
        # ignore invalid versions
        try:
            parse_version(dist.version)
        except Exception:
            return
        return super().add(dist)

    # FIXME: 'PackageIndex.process_url' is too complex (14)
    def process_url(self, url, retrieve=False):  # noqa: C901
        """Evaluate a URL as a possible download, and maybe retrieve it"""
        if url in self.scanned_urls and not retrieve:
            return
        self.scanned_urls[url] = True
        if not URL_SCHEME(url):
            # No scheme: treat as a local file or directory.
            self.process_filename(url)
            return
        else:
            dists = list(distros_for_url(url))
            if dists:
                if not self.url_ok(url):
                    return
                self.debug("Found link: %s", url)

        if dists or not retrieve or url in self.fetched_urls:
            list(map(self.add, dists))
            return  # don't need the actual page

        if not self.url_ok(url):
            self.fetched_urls[url] = True
            return

        self.info("Reading %s", url)
        self.fetched_urls[url] = True  # prevent multiple fetch attempts
        tmpl = "Download error on %s: %%s -- Some packages may not be found!"
        f = self.open_url(url, tmpl % url)
        if f is None:
            return
        if isinstance(f, urllib.error.HTTPError) and f.code == 401:
            self.info("Authentication error: %s" % f.msg)
        self.fetched_urls[f.url] = True
        if 'html' not in f.headers.get('content-type', '').lower():
            f.close()  # not html, we can't process it
            return

        base = f.url  # handle redirects
        page = f.read()
        if not isinstance(page, str):
            # In Python 3 and got bytes but want str.
            if isinstance(f, urllib.error.HTTPError):
                # Errors have no charset, assume latin1:
                charset = 'latin-1'
            else:
                charset = f.headers.get_param('charset') or 'latin-1'
            page = page.decode(charset, "ignore")
        f.close()
        # Recursively evaluate every link found on this page.
        for match in HREF.finditer(page):
            link = urllib.parse.urljoin(base, htmldecode(match.group(1)))
            self.process_url(link)
        if url.startswith(self.index_url) and getattr(f, 'code', None) != 404:
            page = self.process_index(url, page)

    def process_filename(self, fn, nested=False):
        """Index a local file, or (non-recursively) each entry of a directory."""
        # process filenames or directories
        if not os.path.exists(fn):
            self.warn("Not found: %s", fn)
            return

        if os.path.isdir(fn) and not nested:
            path = os.path.realpath(fn)
            for item in os.listdir(path):
                self.process_filename(os.path.join(path, item), True)

        dists = distros_for_filename(fn)
        if dists:
            self.debug("Found: %s", fn)
            list(map(self.add, dists))

    def url_ok(self, url, fatal=False):
        """Return True if `url`'s host is allowed; warn or raise otherwise."""
        s = URL_SCHEME(url)
        is_file = s and s.group(1).lower() == 'file'
        if is_file or self.allows(urllib.parse.urlparse(url)[1]):
            return True
        msg = (
            "\nNote: Bypassing %s (disallowed host; see "
            "https://setuptools.pypa.io/en/latest/deprecated/"
            "easy_install.html#restricting-downloads-with-allow-hosts for details).\n"
        )
        if fatal:
            raise DistutilsError(msg % url)
        else:
            self.warn(msg, url)

    def scan_egg_links(self, search_path):
        """Scan every ``*.egg-link`` file found in the given directories."""
        dirs = filter(os.path.isdir, search_path)
        egg_links = (
            (path, entry)
            for path in dirs
            for entry in os.listdir(path)
            if entry.endswith('.egg-link')
        )
        list(itertools.starmap(self.scan_egg_link, egg_links))

    def scan_egg_link(self, path, entry):
        """Register the development distribution referenced by one egg-link."""
        with open(os.path.join(path, entry)) as raw_lines:
            # filter non-empty lines
            lines = list(filter(None, map(str.strip, raw_lines)))

        if len(lines) != 2:
            # format is not recognized; punt
            return

        # An egg-link holds two lines: the egg path and the setup path.
        egg_path, setup_path = lines

        for dist in find_distributions(os.path.join(path, egg_path)):
            dist.location = os.path.join(path, *lines)
            dist.precedence = SOURCE_DIST
            self.add(dist)

    def _scan(self, link):
        # Process a URL to see if it's for a package page
        NO_MATCH_SENTINEL = None, None
        if not link.startswith(self.index_url):
            return NO_MATCH_SENTINEL

        # Index page URLs look like <index_url>/<project>/<version>.
        parts = list(map(urllib.parse.unquote, link[len(self.index_url) :].split('/')))
        if len(parts) != 2 or '#' in parts[1]:
            return NO_MATCH_SENTINEL

        # it's a package page, sanitize and index it
        pkg = safe_name(parts[0])
        ver = safe_version(parts[1])
        self.package_pages.setdefault(pkg.lower(), {})[link] = True
        return to_filename(pkg), to_filename(ver)

    def process_index(self, url, page):
        """Process the contents of a PyPI page"""

        # process an index page into the package-page index
        for match in HREF.finditer(page):
            try:
                self._scan(urllib.parse.urljoin(url, htmldecode(match.group(1))))
            except ValueError:
                pass

        pkg, ver = self._scan(url)  # ensure this page is in the page index
        if not pkg:
            return ""  # no sense double-scanning non-package pages

        # process individual package page
        for new_url in find_external_links(url, page):
            # Process the found URL
            base, frag = egg_info_for_url(new_url)
            if base.endswith('.py') and not frag:
                # Bare .py links need an #egg fragment to be identifiable.
                if ver:
                    new_url += '#egg=%s-%s' % (pkg, ver)
                else:
                    self.need_version_info(url)
            self.scan_url(new_url)

        # Rewrite legacy md5 annotation links into #md5= fragments.
        return PYPI_MD5.sub(
            lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1, 3, 2), page
        )

    def need_version_info(self, url):
        """Trigger a full index scan because a page had unversioned .py links."""
        self.scan_all(
            "Page at %s links to .py file(s) without version info; an index "
            "scan is required.",
            url,
        )

    def scan_all(self, msg=None, *args):
        """Scan the entire index root, at most once per instance."""
        if self.index_url not in self.fetched_urls:
            if msg:
                self.warn(msg, *args)
            self.info("Scanning index of all packages (this may take a while)")
            self.scan_url(self.index_url)

    def find_packages(self, requirement):
        """Scan the index pages that could satisfy `requirement`."""
        self.scan_url(self.index_url + requirement.unsafe_name + '/')

        if not self.package_pages.get(requirement.key):
            # Fall back to safe version of the name
            self.scan_url(self.index_url + requirement.project_name + '/')

        if not self.package_pages.get(requirement.key):
            # We couldn't find the target package, so search the index page too
            self.not_found_in_index(requirement)

        for url in list(self.package_pages.get(requirement.key, ())):
            # scan each page that might be related to the desired package
            self.scan_url(url)

    def obtain(self, requirement, installer=None):
        """Return a distribution satisfying `requirement`, scanning if needed."""
        self.prescan()
        self.find_packages(requirement)
        for dist in self[requirement.key]:
            if dist in requirement:
                return dist
            self.debug("%s does not match %s", requirement, dist)
        return super(PackageIndex, self).obtain(requirement, installer)

    def check_hash(self, checker, filename, tfp):
        """
        checker is a ContentChecker
        """
        checker.report(self.debug, "Validating %%s checksum for %s" % filename)
        if not checker.is_valid():
            # Remove the corrupt download before raising.
            tfp.close()
            os.unlink(filename)
            raise DistutilsError(
                "%s validation failed for %s; "
                "possible download problem?"
                % (checker.hash.name, os.path.basename(filename))
            )

    def add_find_links(self, urls):
        """Add `urls` to the list that will be prescanned for searches"""
        for url in urls:
            if (
                self.to_scan is None  # if we have already "gone online"
                or not URL_SCHEME(url)  # or it's a local file/directory
                or url.startswith('file:')
                or list(distros_for_url(url))  # or a direct package link
            ):
                # then go ahead and process it now
                self.scan_url(url)
            else:
                # otherwise, defer retrieval till later
                self.to_scan.append(url)

    def prescan(self):
        """Scan urls scheduled for prescanning (e.g. --find-links)"""
        if self.to_scan:
            list(map(self.scan_url, self.to_scan))
        self.to_scan = None  # from now on, go ahead and process immediately

    def not_found_in_index(self, requirement):
        """Log an appropriate message and fall back to a full index scan."""
        if self[requirement.key]:  # we've seen at least one distro
            meth, msg = self.info, "Couldn't retrieve index page for %r"
        else:  # no distros seen for this name, might be misspelled
            meth, msg = (
                self.warn,
                "Couldn't find index page for %r (maybe misspelled?)",
            )
        meth(msg, requirement.unsafe_name)
        self.scan_all()

    def download(self, spec, tmpdir):
        """Locate and/or download `spec` to `tmpdir`, returning a local path

        `spec` may be a ``Requirement`` object, or a string containing a URL,
        an existing local filename, or a project/version requirement spec
        (i.e. the string form of a ``Requirement`` object). If it is the URL
        of a .py file with an unambiguous ``#egg=name-version`` tag (i.e., one
        that escapes ``-`` as ``_`` throughout), a trivial ``setup.py`` is
        automatically created alongside the downloaded file.

        If `spec` is a ``Requirement`` object or a string containing a
        project/version requirement spec, this method returns the location of
        a matching distribution (possibly after downloading it to `tmpdir`).
        If `spec` is a locally existing file or directory name, it is simply
        returned unchanged. If `spec` is a URL, it is downloaded to a subpath
        of `tmpdir`, and the local filename is returned. Various errors may be
        raised if a problem occurs during downloading.
        """
        if not isinstance(spec, Requirement):
            scheme = URL_SCHEME(spec)
            if scheme:
                # It's a url, download it to tmpdir
                found = self._download_url(scheme.group(1), spec, tmpdir)
                base, fragment = egg_info_for_url(spec)
                if base.endswith('.py'):
                    found = self.gen_setup(found, fragment, tmpdir)
                return found
            elif os.path.exists(spec):
                # Existing file or directory, just return it
                return spec
            else:
                spec = parse_requirement_arg(spec)
        return getattr(self.fetch_distribution(spec, tmpdir), 'location', None)

    def fetch_distribution(  # noqa: C901 # is too complex (14)  # FIXME
        self,
        requirement,
        tmpdir,
        force_scan=False,
        source=False,
        develop_ok=False,
        local_index=None,
    ):
        """Obtain a distribution suitable for fulfilling `requirement`

        `requirement` must be a ``pkg_resources.Requirement`` instance.
        If necessary, or if the `force_scan` flag is set, the requirement is
        searched for in the (online) package index as well as the locally
        installed packages. If a distribution matching `requirement` is found,
        the returned distribution's ``location`` is the value you would have
        gotten from calling the ``download()`` method with the matching
        distribution's URL or filename. If no matching distribution is found,
        ``None`` is returned.

        If the `source` flag is set, only source distributions and source
        checkout links will be considered. Unless the `develop_ok` flag is
        set, development and system eggs (i.e., those using the ``.egg-info``
        format) will be ignored.
        """
        # process a Requirement
        self.info("Searching for %s", requirement)
        skipped = {}
        dist = None

        def find(req, env=None):
            if env is None:
                env = self
            # Find a matching distribution; may be called more than once

            for dist in env[req.key]:
                if dist.precedence == DEVELOP_DIST and not develop_ok:
                    if dist not in skipped:
                        # Warn only once per skipped distribution.
                        self.warn(
                            "Skipping development or system egg: %s",
                            dist,
                        )
                        skipped[dist] = 1
                    continue

                test = dist in req and (dist.precedence <= SOURCE_DIST or not source)
                if test:
                    loc = self.download(dist.location, tmpdir)
                    dist.download_location = loc
                    if os.path.exists(dist.download_location):
                        return dist

        if force_scan:
            self.prescan()
            self.find_packages(requirement)
            dist = find(requirement)

        if not dist and local_index is not None:
            dist = find(requirement, local_index)

        if dist is None:
            if self.to_scan is not None:
                self.prescan()
            dist = find(requirement)

        if dist is None and not force_scan:
            self.find_packages(requirement)
            dist = find(requirement)

        if dist is None:
            self.warn(
                "No local packages or working download links found for %s%s",
                (source and "a source distribution of " or ""),
                requirement,
            )
        else:
            self.info("Best match: %s", dist)
            return dist.clone(location=dist.download_location)

    def fetch(self, requirement, tmpdir, force_scan=False, source=False):
        """Obtain a file suitable for fulfilling `requirement`

        DEPRECATED; use the ``fetch_distribution()`` method now instead. For
        backward compatibility, this routine is identical but returns the
        ``location`` of the downloaded distribution instead of a distribution
        object.
        """
        dist = self.fetch_distribution(requirement, tmpdir, force_scan, source)
        if dist is not None:
            return dist.location
        return None

    def gen_setup(self, filename, fragment, tmpdir):
        """Create a trivial setup.py for a bare .py download, using its
        unambiguous ``#egg=name-version`` fragment; raise otherwise."""
        match = EGG_FRAGMENT.match(fragment)
        dists = (
            match
            and [
                d
                for d in interpret_distro_name(filename, match.group(1), None)
                if d.version
            ]
            or []
        )

        if len(dists) == 1:  # unambiguous ``#egg`` fragment
            basename = os.path.basename(filename)

            # Make sure the file has been downloaded to the temp dir.
            if os.path.dirname(filename) != tmpdir:
                dst = os.path.join(tmpdir, basename)
                if not (os.path.exists(dst) and os.path.samefile(filename, dst)):
                    shutil.copy2(filename, dst)
                    filename = dst

            with open(os.path.join(tmpdir, 'setup.py'), 'w') as file:
                file.write(
                    "from setuptools import setup\n"
                    "setup(name=%r, version=%r, py_modules=[%r])\n"
                    % (
                        dists[0].project_name,
                        dists[0].version,
                        os.path.splitext(basename)[0],
                    )
                )
            return filename

        elif match:
            raise DistutilsError(
                "Can't unambiguously interpret project/version identifier %r; "
                "any dashes in the name or version should be escaped using "
                "underscores. %r" % (fragment, dists)
            )
        else:
            raise DistutilsError(
                "Can't process plain .py files without an '#egg=name-version'"
                " suffix to enable automatic setup script generation."
            )

    # Chunk size (bytes) used when streaming downloads to disk.
    dl_blocksize = 8192

    def _download_to(self, url, filename):
        """Stream `url` into `filename`, verifying any hash fragment;
        return the response headers."""
        self.info("Downloading %s", url)
        # Download the file
        fp = None
        try:
            checker = HashChecker.from_url(url)
            fp = self.open_url(url)
            if isinstance(fp, urllib.error.HTTPError):
                raise DistutilsError(
                    "Can't download %s: %s %s" % (url, fp.code, fp.msg)
                )
            headers = fp.info()
            blocknum = 0
            bs = self.dl_blocksize
            size = -1
            if "content-length" in headers:
                # Some servers return multiple Content-Length headers :(
                sizes = headers.get_all('Content-Length')
                size = max(map(int, sizes))
                self.reporthook(url, filename, blocknum, bs, size)
            with open(filename, 'wb') as tfp:
                while True:
                    block = fp.read(bs)
                    if block:
                        checker.feed(block)
                        tfp.write(block)
                        blocknum += 1
                        self.reporthook(url, filename, blocknum, bs, size)
                    else:
                        break
                self.check_hash(checker, filename, tfp)
                return headers
        finally:
            if fp:
                fp.close()

    def reporthook(self, url, filename, blocknum, blksize, size):
        """Progress callback invoked per downloaded block; override to report."""
        pass  # no-op

    # FIXME:
    def open_url(self, url, warning=None):  # noqa: C901  # is too complex (12)
        """Open `url`, returning a file-like object; on error either warn
        (when `warning` is given) or raise DistutilsError."""
        if url.startswith('file:'):
            return local_open(url)
        try:
            return open_with_auth(url, self.opener)
        except (ValueError, http.client.InvalidURL) as v:
            msg = ' '.join([str(arg) for arg in v.args])
            if warning:
                self.warn(warning, msg)
            else:
                raise DistutilsError('%s %s' % (url, msg)) from v
        except urllib.error.HTTPError as v:
            # HTTP errors are returned, not raised, so callers can inspect them.
            return v
        except urllib.error.URLError as v:
            if warning:
                self.warn(warning, v.reason)
            else:
                raise DistutilsError(
                    "Download error for %s: %s" % (url, v.reason)
                ) from v
        except http.client.BadStatusLine as v:
            if warning:
                self.warn(warning, v.line)
            else:
                raise DistutilsError(
                    '%s returned a bad status line. The server might be '
                    'down, %s' % (url, v.line)
                ) from v
        except (http.client.HTTPException, socket.error) as v:
            if warning:
                self.warn(warning, v)
            else:
                raise DistutilsError("Download error for %s: %s" % (url, v)) from v

    def _download_url(self, scheme, url, tmpdir):
        """Dispatch a download by URL scheme; return the local path."""
        # Determine download filename
        #
        name, fragment = egg_info_for_url(url)
        if name:
            # Sanitize path-traversal and backslash characters from the name.
            while '..' in name:
                name = name.replace('..', '.').replace('\\', '_')
        else:
            name = "__downloaded__"  # default if URL has no path contents

        if name.endswith('.egg.zip'):
            name = name[:-4]  # strip the extra .zip before download

        filename = os.path.join(tmpdir, name)

        # Download the file
        #
        if scheme == 'svn' or scheme.startswith('svn+'):
            return self._download_svn(url, filename)
        elif scheme == 'git' or scheme.startswith('git+'):
            return self._download_git(url, filename)
        elif scheme.startswith('hg+'):
            return self._download_hg(url, filename)
        elif scheme == 'file':
            return urllib.request.url2pathname(urllib.parse.urlparse(url)[2])
        else:
            self.url_ok(url, True)  # raises error if not allowed
            return self._attempt_download(url, filename)

    def scan_url(self, url):
        """Scan `url` for download links, fetching its page if necessary."""
        self.process_url(url, True)

    def _attempt_download(self, url, filename):
        """Download `url`; reject responses that turn out to be HTML pages."""
        headers = self._download_to(url, filename)
        if 'html' in headers.get('content-type', '').lower():
            return self._invalid_download_html(url, headers, filename)
        else:
            return filename

    def _invalid_download_html(self, url, headers, filename):
        """Delete an HTML response masquerading as a download and raise."""
        os.unlink(filename)
        raise DistutilsError(f"Unexpected HTML page found at {url}")

    def _download_svn(self, url, _filename):
        """Subversion downloads are no longer supported; always raises."""
        raise DistutilsError(f"Invalid config, SVN download is not supported: {url}")

    @staticmethod
    def _vcs_split_rev_from_url(url, pop_prefix=False):
        """Split a VCS URL into (clean URL, revision-or-None)."""
        scheme, netloc, path, query, frag = urllib.parse.urlsplit(url)

        # Drop the VCS prefix, e.g. "git+https" -> "https".
        scheme = scheme.split('+', 1)[-1]

        # Some fragment identification fails
        path = path.split('#', 1)[0]

        rev = None
        if '@' in path:
            path, rev = path.rsplit('@', 1)

        # Also, discard fragment
        url = urllib.parse.urlunsplit((scheme, netloc, path, query, ''))

        return url, rev

    def _download_git(self, url, filename):
        """Clone a git URL (optionally checking out @rev); return the clone dir."""
        filename = filename.split('#', 1)[0]
        url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)

        self.info("Doing git clone from %s to %s", url, filename)
        os.system("git clone --quiet %s %s" % (url, filename))

        if rev is not None:
            self.info("Checking out %s", rev)
            os.system(
                "git -C %s checkout --quiet %s"
                % (
                    filename,
                    rev,
                )
            )

        return filename

    def _download_hg(self, url, filename):
        """Clone a mercurial URL (optionally updating to @rev); return the dir."""
        filename = filename.split('#', 1)[0]
        url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)

        self.info("Doing hg clone from %s to %s", url, filename)
        os.system("hg clone --quiet %s %s" % (url, filename))

        if rev is not None:
            self.info("Updating to %s", rev)
            os.system(
                "hg --cwd %s up -C -r %s -q"
                % (
                    filename,
                    rev,
                )
            )

        return filename

    def debug(self, msg, *args):
        """Log at debug level; subclasses may override for custom reporting."""
        log.debug(msg, *args)

    def info(self, msg, *args):
        """Log at info level; subclasses may override for custom reporting."""
        log.info(msg, *args)

    def warn(self, msg, *args):
        """Log at warning level; subclasses may override for custom reporting."""
        log.warn(msg, *args)
925
926
# This pattern matches a character entity reference (a decimal numeric
# reference, a hexadecimal numeric reference, or a named reference).
entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub
930
931
def decode_entity(match):
    """Resolve one HTML entity reference matched by ``entity_sub``."""
    return html.unescape(match.group(0))
935
936
def htmldecode(text):
    """
    Decode HTML entities in the given text.

    Only entity references matched by ``entity_sub`` are rewritten; all
    other characters pass through unchanged.

    >>> htmldecode(
    ...     'https://../package_name-0.1.2.tar.gz'
    ...     '?tokena=A&amp;tokenb=B">package_name-0.1.2.tar.gz')
    'https://../package_name-0.1.2.tar.gz?tokena=A&tokenb=B">package_name-0.1.2.tar.gz'
    """
    return entity_sub(decode_entity, text)
947
948
def socket_timeout(timeout=15):
    """
    Decorator factory: run the wrapped callable with the global socket
    default timeout set to *timeout* seconds, restoring the previous
    value afterwards (even when the call raises).
    """

    def _socket_timeout(func):
        # Fix: the original wrapper did not use functools.wraps, so the
        # decorated function lost its name/docstring; `wraps` is already
        # imported at the top of this module.
        @wraps(func)
        def _socket_timeout(*args, **kwargs):
            old_timeout = socket.getdefaulttimeout()
            socket.setdefaulttimeout(timeout)
            try:
                return func(*args, **kwargs)
            finally:
                # Always restore the previous global timeout.
                socket.setdefaulttimeout(old_timeout)

        return _socket_timeout

    return _socket_timeout
962
963
964 def _encode_auth(auth):
965 """
966 Encode auth from a URL suitable for an HTTP header.
967 >>> str(_encode_auth('username%3Apassword'))
968 'dXNlcm5hbWU6cGFzc3dvcmQ='
969
970 Long auth strings should not cause a newline to be inserted.
971 >>> long_auth = 'username:' + 'password'*10
972 >>> chr(10) in str(_encode_auth(long_auth))
973 False
974 """
975 auth_s = urllib.parse.unquote(auth)
976 # convert to bytes
977 auth_bytes = auth_s.encode()
978 encoded_bytes = base64.b64encode(auth_bytes)
979 # convert back to a string
980 encoded = encoded_bytes.decode()
981 # strip the trailing carriage return
982 return encoded.replace('\n', '')
983
984
class Credential:
    """
    A username/password pair. Use like a namedtuple.
    """

    def __init__(self, username, password):
        self.username = username
        self.password = password

    def __iter__(self):
        # Unpacks as (username, password), matching the namedtuple analogy.
        return iter((self.username, self.password))

    def __str__(self):
        return '%(username)s:%(password)s' % vars(self)
1000
1001
class PyPIConfig(configparser.RawConfigParser):
    """Reader for per-repository credentials stored in ``~/.pypirc``."""

    def __init__(self):
        """
        Load from ~/.pypirc
        """
        # Empty-string defaults so .get() never raises for a missing option.
        defaults = dict.fromkeys(['username', 'password', 'repository'], '')
        super().__init__(defaults)

        rc = os.path.join(os.path.expanduser('~'), '.pypirc')
        if os.path.exists(rc):
            self.read(rc)

    @property
    def creds_by_repository(self):
        """Map repository URL -> Credential for each section naming a repository."""
        sections_with_repositories = [
            section
            for section in self.sections()
            if self.get(section, 'repository').strip()
        ]

        return dict(map(self._get_repo_cred, sections_with_repositories))

    def _get_repo_cred(self, section):
        """Return the (repository URL, Credential) pair for *section*."""
        repo = self.get(section, 'repository').strip()
        return repo, Credential(
            self.get(section, 'username').strip(),
            self.get(section, 'password').strip(),
        )

    def find_credential(self, url):
        """
        If the URL indicated appears to be a repository defined in this
        config, return the credential for that repository.

        Returns None when no configured repository is a prefix of *url*.
        """
        for repository, cred in self.creds_by_repository.items():
            if url.startswith(repository):
                return cred
1039
1040
def open_with_auth(url, opener=urllib.request.urlopen):
    """Open a urllib2 request, handling HTTP authentication"""

    parsed = urllib.parse.urlparse(url)
    scheme, netloc, path, params, query, frag = parsed

    # Double scheme does not raise on macOS as revealed by a
    # failing test. We would expect "nonnumeric port". Refs #20.
    if netloc.endswith(':'):
        raise http.client.InvalidURL("nonnumeric port: ''")

    if scheme in ('http', 'https'):
        # Credentials may be embedded in the netloc as "user:pass@host".
        auth, address = _splituser(netloc)
    else:
        auth = None

    if not auth:
        # Fall back to credentials from ~/.pypirc that match this URL.
        cred = PyPIConfig().find_credential(url)
        if cred:
            auth = str(cred)
            info = cred.username, url
            log.info('Authenticating as %s for %s (from .pypirc)', *info)

    if auth:
        # Strip user:pass from the URL and send it as a Basic auth header.
        auth = "Basic " + _encode_auth(auth)
        parts = scheme, address, path, params, query, frag
        new_url = urllib.parse.urlunparse(parts)
        request = urllib.request.Request(new_url)
        request.add_header("Authorization", auth)
    else:
        request = urllib.request.Request(url)

    request.add_header('User-Agent', user_agent)
    fp = opener(request)

    if auth:
        # Put authentication info back into request URL if same host,
        # so that links found on the page will work
        s2, h2, path2, param2, query2, frag2 = urllib.parse.urlparse(fp.url)
        if s2 == scheme and h2 == address:
            parts = s2, netloc, path2, param2, query2, frag2
            fp.url = urllib.parse.urlunparse(parts)

    return fp
1085
1086
1087 # copy of urllib.parse._splituser from Python 3.8
1088 def _splituser(host):
1089 """splituser('user[:passwd]@host[:port]')
1090 --> 'user[:passwd]', 'host[:port]'."""
1091 user, delim, host = host.rpartition('@')
1092 return (user if delim else None), host
1093
1094
# Re-bind open_with_auth with the module-wide socket timeout so that a
# hung index server cannot freeze package_index indefinitely.
open_with_auth = socket_timeout(_SOCKET_TIMEOUT)(open_with_auth)
1097
1098
def fix_sf_url(url):
    """No-op retained for backward compatibility; returns *url* unchanged."""
    return url
1101
1102
def local_open(url):
    """Read a local path, with special support for directories"""
    scheme, server, path, param, query, frag = urllib.parse.urlparse(url)
    filename = urllib.request.url2pathname(path)
    if os.path.isfile(filename):
        return urllib.request.urlopen(url)
    elif path.endswith('/') and os.path.isdir(filename):
        files = []
        for f in os.listdir(filename):
            filepath = os.path.join(filename, f)
            if f == 'index.html':
                # A real index page wins: serve its contents verbatim.
                with open(filepath, 'r') as fp:
                    body = fp.read()
                break
            elif os.path.isdir(filepath):
                # Mark directories with a trailing slash in the listing.
                f += '/'
            files.append('<a href="{name}">{name}</a>'.format(name=f))
        else:
            # No index.html found: synthesize a simple directory listing.
            tmpl = (
                "<html><head><title>{url}</title>" "</head><body>{files}</body></html>"
            )
            body = tmpl.format(url=url, files='\n'.join(files))

        status, message = 200, "OK"
    else:
        status, message, body = 404, "Path not found", "Not found"

    # Wrap everything (including success) in an HTTPError, which doubles
    # as a file-like response object for callers.
    headers = {'content-type': 'text/html'}
    body_stream = io.StringIO(body)
    return urllib.error.HTTPError(url, status, message, headers, body_stream)