1 """PyPI and direct package downloading."""
18 from functools
import wraps
21 from pkg_resources
import (
37 from distutils
import log
38 from distutils
.errors
import DistutilsError
39 from fnmatch
import translate
40 from setuptools
.wheel
import Wheel
41 from setuptools
.extern
.more_itertools
import unique_everseen
# "egg=<project>-<version>" as it appears in a URL fragment.
EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.+!]+)$')
# href attribute extractor; case-insensitive, tolerates unquoted values.
HREF = re.compile(r"""href\s*=\s*['"]?([^'"> ]+)""", re.I)
# Legacy PyPI markup tying a download link to its MD5 digest.
PYPI_MD5 = re.compile(
    r'<a href="([^"#]+)">([^<]+)</a>\n\s+\(<a (?:title="MD5 hash"\n\s+)'
    r'href="[^?]+\?:action=show_md5&digest=([0-9a-f]{32})">md5</a>\)'
)
# Matches a leading URL scheme such as "https:" or "git+ssh:".
URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):', re.I).match
# Source-distribution filename extensions we know how to interpret.
EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split()
56 'parse_bdist_wininst',
57 'interpret_distro_name',
# User-Agent header sent with index requests; filled in with the running
# interpreter's major.minor version and the setuptools version.
_tmpl = "setuptools/{setuptools.__version__} Python-urllib/{py_major}"
user_agent = _tmpl.format(
    py_major='{}.{}'.format(*sys.version_info), setuptools=setuptools
)
def parse_requirement_arg(spec):
    """Coerce *spec* into a ``Requirement``, raising ``DistutilsError``
    with a user-oriented message when it cannot be parsed."""
    try:
        parsed = Requirement.parse(spec)
    except ValueError as err:
        raise DistutilsError(
            "Not a URL, existing file, or requirement spec: %r" % (spec,)
        ) from err
    return parsed
def parse_bdist_wininst(name):
    """Return (base,pyversion) or (None,None) for possible .exe name"""
    lower = name.lower()
    base, py_ver, plat = None, None, None

    if lower.endswith('.exe'):
        # Four recognized bdist_wininst suffix layouts; the "-pyX.Y"
        # variants carry the target Python version before ".exe".
        if lower.endswith('.win32.exe'):
            base = name[:-10]
            plat = 'win32'
        elif lower.startswith('.win32-py', -16):
            py_ver = name[-7:-4]
            base = name[:-16]
            plat = 'win32'
        elif lower.endswith('.win-amd64.exe'):
            base = name[:-14]
            plat = 'win-amd64'
        elif lower.startswith('.win-amd64-py', -20):
            py_ver = name[-7:-4]
            base = name[:-20]
            plat = 'win-amd64'
    return base, py_ver, plat
def egg_info_for_url(url):
    """Split *url* into ``(basename, fragment)`` for distro identification."""
    scheme, server, path, parameters, query, fragment = urllib.parse.urlparse(url)
    base = urllib.parse.unquote(path.split('/')[-1])
    if server == 'sourceforge.net' and base == 'download':  # XXX Yuck
        # SourceForge download URLs end in "/download"; the real
        # filename is the previous path component.
        base = urllib.parse.unquote(path.split('/')[-2])
    if '#' in base:
        base, fragment = base.split('#', 1)
    return base, fragment
def distros_for_url(url, metadata=None):
    """Yield egg or source distribution objects that might be found at a URL"""
    base, fragment = egg_info_for_url(url)
    for dist in distros_for_location(url, base, metadata):
        yield dist
    if fragment:
        # An "#egg=name-version" fragment marks a checkout link.
        match = EGG_FRAGMENT.match(fragment)
        if match:
            for dist in interpret_distro_name(
                url, match.group(1), metadata, precedence=CHECKOUT_DIST
            ):
                yield dist
def distros_for_location(location, basename, metadata=None):
    """Yield egg or source distribution objects based on basename"""
    if basename.endswith('.egg.zip'):
        basename = basename[:-4]  # strip the .zip
    if basename.endswith('.egg') and '-' in basename:
        # only one, unambiguous interpretation
        return [Distribution.from_location(location, basename, metadata)]
    if basename.endswith('.whl') and '-' in basename:
        whl = Wheel(basename)
        if not whl.is_compatible():
            return []
        return [
            Distribution(
                location=location,
                project_name=whl.project_name,
                version=whl.version,
                # Increase priority over eggs.
                precedence=EGG_DIST + 1,
            )
        ]
    if basename.endswith('.exe'):
        win_base, py_ver, platform = parse_bdist_wininst(basename)
        if win_base is not None:
            return interpret_distro_name(
                location, win_base, metadata, py_ver, BINARY_DIST, platform
            )
    # Try source distro extensions (.zip, .tgz, etc.)
    for ext in EXTENSIONS:
        if basename.endswith(ext):
            basename = basename[: -len(ext)]
            return interpret_distro_name(location, basename, metadata)
    return []  # no extension matched
def distros_for_filename(filename, metadata=None):
    """Yield possible egg or source distribution objects based on a filename"""
    return distros_for_location(
        normalize_path(filename), os.path.basename(filename), metadata
    )
def interpret_distro_name(
    location, basename, metadata, py_version=None, precedence=SOURCE_DIST, platform=None
):
    """Generate the interpretation of a source distro name

    Note: if `location` is a filesystem filename, you should call
    ``pkg_resources.normalize_path()`` on it before passing it to this
    routine!
    """
    parts = basename.split('-')
    if not py_version and any(re.match(r'py\d\.\d$', p) for p in parts[2:]):
        # it is a bdist_dumb, not an sdist -- bail out
        return

    # find the pivot (p) that splits the name from the version.
    # infer the version as the first item that has a digit.
    for p in range(len(parts)):
        if parts[p][:1].isdigit():
            break
    else:
        p = len(parts)

    yield Distribution(
        location,
        metadata,
        '-'.join(parts[:p]),
        '-'.join(parts[p:]),
        py_version=py_version,
        precedence=precedence,
        platform=platform,
    )
def unique_values(func):
    """
    Wrap a function returning an iterable such that the resulting iterable
    only ever yields unique items.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        return unique_everseen(func(*args, **kwargs))

    return wrapper
REL = re.compile(r"""<([^>]*\srel\s{0,10}=\s{0,10}['"]?([^'" >]+)[^>]*)>""", re.I)
"""
Regex for an HTML tag with 'rel="val"' attributes.
"""
def find_external_links(url, page):
    """Find rel="homepage" and rel="download" links in `page`, yielding URLs"""
    for match in REL.finditer(page):
        tag, rel = match.groups()
        rels = set(map(str.strip, rel.lower().split(',')))
        if 'homepage' in rels or 'download' in rels:
            for match in HREF.finditer(tag):
                yield urllib.parse.urljoin(url, htmldecode(match.group(1)))

    # Legacy PyPI table rows also carry these links.
    for tag in ("<th>Home Page", "<th>Download URL"):
        pos = page.find(tag)
        if pos != -1:
            match = HREF.search(page, pos)
            if match:
                yield urllib.parse.urljoin(url, htmldecode(match.group(1)))
class ContentChecker:
    """
    A null content checker that defines the interface for checking content
    """

    def feed(self, block):
        """
        Feed a block of data to the hash.
        """

    def is_valid(self):
        """
        Check the hash. Return False if validation fails.
        """
        return True

    def report(self, reporter, template):
        """
        Call reporter with information about the checker (hash name)
        substituted into the template.
        """
class HashChecker(ContentChecker):
    # "<algo>=<hexdigest>" as found in a URL fragment.
    pattern = re.compile(
        r'(?P<hash_name>sha1|sha224|sha384|sha256|sha512|md5)='
        r'(?P<expected>[a-f0-9]+)'
    )

    def __init__(self, hash_name, expected):
        self.hash_name = hash_name
        self.hash = hashlib.new(hash_name)
        self.expected = expected

    @classmethod
    def from_url(cls, url):
        "Construct a (possibly null) ContentChecker from a URL"
        fragment = urllib.parse.urlparse(url)[-1]
        if not fragment:
            return ContentChecker()
        match = cls.pattern.search(fragment)
        if not match:
            return ContentChecker()
        return cls(**match.groupdict())

    def feed(self, block):
        self.hash.update(block)

    def is_valid(self):
        return self.hash.hexdigest() == self.expected

    def report(self, reporter, template):
        msg = template % self.hash_name
        return reporter(msg)
298 class PackageIndex(Environment
):
299 """A distribution index that scans web pages for download URLs"""
303 index_url
="https://pypi.org/simple/",
310 super().__init
__(*args
, **kw
)
311 self
.index_url
= index_url
+ "/"[: not index_url
.endswith('/')]
312 self
.scanned_urls
= {}
313 self
.fetched_urls
= {}
314 self
.package_pages
= {}
315 self
.allows
= re
.compile('|'.join(map(translate
, hosts
))).match
317 self
.opener
= urllib
.request
.urlopen
320 # ignore invalid versions
322 parse_version(dist
.version
)
325 return super().add(dist
)
327 # FIXME: 'PackageIndex.process_url' is too complex (14)
328 def process_url(self
, url
, retrieve
=False): # noqa: C901
329 """Evaluate a URL as a possible download, and maybe retrieve it"""
330 if url
in self
.scanned_urls
and not retrieve
:
332 self
.scanned_urls
[url
] = True
333 if not URL_SCHEME(url
):
334 self
.process_filename(url
)
337 dists
= list(distros_for_url(url
))
339 if not self
.url_ok(url
):
341 self
.debug("Found link: %s", url
)
343 if dists
or not retrieve
or url
in self
.fetched_urls
:
344 list(map(self
.add
, dists
))
345 return # don't need the actual page
347 if not self
.url_ok(url
):
348 self
.fetched_urls
[url
] = True
351 self
.info("Reading %s", url
)
352 self
.fetched_urls
[url
] = True # prevent multiple fetch attempts
353 tmpl
= "Download error on %s: %%s -- Some packages may not be found!"
354 f
= self
.open_url(url
, tmpl
% url
)
357 if isinstance(f
, urllib
.error
.HTTPError
) and f
.code
== 401:
358 self
.info("Authentication error: %s" % f
.msg
)
359 self
.fetched_urls
[f
.url
] = True
360 if 'html' not in f
.headers
.get('content-type', '').lower():
361 f
.close() # not html, we can't process it
364 base
= f
.url
# handle redirects
366 if not isinstance(page
, str):
367 # In Python 3 and got bytes but want str.
368 if isinstance(f
, urllib
.error
.HTTPError
):
369 # Errors have no charset, assume latin1:
372 charset
= f
.headers
.get_param('charset') or 'latin-1'
373 page
= page
.decode(charset
, "ignore")
375 for match
in HREF
.finditer(page
):
376 link
= urllib
.parse
.urljoin(base
, htmldecode(match
.group(1)))
377 self
.process_url(link
)
378 if url
.startswith(self
.index_url
) and getattr(f
, 'code', None) != 404:
379 page
= self
.process_index(url
, page
)
381 def process_filename(self
, fn
, nested
=False):
382 # process filenames or directories
383 if not os
.path
.exists(fn
):
384 self
.warn("Not found: %s", fn
)
387 if os
.path
.isdir(fn
) and not nested
:
388 path
= os
.path
.realpath(fn
)
389 for item
in os
.listdir(path
):
390 self
.process_filename(os
.path
.join(path
, item
), True)
392 dists
= distros_for_filename(fn
)
394 self
.debug("Found: %s", fn
)
395 list(map(self
.add
, dists
))
397 def url_ok(self
, url
, fatal
=False):
399 is_file
= s
and s
.group(1).lower() == 'file'
400 if is_file
or self
.allows(urllib
.parse
.urlparse(url
)[1]):
403 "\nNote: Bypassing %s (disallowed host; see "
404 "https://setuptools.pypa.io/en/latest/deprecated/"
405 "easy_install.html#restricting-downloads-with-allow-hosts for details).\n"
408 raise DistutilsError(msg
% url
)
412 def scan_egg_links(self
, search_path
):
413 dirs
= filter(os
.path
.isdir
, search_path
)
417 for entry
in os
.listdir(path
)
418 if entry
.endswith('.egg-link')
420 list(itertools
.starmap(self
.scan_egg_link
, egg_links
))
422 def scan_egg_link(self
, path
, entry
):
423 with open(os
.path
.join(path
, entry
)) as raw_lines
:
424 # filter non-empty lines
425 lines
= list(filter(None, map(str.strip
, raw_lines
)))
428 # format is not recognized; punt
431 egg_path
, setup_path
= lines
433 for dist
in find_distributions(os
.path
.join(path
, egg_path
)):
434 dist
.location
= os
.path
.join(path
, *lines
)
435 dist
.precedence
= SOURCE_DIST
438 def _scan(self
, link
):
439 # Process a URL to see if it's for a package page
440 NO_MATCH_SENTINEL
= None, None
441 if not link
.startswith(self
.index_url
):
442 return NO_MATCH_SENTINEL
444 parts
= list(map(urllib
.parse
.unquote
, link
[len(self
.index_url
) :].split('/')))
445 if len(parts
) != 2 or '#' in parts
[1]:
446 return NO_MATCH_SENTINEL
448 # it's a package page, sanitize and index it
449 pkg
= safe_name(parts
[0])
450 ver
= safe_version(parts
[1])
451 self
.package_pages
.setdefault(pkg
.lower(), {})[link
] = True
452 return to_filename(pkg
), to_filename(ver
)
454 def process_index(self
, url
, page
):
455 """Process the contents of a PyPI page"""
457 # process an index page into the package-page index
458 for match
in HREF
.finditer(page
):
460 self
._scan
(urllib
.parse
.urljoin(url
, htmldecode(match
.group(1))))
464 pkg
, ver
= self
._scan
(url
) # ensure this page is in the page index
466 return "" # no sense double-scanning non-package pages
468 # process individual package page
469 for new_url
in find_external_links(url
, page
):
470 # Process the found URL
471 base
, frag
= egg_info_for_url(new_url
)
472 if base
.endswith('.py') and not frag
:
474 new_url
+= '#egg=%s-%s' % (pkg
, ver
)
476 self
.need_version_info(url
)
477 self
.scan_url(new_url
)
480 lambda m
: '<a href="%s#md5=%s">%s</a>' % m
.group(1, 3, 2), page
483 def need_version_info(self
, url
):
485 "Page at %s links to .py file(s) without version info; an index "
490 def scan_all(self
, msg
=None, *args
):
491 if self
.index_url
not in self
.fetched_urls
:
493 self
.warn(msg
, *args
)
494 self
.info("Scanning index of all packages (this may take a while)")
495 self
.scan_url(self
.index_url
)
497 def find_packages(self
, requirement
):
498 self
.scan_url(self
.index_url
+ requirement
.unsafe_name
+ '/')
500 if not self
.package_pages
.get(requirement
.key
):
501 # Fall back to safe version of the name
502 self
.scan_url(self
.index_url
+ requirement
.project_name
+ '/')
504 if not self
.package_pages
.get(requirement
.key
):
505 # We couldn't find the target package, so search the index page too
506 self
.not_found_in_index(requirement
)
508 for url
in list(self
.package_pages
.get(requirement
.key
, ())):
509 # scan each page that might be related to the desired package
512 def obtain(self
, requirement
, installer
=None):
514 self
.find_packages(requirement
)
515 for dist
in self
[requirement
.key
]:
516 if dist
in requirement
:
518 self
.debug("%s does not match %s", requirement
, dist
)
519 return super(PackageIndex
, self
).obtain(requirement
, installer
)
521 def check_hash(self
, checker
, filename
, tfp
):
523 checker is a ContentChecker
525 checker
.report(self
.debug
, "Validating %%s checksum for %s" % filename
)
526 if not checker
.is_valid():
529 raise DistutilsError(
530 "%s validation failed for %s; "
531 "possible download problem?"
532 % (checker
.hash.name
, os
.path
.basename(filename
))
535 def add_find_links(self
, urls
):
536 """Add `urls` to the list that will be prescanned for searches"""
539 self
.to_scan
is None # if we have already "gone online"
540 or not URL_SCHEME(url
) # or it's a local file/directory
541 or url
.startswith('file:')
542 or list(distros_for_url(url
)) # or a direct package link
544 # then go ahead and process it now
547 # otherwise, defer retrieval till later
548 self
.to_scan
.append(url
)
551 """Scan urls scheduled for prescanning (e.g. --find-links)"""
553 list(map(self
.scan_url
, self
.to_scan
))
554 self
.to_scan
= None # from now on, go ahead and process immediately
556 def not_found_in_index(self
, requirement
):
557 if self
[requirement
.key
]: # we've seen at least one distro
558 meth
, msg
= self
.info
, "Couldn't retrieve index page for %r"
559 else: # no distros seen for this name, might be misspelled
562 "Couldn't find index page for %r (maybe misspelled?)",
564 meth(msg
, requirement
.unsafe_name
)
567 def download(self
, spec
, tmpdir
):
568 """Locate and/or download `spec` to `tmpdir`, returning a local path
570 `spec` may be a ``Requirement`` object, or a string containing a URL,
571 an existing local filename, or a project/version requirement spec
572 (i.e. the string form of a ``Requirement`` object). If it is the URL
573 of a .py file with an unambiguous ``#egg=name-version`` tag (i.e., one
574 that escapes ``-`` as ``_`` throughout), a trivial ``setup.py`` is
575 automatically created alongside the downloaded file.
577 If `spec` is a ``Requirement`` object or a string containing a
578 project/version requirement spec, this method returns the location of
579 a matching distribution (possibly after downloading it to `tmpdir`).
580 If `spec` is a locally existing file or directory name, it is simply
581 returned unchanged. If `spec` is a URL, it is downloaded to a subpath
582 of `tmpdir`, and the local filename is returned. Various errors may be
583 raised if a problem occurs during downloading.
585 if not isinstance(spec
, Requirement
):
586 scheme
= URL_SCHEME(spec
)
588 # It's a url, download it to tmpdir
589 found
= self
._download
_url
(scheme
.group(1), spec
, tmpdir
)
590 base
, fragment
= egg_info_for_url(spec
)
591 if base
.endswith('.py'):
592 found
= self
.gen_setup(found
, fragment
, tmpdir
)
594 elif os
.path
.exists(spec
):
595 # Existing file or directory, just return it
598 spec
= parse_requirement_arg(spec
)
599 return getattr(self
.fetch_distribution(spec
, tmpdir
), 'location', None)
601 def fetch_distribution( # noqa: C901 # is too complex (14) # FIXME
610 """Obtain a distribution suitable for fulfilling `requirement`
612 `requirement` must be a ``pkg_resources.Requirement`` instance.
613 If necessary, or if the `force_scan` flag is set, the requirement is
614 searched for in the (online) package index as well as the locally
615 installed packages. If a distribution matching `requirement` is found,
616 the returned distribution's ``location`` is the value you would have
617 gotten from calling the ``download()`` method with the matching
618 distribution's URL or filename. If no matching distribution is found,
619 ``None`` is returned.
621 If the `source` flag is set, only source distributions and source
622 checkout links will be considered. Unless the `develop_ok` flag is
623 set, development and system eggs (i.e., those using the ``.egg-info``
624 format) will be ignored.
626 # process a Requirement
627 self
.info("Searching for %s", requirement
)
631 def find(req
, env
=None):
634 # Find a matching distribution; may be called more than once
636 for dist
in env
[req
.key
]:
637 if dist
.precedence
== DEVELOP_DIST
and not develop_ok
:
638 if dist
not in skipped
:
640 "Skipping development or system egg: %s",
646 test
= dist
in req
and (dist
.precedence
<= SOURCE_DIST
or not source
)
648 loc
= self
.download(dist
.location
, tmpdir
)
649 dist
.download_location
= loc
650 if os
.path
.exists(dist
.download_location
):
655 self
.find_packages(requirement
)
656 dist
= find(requirement
)
658 if not dist
and local_index
is not None:
659 dist
= find(requirement
, local_index
)
662 if self
.to_scan
is not None:
664 dist
= find(requirement
)
666 if dist
is None and not force_scan
:
667 self
.find_packages(requirement
)
668 dist
= find(requirement
)
672 "No local packages or working download links found for %s%s",
673 (source
and "a source distribution of " or ""),
677 self
.info("Best match: %s", dist
)
678 return dist
.clone(location
=dist
.download_location
)
680 def fetch(self
, requirement
, tmpdir
, force_scan
=False, source
=False):
681 """Obtain a file suitable for fulfilling `requirement`
683 DEPRECATED; use the ``fetch_distribution()`` method now instead. For
684 backward compatibility, this routine is identical but returns the
685 ``location`` of the downloaded distribution instead of a distribution
688 dist
= self
.fetch_distribution(requirement
, tmpdir
, force_scan
, source
)
693 def gen_setup(self
, filename
, fragment
, tmpdir
):
694 match
= EGG_FRAGMENT
.match(fragment
)
699 for d
in interpret_distro_name(filename
, match
.group(1), None)
705 if len(dists
) == 1: # unambiguous ``#egg`` fragment
706 basename
= os
.path
.basename(filename
)
708 # Make sure the file has been downloaded to the temp dir.
709 if os
.path
.dirname(filename
) != tmpdir
:
710 dst
= os
.path
.join(tmpdir
, basename
)
711 if not (os
.path
.exists(dst
) and os
.path
.samefile(filename
, dst
)):
712 shutil
.copy2(filename
, dst
)
715 with open(os
.path
.join(tmpdir
, 'setup.py'), 'w') as file:
717 "from setuptools import setup\n"
718 "setup(name=%r, version=%r, py_modules=[%r])\n"
720 dists
[0].project_name
,
722 os
.path
.splitext(basename
)[0],
728 raise DistutilsError(
729 "Can't unambiguously interpret project/version identifier %r; "
730 "any dashes in the name or version should be escaped using "
731 "underscores. %r" % (fragment
, dists
)
734 raise DistutilsError(
735 "Can't process plain .py files without an '#egg=name-version'"
736 " suffix to enable automatic setup script generation."
741 def _download_to(self
, url
, filename
):
742 self
.info("Downloading %s", url
)
746 checker
= HashChecker
.from_url(url
)
747 fp
= self
.open_url(url
)
748 if isinstance(fp
, urllib
.error
.HTTPError
):
749 raise DistutilsError(
750 "Can't download %s: %s %s" % (url
, fp
.code
, fp
.msg
)
754 bs
= self
.dl_blocksize
756 if "content-length" in headers
:
757 # Some servers return multiple Content-Length headers :(
758 sizes
= headers
.get_all('Content-Length')
759 size
= max(map(int, sizes
))
760 self
.reporthook(url
, filename
, blocknum
, bs
, size
)
761 with open(filename
, 'wb') as tfp
:
768 self
.reporthook(url
, filename
, blocknum
, bs
, size
)
771 self
.check_hash(checker
, filename
, tfp
)
777 def reporthook(self
, url
, filename
, blocknum
, blksize
, size
):
781 def open_url(self
, url
, warning
=None): # noqa: C901 # is too complex (12)
782 if url
.startswith('file:'):
783 return local_open(url
)
785 return open_with_auth(url
, self
.opener
)
786 except (ValueError, http
.client
.InvalidURL
) as v
:
787 msg
= ' '.join([str(arg
) for arg
in v
.args
])
789 self
.warn(warning
, msg
)
791 raise DistutilsError('%s %s' % (url
, msg
)) from v
792 except urllib
.error
.HTTPError
as v
:
794 except urllib
.error
.URLError
as v
:
796 self
.warn(warning
, v
.reason
)
798 raise DistutilsError(
799 "Download error for %s: %s" % (url
, v
.reason
)
801 except http
.client
.BadStatusLine
as v
:
803 self
.warn(warning
, v
.line
)
805 raise DistutilsError(
806 '%s returned a bad status line. The server might be '
807 'down, %s' % (url
, v
.line
)
809 except (http
.client
.HTTPException
, socket
.error
) as v
:
811 self
.warn(warning
, v
)
813 raise DistutilsError("Download error for %s: %s" % (url
, v
)) from v
815 def _download_url(self
, scheme
, url
, tmpdir
):
816 # Determine download filename
818 name
, fragment
= egg_info_for_url(url
)
821 name
= name
.replace('..', '.').replace('\\', '_')
823 name
= "__downloaded__" # default if URL has no path contents
825 if name
.endswith('.egg.zip'):
826 name
= name
[:-4] # strip the extra .zip before download
828 filename
= os
.path
.join(tmpdir
, name
)
832 if scheme
== 'svn' or scheme
.startswith('svn+'):
833 return self
._download
_svn
(url
, filename
)
834 elif scheme
== 'git' or scheme
.startswith('git+'):
835 return self
._download
_git
(url
, filename
)
836 elif scheme
.startswith('hg+'):
837 return self
._download
_hg
(url
, filename
)
838 elif scheme
== 'file':
839 return urllib
.request
.url2pathname(urllib
.parse
.urlparse(url
)[2])
841 self
.url_ok(url
, True) # raises error if not allowed
842 return self
._attempt
_download
(url
, filename
)
844 def scan_url(self
, url
):
845 self
.process_url(url
, True)
847 def _attempt_download(self
, url
, filename
):
848 headers
= self
._download
_to
(url
, filename
)
849 if 'html' in headers
.get('content-type', '').lower():
850 return self
._invalid
_download
_html
(url
, headers
, filename
)
854 def _invalid_download_html(self
, url
, headers
, filename
):
856 raise DistutilsError(f
"Unexpected HTML page found at {url}")
858 def _download_svn(self
, url
, _filename
):
859 raise DistutilsError(f
"Invalid config, SVN download is not supported: {url}")
862 def _vcs_split_rev_from_url(url
, pop_prefix
=False):
863 scheme
, netloc
, path
, query
, frag
= urllib
.parse
.urlsplit(url
)
865 scheme
= scheme
.split('+', 1)[-1]
867 # Some fragment identification fails
868 path
= path
.split('#', 1)[0]
872 path
, rev
= path
.rsplit('@', 1)
874 # Also, discard fragment
875 url
= urllib
.parse
.urlunsplit((scheme
, netloc
, path
, query
, ''))
879 def _download_git(self
, url
, filename
):
880 filename
= filename
.split('#', 1)[0]
881 url
, rev
= self
._vcs
_split
_rev
_from
_url
(url
, pop_prefix
=True)
883 self
.info("Doing git clone from %s to %s", url
, filename
)
884 os
.system("git clone --quiet %s %s" % (url
, filename
))
887 self
.info("Checking out %s", rev
)
889 "git -C %s checkout --quiet %s"
898 def _download_hg(self
, url
, filename
):
899 filename
= filename
.split('#', 1)[0]
900 url
, rev
= self
._vcs
_split
_rev
_from
_url
(url
, pop_prefix
=True)
902 self
.info("Doing hg clone from %s to %s", url
, filename
)
903 os
.system("hg clone --quiet %s %s" % (url
, filename
))
906 self
.info("Updating to %s", rev
)
908 "hg --cwd %s up -C -r %s -q"
917 def debug(self
, msg
, *args
):
918 log
.debug(msg
, *args
)
920 def info(self
, msg
, *args
):
923 def warn(self
, msg
, *args
):
# This pattern matches a character entity reference (a decimal numeric
# references, a hexadecimal numeric reference, or a named reference).
entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub
def decode_entity(match):
    """Decode one HTML entity captured by ``entity_sub``'s pattern."""
    return html.unescape(match.group(0))
def htmldecode(text):
    """
    Decode HTML entities in the given text.

    >>> htmldecode(
    ...     'https://../package_name-0.1.2.tar.gz'
    ...     '?tokena=A&amp;tokenb=B">package_name-0.1.2.tar.gz')
    'https://../package_name-0.1.2.tar.gz?tokena=A&tokenb=B">package_name-0.1.2.tar.gz'
    """
    return entity_sub(decode_entity, text)
def socket_timeout(timeout=15):
    """Decorator factory: run the wrapped callable with the global socket
    default timeout set to *timeout* seconds, restoring the previous value
    afterwards even if the call raises.
    """

    def decorator(func):
        # wraps() preserves the wrapped function's name/docstring; the
        # original nested functions also confusingly shared one name.
        @wraps(func)
        def wrapper(*args, **kwargs):
            old_timeout = socket.getdefaulttimeout()
            socket.setdefaulttimeout(timeout)
            try:
                return func(*args, **kwargs)
            finally:
                # Always restore the caller's timeout.
                socket.setdefaulttimeout(old_timeout)

        return wrapper

    return decorator
964 def _encode_auth(auth
):
966 Encode auth from a URL suitable for an HTTP header.
967 >>> str(_encode_auth('username%3Apassword'))
968 'dXNlcm5hbWU6cGFzc3dvcmQ='
970 Long auth strings should not cause a newline to be inserted.
971 >>> long_auth = 'username:' + 'password'*10
972 >>> chr(10) in str(_encode_auth(long_auth))
975 auth_s
= urllib
.parse
.unquote(auth
)
977 auth_bytes
= auth_s
.encode()
978 encoded_bytes
= base64
.b64encode(auth_bytes
)
979 # convert back to a string
980 encoded
= encoded_bytes
.decode()
981 # strip the trailing carriage return
982 return encoded
.replace('\n', '')
class Credential:
    """
    A username/password pair. Use like a namedtuple.
    """

    def __init__(self, username, password):
        self.username = username
        self.password = password

    def __iter__(self):
        yield self.username
        yield self.password

    def __str__(self):
        return '%(username)s:%(password)s' % vars(self)
class PyPIConfig(configparser.RawConfigParser):
    def __init__(self):
        """
        Load from ~/.pypirc
        """
        defaults = dict.fromkeys(['username', 'password', 'repository'], '')
        super().__init__(defaults)

        rc = os.path.join(os.path.expanduser('~'), '.pypirc')
        if os.path.exists(rc):
            self.read(rc)

    @property
    def creds_by_repository(self):
        # Only sections that declare a repository URL are considered.
        sections_with_repositories = [
            section
            for section in self.sections()
            if self.get(section, 'repository').strip()
        ]

        return dict(map(self._get_repo_cred, sections_with_repositories))

    def _get_repo_cred(self, section):
        repo = self.get(section, 'repository').strip()
        return repo, Credential(
            self.get(section, 'username').strip(),
            self.get(section, 'password').strip(),
        )

    def find_credential(self, url):
        """
        If the URL indicated appears to be a repository defined in this
        config, return the credential for that repository.
        """
        for repository, cred in self.creds_by_repository.items():
            if url.startswith(repository):
                return cred
def open_with_auth(url, opener=urllib.request.urlopen):
    """Open a urllib2 request, handling HTTP authentication"""

    parsed = urllib.parse.urlparse(url)
    scheme, netloc, path, params, query, frag = parsed

    # Double scheme does not raise on macOS as revealed by a
    # failing test. We would expect "nonnumeric port". Refs #20.
    if netloc.endswith(':'):
        raise http.client.InvalidURL("nonnumeric port: ''")

    if scheme in ('http', 'https'):
        auth, address = _splituser(netloc)
    else:
        auth = None

    if not auth:
        # Fall back to any credential configured in ~/.pypirc.
        cred = PyPIConfig().find_credential(url)
        if cred:
            auth = str(cred)
            info = cred.username, url
            log.info('Authenticating as %s for %s (from .pypirc)', *info)

    if auth:
        auth = "Basic " + _encode_auth(auth)
        parts = scheme, address, path, params, query, frag
        new_url = urllib.parse.urlunparse(parts)
        request = urllib.request.Request(new_url)
        request.add_header("Authorization", auth)
    else:
        request = urllib.request.Request(url)

    request.add_header('User-Agent', user_agent)
    fp = opener(request)

    if auth:
        # Put authentication info back into request URL if same host,
        # so that links found on the page will work
        s2, h2, path2, param2, query2, frag2 = urllib.parse.urlparse(fp.url)
        if s2 == scheme and h2 == address:
            parts = s2, netloc, path2, param2, query2, frag2
            fp.url = urllib.parse.urlunparse(parts)

    return fp
1087 # copy of urllib.parse._splituser from Python 3.8
1088 def _splituser(host
):
1089 """splituser('user[:passwd]@host[:port]')
1090 --> 'user[:passwd]', 'host[:port]'."""
1091 user
, delim
, host
= host
.rpartition('@')
1092 return (user
if delim
else None), host
1095 # adding a timeout to avoid freezing package_index
1096 open_with_auth
= socket_timeout(_SOCKET_TIMEOUT
)(open_with_auth
)
def fix_sf_url(url):
    """No-op kept for API compatibility; returns *url* unchanged."""
    return url  # backward compatibility
def local_open(url):
    """Read a local path, with special support for directories"""
    scheme, server, path, param, query, frag = urllib.parse.urlparse(url)
    filename = urllib.request.url2pathname(path)
    if os.path.isfile(filename):
        return urllib.request.urlopen(url)
    elif path.endswith('/') and os.path.isdir(filename):
        files = []
        for f in os.listdir(filename):
            filepath = os.path.join(filename, f)
            if f == 'index.html':
                # An explicit index page wins over the synthesized listing.
                with open(filepath, 'r') as fp:
                    body = fp.read()
                break
            elif os.path.isdir(filepath):
                f += '/'
            files.append('<a href="{name}">{name}</a>'.format(name=f))
        else:
            # No index.html found: synthesize a minimal directory listing.
            tmpl = (
                "<html><head><title>{url}</title>" "</head><body>{files}</body></html>"
            )
            body = tmpl.format(url=url, files='\n'.join(files))
        status, message = 200, "OK"
    else:
        status, message, body = 404, "Path not found", "Not found"

    headers = {'content-type': 'text/html'}
    body_stream = io.StringIO(body)
    return urllib.error.HTTPError(url, status, message, headers, body_stream)