1 """Prepares a distribution for installation
# The following comment should be removed at some point in the future.
# mypy: strict-optional=False
import logging
import mimetypes
import os
import shutil
from typing import Dict, Iterable, List, Optional
from pip._vendor.packaging.utils import canonicalize_name
from pip._internal.distributions import make_distribution_for_install_requirement
from pip._internal.distributions.installed import InstalledDistribution
from pip._internal.exceptions import (
    DirectoryUrlHashUnsupported,
    HashMismatch,
    HashUnpinned,
    InstallationError,
    MetadataInconsistent,
    NetworkConnectionError,
    PreviousBuildDirError,
    VcsHashUnsupported,
)
from pip._internal.index.package_finder import PackageFinder
from pip._internal.metadata import BaseDistribution, get_metadata_distribution
from pip._internal.models.direct_url import ArchiveInfo
from pip._internal.models.link import Link
from pip._internal.models.wheel import Wheel
from pip._internal.network.download import BatchDownloader, Downloader
from pip._internal.network.lazy_wheel import (
    HTTPRangeRequestUnsupported,
    dist_from_wheel_url,
)
from pip._internal.network.session import PipSession
from pip._internal.operations.build.build_tracker import BuildTracker
from pip._internal.req.req_install import InstallRequirement
from pip._internal.utils.direct_url_helpers import (
    direct_url_for_editable,
    direct_url_from_link,
)
from pip._internal.utils.hashes import Hashes, MissingHashes
from pip._internal.utils.logging import indent_log
from pip._internal.utils.misc import (
    display_path,
    hash_file,
    hide_url,
    is_installable_dir,
)
from pip._internal.utils.temp_dir import TempDirectory
from pip._internal.utils.unpacking import unpack_file
from pip._internal.vcs import vcs
logger = logging.getLogger(__name__)

def _get_prepared_distribution(
    req: InstallRequirement,
    build_tracker: BuildTracker,
    finder: PackageFinder,
    build_isolation: bool,
    check_build_deps: bool,
) -> BaseDistribution:
    """Prepare a distribution for installation."""
    abstract_dist = make_distribution_for_install_requirement(req)
    with build_tracker.track(req):
        abstract_dist.prepare_distribution_metadata(
            finder, build_isolation, check_build_deps
        )
    return abstract_dist.get_metadata_distribution()
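
# Note: _get_prepared_distribution is the single funnel that linked requirements
# pass through: make_distribution_for_install_requirement picks the concrete
# distribution implementation for the requirement (roughly, wheel vs. source),
# and prepare_distribution_metadata builds or reads the metadata the resolver
# needs before installation.
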
def unpack_vcs_link(link: Link, location: str, verbosity: int) -> None:
    vcs_backend = vcs.get_backend_for_scheme(link.scheme)
    assert vcs_backend is not None
    vcs_backend.unpack(location, url=hide_url(link.url), verbosity=verbosity)

class File:
    def __init__(self, path: str, content_type: Optional[str]) -> None:
        self.path = path
        if content_type is None:
            self.content_type = mimetypes.guess_type(path)[0]
        else:
            self.content_type = content_type

def get_http_url(
    link: Link,
    download: Downloader,
    download_dir: Optional[str] = None,
    hashes: Optional[Hashes] = None,
) -> File:
    temp_dir = TempDirectory(kind="unpack", globally_managed=True)
    # If a download dir is specified, is the file already downloaded there?
    already_downloaded_path = None
    if download_dir:
        already_downloaded_path = _check_download_dir(link, download_dir, hashes)

    if already_downloaded_path:
        from_path = already_downloaded_path
        content_type = None
    else:
        # let's download to a tmp dir
        from_path, content_type = download(link, temp_dir.path)

    if hashes:
        hashes.check_against_path(from_path)

    return File(from_path, content_type)

def get_file_url(
    link: Link, download_dir: Optional[str] = None, hashes: Optional[Hashes] = None
) -> File:
    """Get file and optionally check its hash."""
    # If a download dir is specified, is the file already there and valid?
    already_downloaded_path = None
    if download_dir:
        already_downloaded_path = _check_download_dir(link, download_dir, hashes)

    if already_downloaded_path:
        from_path = already_downloaded_path
    else:
        from_path = link.file_path

    # If --require-hashes is off, `hashes` is either empty, the
    # link's embedded hash, or MissingHashes; it is required to
    # match. If --require-hashes is on, we are satisfied by any
    # hash in `hashes` matching: a URL-based or an option-based
    # one; no internet-sourced hash will be in `hashes`.
    if hashes:
        hashes.check_against_path(from_path)
    return File(from_path, None)
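
# Rough sketch of the hash check performed above (the digest is hypothetical and
# only for illustration; a Hashes object maps a hash name to acceptable hex digests):
#
#   expected = Hashes({"sha256": ["<64-hex-char digest>"]})
#   expected.check_against_path("downloads/example-1.0.tar.gz")  # raises HashMismatch if nothing matches
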
def unpack_url(
    link: Link, location: str, download: Downloader, verbosity: int,
    download_dir: Optional[str] = None, hashes: Optional[Hashes] = None,
) -> Optional[File]:
    """Unpack link into location, downloading if required.

    :param hashes: A Hashes object, one of whose embedded hashes must match,
        or HashMismatch will be raised. If the Hashes is empty, no matches are
        required, and unhashable types of requirements (like VCS ones, which
        would ordinarily raise HashUnsupported) are allowed.
    """
    # non-editable vcs urls
    if link.is_vcs:
        unpack_vcs_link(link, location, verbosity=verbosity)
        return None

    assert not link.is_existing_dir()

    # file urls
    if link.is_file:
        file = get_file_url(link, download_dir, hashes=hashes)
    # http urls
    else:
        file = get_http_url(link, download, download_dir, hashes=hashes)

    # unpack the archive to the build dir location. even when only downloading
    # archives, they have to be unpacked to parse dependencies, except wheels
    if not link.is_wheel:
        unpack_file(file.path, location, file.content_type)

    return file

def _check_download_dir(
    link: Link, download_dir: str, hashes: Optional[Hashes],
    warn_on_hash_mismatch: bool = True,
) -> Optional[str]:
    """Check download_dir for a previously downloaded file with correct hash.
    If a correct file is found, return its path; else None.
    """
    download_path = os.path.join(download_dir, link.filename)

    if not os.path.exists(download_path):
        return None

    # If already downloaded, does its hash match?
    logger.info("File was already downloaded %s", download_path)
    if hashes:
        try:
            hashes.check_against_path(download_path)
        except HashMismatch:
            if warn_on_hash_mismatch:
                logger.warning(
                    "Previously-downloaded file %s has bad hash. Re-downloading.",
                    download_path,
                )
            os.unlink(download_path)
            return None
    return download_path

class RequirementPreparer:
    """Prepares a Requirement"""
    def __init__(
        self,
        build_dir: str,
        src_dir: str,
        download_dir: Optional[str],
        build_isolation: bool,
        check_build_deps: bool,
        build_tracker: BuildTracker,
        session: PipSession,
        progress_bar: str,
        finder: PackageFinder,
        require_hashes: bool,
        use_user_site: bool,
        lazy_wheel: bool,
        verbosity: int,
        legacy_resolver: bool,
    ) -> None:
        self.src_dir = src_dir
        self.build_dir = build_dir
        self.build_tracker = build_tracker
        self._session = session
        self._download = Downloader(session, progress_bar)
        self._batch_download = BatchDownloader(session, progress_bar)
        self.finder = finder

        # Where still-packed archives should be written to. If None, they are
        # not saved, and are deleted immediately after unpacking.
        self.download_dir = download_dir

        # Is build isolation allowed?
        self.build_isolation = build_isolation

        # Should check build dependencies?
        self.check_build_deps = check_build_deps

        # Should hash-checking be required?
        self.require_hashes = require_hashes

        # Should install in user site-packages?
        self.use_user_site = use_user_site

        # Should wheels be downloaded lazily?
        self.use_lazy_wheel = lazy_wheel

        # How verbose should underlying tooling be?
        self.verbosity = verbosity

        # Are we using the legacy resolver?
        self.legacy_resolver = legacy_resolver

        # Memoized downloaded files, as mapping of url: path.
        self._downloaded: Dict[str, str] = {}

        # Previous "header" printed for a link-based InstallRequirement
        self._previous_requirement_header = ("", "")
    def _log_preparing_link(self, req: InstallRequirement) -> None:
        """Provide context for the requirement being prepared."""
        if req.link.is_file and not req.is_wheel_from_cache:
            message = "Processing %s"
            information = str(display_path(req.link.file_path))
        else:
            message = "Collecting %s"
            information = str(req.req or req)

        # If we used req.req, inject requirement source if available (this
        # would already be included if we used req directly)
        if req.req and req.comes_from:
            if isinstance(req.comes_from, str):
                comes_from: Optional[str] = req.comes_from
            else:
                comes_from = req.comes_from.from_path()
            if comes_from:
                information += f" (from {comes_from})"

        if (message, information) != self._previous_requirement_header:
            self._previous_requirement_header = (message, information)
            logger.info(message, information)

        if req.is_wheel_from_cache:
            with indent_log():
                logger.info("Using cached %s", req.link.filename)
    def _ensure_link_req_src_dir(
        self, req: InstallRequirement, parallel_builds: bool
    ) -> None:
        """Ensure source_dir of a linked InstallRequirement."""
        # Since source_dir is only set for editable requirements.
        if req.link.is_wheel:
            # We don't need to unpack wheels, so no need for a source
            # directory.
            return
        assert req.source_dir is None
        if req.link.is_existing_dir():
            # build local directories in-tree
            req.source_dir = req.link.file_path
            return

        # We always delete unpacked sdists after pip runs.
        req.ensure_has_source_dir(
            self.build_dir,
            autodelete=True,
            parallel_builds=parallel_builds,
        )
        # If a checkout exists, it's unwise to keep going. Version
        # inconsistencies are logged later, but do not fail the
        # installation.
        # FIXME: this won't upgrade when there's an existing
        # package unpacked in `req.source_dir`
        # TODO: this check is now probably dead code
        if is_installable_dir(req.source_dir):
            raise PreviousBuildDirError(
                "pip can't proceed with requirements '{}' due to a "
                "pre-existing build directory ({}). This is likely "
                "due to a previous installation that failed. pip is "
                "being responsible and not assuming it can delete this. "
                "Please delete it and try again.".format(req, req.source_dir)
            )
    def _get_linked_req_hashes(self, req: InstallRequirement) -> Hashes:
        # By the time this is called, the requirement's link should have
        # been checked so we can tell what kind of requirement req is
        # and raise some more informative errors than otherwise.
        # (For example, we can raise VcsHashUnsupported for a VCS URL
        # rather than HashMissing.)
        if not self.require_hashes:
            return req.hashes(trust_internet=True)

        # We could check these first 2 conditions inside unpack_url
        # and save repetition of conditions, but then we would
        # report less-useful error messages for unhashable
        # requirements, complaining that there's no hash provided.
        if req.link.is_vcs:
            raise VcsHashUnsupported()
        if req.link.is_existing_dir():
            raise DirectoryUrlHashUnsupported()

        # Unpinned packages are asking for trouble when a new version
        # is uploaded. This isn't a security check, but it saves users
        # a surprising hash mismatch in the future.
        # file:/// URLs aren't pinnable, so don't complain about them
        # not being pinned.
        if not req.is_direct and not req.is_pinned:
            raise HashUnpinned()

        # If known-good hashes are missing for this requirement,
        # shim it with a facade object that will provoke hash
        # computation and then raise a HashMissing exception
        # showing the user what the hash should be.
        return req.hashes(trust_internet=False) or MissingHashes()
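
    # For context, the known-good hashes consumed here typically come from a
    # requirements-file entry such as (digest hypothetical):
    #
    #   example==1.0 --hash=sha256:<64-hex-char digest>
    #
    # req.hashes(trust_internet=False) keeps only digests supplied by the user
    # like the one above, never hashes learned from the index.
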
    def _fetch_metadata_only(
        self,
        req: InstallRequirement,
    ) -> Optional[BaseDistribution]:
        if self.legacy_resolver:
            logger.debug(
                "Metadata-only fetching is not used in the legacy resolver",
            )
            return None
        if self.require_hashes:
            logger.debug(
                "Metadata-only fetching is not used as hash checking is required",
            )
            return None
        # Try PEP 658 metadata first, then fall back to lazy wheel if unavailable.
        return self._fetch_metadata_using_link_data_attr(
            req
        ) or self._fetch_metadata_using_lazy_wheel(req.link)
    def _fetch_metadata_using_link_data_attr(
        self,
        req: InstallRequirement,
    ) -> Optional[BaseDistribution]:
        """Fetch metadata from the data-dist-info-metadata attribute, if possible."""
        # (1) Get the link to the metadata file, if provided by the backend.
        metadata_link = req.link.metadata_link()
        if metadata_link is None:
            return None
        assert req.req is not None
        logger.info(
            "Obtaining dependency information for %s from %s",
            req.req,
            metadata_link,
        )
        # (2) Download the contents of the METADATA file, separate from the dist itself.
        metadata_file = get_http_url(
            metadata_link,
            self._download,
            hashes=metadata_link.as_hashes(),
        )
        with open(metadata_file.path, "rb") as f:
            metadata_contents = f.read()
        # (3) Generate a dist just from those file contents.
        metadata_dist = get_metadata_distribution(
            metadata_contents,
            req.link.filename,
            req.req.name,
        )
        # (4) Ensure the Name: field from the METADATA file matches the name from the
        #     install requirement.
        #
        #     NB: raw_name will fall back to the name from the install requirement if
        #     the Name: field is not present, but it's noted in the raw_name docstring
        #     that that should NEVER happen anyway.
        if canonicalize_name(metadata_dist.raw_name) != canonicalize_name(req.req.name):
            raise MetadataInconsistent(
                req, "Name", req.req.name, metadata_dist.raw_name
            )
        return metadata_dist
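
    # For reference, PEP 658 serves the METADATA file at the distribution's URL
    # plus a ".metadata" suffix, e.g. (hypothetical URLs):
    #
    #   https://files.example.org/example-1.0-py3-none-any.whl            <- the wheel
    #   https://files.example.org/example-1.0-py3-none-any.whl.metadata   <- its METADATA
    #
    # Link.metadata_link() returns a Link for the second URL only when the index
    # advertised the metadata attribute for that file.
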
    def _fetch_metadata_using_lazy_wheel(
        self, link: Link
    ) -> Optional[BaseDistribution]:
        """Fetch metadata using lazy wheel, if possible."""
        # --use-feature=fast-deps must be provided.
        if not self.use_lazy_wheel:
            return None
        if link.is_file or not link.is_wheel:
            logger.debug(
                "Lazy wheel is not used as %r does not point to a remote wheel", link
            )
            return None

        wheel = Wheel(link.filename)
        name = canonicalize_name(wheel.name)
        logger.info(
            "Obtaining dependency information from %s %s", name, wheel.version
        )
        url = link.url.split("#", 1)[0]
        try:
            return dist_from_wheel_url(name, url, self._session)
        except HTTPRangeRequestUnsupported:
            logger.debug("%s does not support range requests", url)
            return None
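
    # The lazy-wheel path above avoids a full download by issuing HTTP Range
    # requests against the remote wheel (a zip file). A minimal sketch of the
    # underlying idea, using only the standard library (URL hypothetical):
    #
    #   import urllib.request
    #   request = urllib.request.Request(
    #       "https://files.example.org/example-1.0-py3-none-any.whl",
    #       headers={"Range": "bytes=-8192"},  # fetch only the tail of the zip
    #   )
    #   tail = urllib.request.urlopen(request).read()
    #
    # The tail contains the zip central directory, from which the *.dist-info
    # entries can be located and fetched with further range requests.
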
    def _complete_partial_requirements(
        self,
        partially_downloaded_reqs: Iterable[InstallRequirement],
        parallel_builds: bool = False,
    ) -> None:
        """Download any requirements which were only fetched by metadata."""
        # Download to a temporary directory. These will be copied over as
        # needed for downstream 'download', 'wheel', and 'install' commands.
        temp_dir = TempDirectory(kind="unpack", globally_managed=True).path

        # Map each link to the requirement that owns it. This allows us to set
        # `req.local_file_path` on the appropriate requirement after passing
        # all the links at once into BatchDownloader.
        links_to_fully_download: Dict[Link, InstallRequirement] = {}
        for req in partially_downloaded_reqs:
            assert req.link
            links_to_fully_download[req.link] = req

        batch_download = self._batch_download(
            links_to_fully_download.keys(),
            temp_dir,
        )
        for link, (filepath, _) in batch_download:
            logger.debug("Downloading link %s to %s", link, filepath)
            req = links_to_fully_download[link]
            req.local_file_path = filepath
            # TODO: This needs fixing for sdists
            # This is an emergency fix for #11847, which reports that
            # distributions get downloaded twice when metadata is loaded
            # from a PEP 658 standalone metadata file. Setting _downloaded
            # fixes this for wheels, but breaks the sdist case (tests
            # test_download_metadata). As PyPI is currently only serving
            # metadata for wheels, this is not an immediate issue.
            # Fixing the problem properly looks like it will require a
            # complete refactoring of the `prepare_linked_requirements_more`
            # logic, and I haven't a clue where to start on that, so for now
            # I have fixed the issue *just* for wheels.
            if req.is_wheel:
                self._downloaded[req.link.url] = filepath

        # This step is necessary to ensure all lazy wheels are processed
        # successfully by the 'download', 'wheel', and 'install' commands.
        for req in partially_downloaded_reqs:
            self._prepare_linked_requirement(req, parallel_builds)
    def prepare_linked_requirement(
        self, req: InstallRequirement, parallel_builds: bool = False
    ) -> BaseDistribution:
        """Prepare a requirement to be obtained from req.link."""
        assert req.link
        self._log_preparing_link(req)
        with indent_log():
            # Check if the relevant file is already available
            # in the download directory
            file_path = None
            if self.download_dir is not None and req.link.is_wheel:
                hashes = self._get_linked_req_hashes(req)
                file_path = _check_download_dir(
                    req.link,
                    self.download_dir,
                    hashes,
                    # When a locally built wheel has been found in cache, we don't warn
                    # about re-downloading when the already downloaded wheel hash does
                    # not match. This is because the hash must be checked against the
                    # original link, not the cached link. In that case the already
                    # downloaded file will be removed and re-fetched from cache (which
                    # implies a hash check against the cache entry's origin.json).
                    warn_on_hash_mismatch=not req.is_wheel_from_cache,
                )

            if file_path is not None:
                # The file is already available, so mark it as downloaded
                self._downloaded[req.link.url] = file_path
            else:
                # The file is not available, attempt to fetch only metadata
                metadata_dist = self._fetch_metadata_only(req)
                if metadata_dist is not None:
                    req.needs_more_preparation = True
                    return metadata_dist

            # None of the optimizations worked, fully prepare the requirement
            return self._prepare_linked_requirement(req, parallel_builds)
    def prepare_linked_requirements_more(
        self, reqs: Iterable[InstallRequirement], parallel_builds: bool = False
    ) -> None:
        """Prepare linked requirements more, if needed."""
        reqs = [req for req in reqs if req.needs_more_preparation]
        for req in reqs:
            # Determine if any of these requirements were already downloaded.
            if self.download_dir is not None and req.link.is_wheel:
                hashes = self._get_linked_req_hashes(req)
                file_path = _check_download_dir(req.link, self.download_dir, hashes)
                if file_path is not None:
                    self._downloaded[req.link.url] = file_path
                    req.needs_more_preparation = False

        # Prepare requirements we found were already downloaded for some
        # reason. The other downloads will be completed separately.
        partially_downloaded_reqs: List[InstallRequirement] = []
        for req in reqs:
            if req.needs_more_preparation:
                partially_downloaded_reqs.append(req)
            else:
                self._prepare_linked_requirement(req, parallel_builds)

        # TODO: separate this part out from RequirementPreparer when the v1
        # resolver can be removed!
        self._complete_partial_requirements(
            partially_downloaded_reqs,
            parallel_builds=parallel_builds,
        )
    def _prepare_linked_requirement(
        self, req: InstallRequirement, parallel_builds: bool
    ) -> BaseDistribution:
        assert req.link
        link = req.link

        hashes = self._get_linked_req_hashes(req)

        if hashes and req.is_wheel_from_cache:
            assert req.download_info is not None
            # We need to verify hashes, and we have found the requirement in the cache
            # of locally built wheels.
            if (
                isinstance(req.download_info.info, ArchiveInfo)
                and req.download_info.info.hashes
                and hashes.has_one_of(req.download_info.info.hashes)
            ):
                # At this point we know the requirement was built from a hashable source
                # artifact, and we verified that the cache entry's hash of the original
                # artifact matches one of the hashes we expect. We don't verify hashes
                # against the cached wheel, because the wheel is not the original.
                hashes = None
            else:
                logger.warning(
                    "The hashes of the source archive found in cache entry "
                    "don't match, ignoring cached built wheel "
                    "and re-downloading source."
                )
                req.link = req.cached_wheel_source_link
                link = req.link

        self._ensure_link_req_src_dir(req, parallel_builds)

        if link.is_existing_dir():
            local_file = None
        elif link.url not in self._downloaded:
            try:
                local_file = unpack_url(
                    link,
                    req.source_dir,
                    self._download,
                    self.verbosity,
                    self.download_dir,
                    hashes,
                )
            except NetworkConnectionError as exc:
                raise InstallationError(
                    "Could not install requirement {} because of HTTP "
                    "error {} for URL {}".format(req, exc, link)
                )
        else:
            file_path = self._downloaded[link.url]
            if hashes:
                hashes.check_against_path(file_path)
            local_file = File(file_path, content_type=None)

        # If download_info is set, we got it from the wheel cache.
        if req.download_info is None:
            # Editables don't go through this function (see
            # prepare_editable_requirement).
            assert not req.editable
            req.download_info = direct_url_from_link(link, req.source_dir)
            # Make sure we have a hash in download_info. If we got it as part of the
            # URL, it will have been verified and we can rely on it. Otherwise we
            # compute it from the downloaded file.
            # FIXME: https://github.com/pypa/pip/issues/11943
            if (
                isinstance(req.download_info.info, ArchiveInfo)
                and not req.download_info.info.hashes
                and local_file
            ):
                hash = hash_file(local_file.path)[0].hexdigest()
                # We populate info.hash for backward compatibility.
                # This will automatically populate info.hashes.
                req.download_info.info.hash = f"sha256={hash}"

        # For use in later processing,
        # preserve the file path on the requirement.
        if local_file:
            req.local_file_path = local_file.path

        dist = _get_prepared_distribution(
            req,
            self.build_tracker,
            self.finder,
            self.build_isolation,
            self.check_build_deps,
        )
        return dist
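
    # The archive hash recorded above is a plain SHA-256 of the downloaded file;
    # hash_file is roughly equivalent to this standard-library sketch (path hypothetical):
    #
    #   import hashlib
    #   with open("downloads/example-1.0.tar.gz", "rb") as fp:
    #       digest = hashlib.sha256(fp.read()).hexdigest()
    #   # stored as f"sha256={digest}" in download_info.info.hash
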
    def save_linked_requirement(self, req: InstallRequirement) -> None:
        assert self.download_dir is not None
        assert req.link is not None
        link = req.link
        if link.is_vcs or (link.is_existing_dir() and req.editable):
            # Make a .zip of the source_dir we already created.
            req.archive(self.download_dir)
            return

        if link.is_existing_dir():
            logger.debug(
                "Not copying link to destination directory "
                "since it is a directory: %s",
                link,
            )
            return
        if req.local_file_path is None:
            # No distribution was downloaded for this requirement.
            return

        download_location = os.path.join(self.download_dir, link.filename)
        if not os.path.exists(download_location):
            shutil.copy(req.local_file_path, download_location)
            download_path = display_path(download_location)
            logger.info("Saved %s", download_path)
    def prepare_editable_requirement(
        self,
        req: InstallRequirement,
    ) -> BaseDistribution:
        """Prepare an editable requirement."""
        assert req.editable, "cannot prepare a non-editable req as editable"

        logger.info("Obtaining %s", req)

        with indent_log():
            if self.require_hashes:
                raise InstallationError(
                    "The editable requirement {} cannot be installed when "
                    "requiring hashes, because there is no single file to "
                    "hash.".format(req)
                )
            req.ensure_has_source_dir(self.src_dir)
            req.update_editable()
            assert req.source_dir
            req.download_info = direct_url_for_editable(req.unpacked_source_directory)

            dist = _get_prepared_distribution(
                req,
                self.build_tracker,
                self.finder,
                self.build_isolation,
                self.check_build_deps,
            )

            req.check_if_exists(self.use_user_site)

        return dist
    def prepare_installed_requirement(
        self,
        req: InstallRequirement,
        skip_reason: str,
    ) -> BaseDistribution:
        """Prepare an already-installed requirement."""
        assert req.satisfied_by, "req should have been satisfied but isn't"
        assert skip_reason is not None, (
            "did not get a skip reason, but req.satisfied_by "
            "is set to {}".format(req.satisfied_by)
        )
        logger.info(
            "Requirement %s: %s (%s)", skip_reason, req, req.satisfied_by.version
        )
        with indent_log():
            if self.require_hashes:
                logger.debug(
                    "Since it is already installed, we are trusting this "
                    "package without checking its hash. To ensure a "
                    "completely repeatable environment, install into an "
                    "empty virtualenv."
                )
            return InstalledDistribution(req).get_metadata_distribution()