Skip to content

Commit

Permalink
reuse should_cache(req) logic
Browse files Browse the repository at this point in the history
  • Loading branch information
cosmicexplorer committed Sep 3, 2023
1 parent 0f20f62 commit 635539b
Show file tree
Hide file tree
Showing 6 changed files with 98 additions and 111 deletions.
54 changes: 54 additions & 0 deletions src/pip/_internal/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import json
import logging
import os
import re
from pathlib import Path
from typing import Any, Dict, List, Optional

Expand All @@ -16,14 +17,58 @@
from pip._internal.models.direct_url import DirectUrl
from pip._internal.models.link import Link
from pip._internal.models.wheel import Wheel
from pip._internal.req.req_install import InstallRequirement
from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds
from pip._internal.utils.urls import path_to_url
from pip._internal.vcs import vcs

logger = logging.getLogger(__name__)

_egg_info_re = re.compile(r"([a-z0-9_.]+)-([a-z0-9_.!+-]+)", re.IGNORECASE)

ORIGIN_JSON_NAME = "origin.json"


def _contains_egg_info(s: str) -> bool:
"""Determine whether the string looks like an egg_info.
:param s: The string to parse. E.g. foo-2.1
"""
return bool(_egg_info_re.search(s))


def should_cache(
req: InstallRequirement,
) -> bool:
"""
Return whether a built InstallRequirement can be stored in the persistent
wheel cache, assuming the wheel cache is available, and _should_build()
has determined a wheel needs to be built.
"""
if req.editable or not req.source_dir:
# never cache editable requirements
return False

if req.link and req.link.is_vcs:
# VCS checkout. Do not cache
# unless it points to an immutable commit hash.
assert not req.editable
assert req.source_dir
vcs_backend = vcs.get_backend_for_scheme(req.link.scheme)
assert vcs_backend
if vcs_backend.is_immutable_rev_checkout(req.link.url, req.source_dir):
return True
return False

assert req.link
base, ext = req.link.splitext()
if _contains_egg_info(base):
return True

# Otherwise, do not cache.
return False


def _hash_dict(d: Dict[str, str]) -> str:
"""Return a stable sha224 of a dictionary."""
s = json.dumps(d, sort_keys=True, separators=(",", ":"), ensure_ascii=True)
Expand Down Expand Up @@ -244,6 +289,15 @@ def get_path_for_link(self, link: Link) -> str:
def get_ephem_path_for_link(self, link: Link) -> str:
return self._ephem_cache.get_path_for_link(link)

def resolve_cache_dir(self, req: InstallRequirement) -> str:
"""Return the persistent or temporary cache directory where the built or
downloaded wheel should be stored."""
cache_available = bool(self.cache_dir)
assert req.link, req
if cache_available and should_cache(req):
return self.get_path_for_link(req.link)
return self.get_ephem_path_for_link(req.link)

def get(
self,
link: Link,
Expand Down
2 changes: 1 addition & 1 deletion src/pip/_internal/network/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def _get_http_response_filename(resp: Response, link: Link) -> str:


def _http_get_download(session: PipSession, link: Link) -> Response:
target_url = link.url.split("#", 1)[0]
target_url = link.url_without_fragment
resp = session.get(target_url, headers=HEADERS, stream=True)
raise_for_status(resp)
return resp
Expand Down
44 changes: 18 additions & 26 deletions src/pip/_internal/operations/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,8 @@
from typing import Any, Dict, Iterable, List, Optional

from pip._vendor.packaging.utils import canonicalize_name
from pip._vendor.requests.exceptions import InvalidSchema

from pip._internal.cache import LinkMetadataCache
from pip._internal.cache import LinkMetadataCache, should_cache
from pip._internal.distributions import make_distribution_for_install_requirement
from pip._internal.exceptions import (
DirectoryUrlHashUnsupported,
Expand Down Expand Up @@ -416,7 +415,7 @@ def _fetch_metadata_only(
)
return None

cached_metadata = self._fetch_cached_metadata(req.link)
cached_metadata = self._fetch_cached_metadata(req)
if cached_metadata is not None:
return cached_metadata

Expand All @@ -425,29 +424,30 @@ def _fetch_metadata_only(
) or self._fetch_metadata_using_lazy_wheel(req.link)
# Populate the metadata cache.
if computed_metadata is not None:
self._cache_metadata(req.link, computed_metadata)
self._cache_metadata(req, computed_metadata)
return computed_metadata

def _fetch_cached_metadata(
self,
link: Link,
self, req: InstallRequirement
) -> Optional[BaseDistribution]:
if self._metadata_cache is None:
return None
if not should_cache(req):
return None
try:
cached_path = self._metadata_cache.cache_path(link)
cached_path = self._metadata_cache.cache_path(req.link)
os.makedirs(str(cached_path.parent), exist_ok=True)
with cached_path.open("rb") as f:
logger.debug(
"found cached metadata for link %s at %s", link.url, f.name
"found cached metadata for link %s at %s", req.link, f.name
)
args = json.load(f)
cached_dist = CacheableDist.from_json(args)
return cached_dist.to_dist()
except (OSError, json.JSONDecodeError, KeyError) as e:
logger.debug(
"no cached metadata for link %s at %s %s(%s)",
link.url,
req.link,
cached_path,
e.__class__.__name__,
str(e),
Expand All @@ -456,24 +456,26 @@ def _fetch_cached_metadata(

def _cache_metadata(
self,
link: Link,
req: InstallRequirement,
metadata_dist: BaseDistribution,
) -> None:
if self._metadata_cache is None:
return
if not should_cache(req):
return
try:
cached_path = self._metadata_cache.cache_path(link)
cached_path = self._metadata_cache.cache_path(req.link)
os.makedirs(str(cached_path.parent), exist_ok=True)
with cached_path.open("w") as f:
cacheable_dist = CacheableDist.from_dist(link, metadata_dist)
cacheable_dist = CacheableDist.from_dist(req.link, metadata_dist)
args = cacheable_dist.to_json()
logger.debug("caching metadata for link %s at %s", link.url, f.name)
logger.debug("caching metadata for link %s at %s", req.link, f.name)
json.dump(args, f)
except (OSError, email.errors.HeaderParseError) as e:
logger.debug(
"could not cache metadata for dist %s from %s: %s(%s)",
metadata_dist,
link,
req.link,
e.__class__.__name__,
str(e),
)
Expand Down Expand Up @@ -564,17 +566,7 @@ def _complete_partial_requirements(
links_to_fully_download: Dict[Link, InstallRequirement] = {}
for req in partially_downloaded_reqs:
assert req.link
# If this is e.g. a git url, we don't know how to handle that in the
# BatchDownloader, so leave it for self._prepare_linked_requirement() at the
# end of this method, which knows how to handle any URL.
can_simply_download = True
try:
# This will raise InvalidSchema if our Session can't download it.
self._session.get_adapter(req.link.url)
except InvalidSchema:
can_simply_download = False
if can_simply_download:
links_to_fully_download[req.link] = req
links_to_fully_download[req.link] = req

batch_download = self._batch_download(
links_to_fully_download.keys(),
Expand Down Expand Up @@ -642,7 +634,7 @@ def prepare_linked_requirement(
# None of the optimizations worked, fully prepare the requirement
result = self._prepare_linked_requirement(req, parallel_builds)
# Cache the metadata for later.
self._cache_metadata(req.link, result)
self._cache_metadata(req, result)
return result

def _ensure_download_info(self, reqs: Iterable[InstallRequirement]) -> None:
Expand Down
62 changes: 1 addition & 61 deletions src/pip/_internal/wheel_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

import logging
import os.path
import re
import shutil
from typing import Iterable, List, Optional, Tuple

Expand All @@ -25,23 +24,12 @@
from pip._internal.utils.subprocess import call_subprocess
from pip._internal.utils.temp_dir import TempDirectory
from pip._internal.utils.urls import path_to_url
from pip._internal.vcs import vcs

logger = logging.getLogger(__name__)

_egg_info_re = re.compile(r"([a-z0-9_.]+)-([a-z0-9_.!+-]+)", re.IGNORECASE)

BuildResult = Tuple[List[InstallRequirement], List[InstallRequirement]]


def _contains_egg_info(s: str) -> bool:
"""Determine whether the string looks like an egg_info.
:param s: The string to parse. E.g. foo-2.1
"""
return bool(_egg_info_re.search(s))


def _should_build(
req: InstallRequirement,
need_wheel: bool,
Expand Down Expand Up @@ -87,54 +75,6 @@ def should_build_for_install_command(
return _should_build(req, need_wheel=False)


def _should_cache(
req: InstallRequirement,
) -> Optional[bool]:
"""
Return whether a built InstallRequirement can be stored in the persistent
wheel cache, assuming the wheel cache is available, and _should_build()
has determined a wheel needs to be built.
"""
if req.editable or not req.source_dir:
# never cache editable requirements
return False

if req.link and req.link.is_vcs:
# VCS checkout. Do not cache
# unless it points to an immutable commit hash.
assert not req.editable
assert req.source_dir
vcs_backend = vcs.get_backend_for_scheme(req.link.scheme)
assert vcs_backend
if vcs_backend.is_immutable_rev_checkout(req.link.url, req.source_dir):
return True
return False

assert req.link
base, ext = req.link.splitext()
if _contains_egg_info(base):
return True

# Otherwise, do not cache.
return False


def _get_cache_dir(
req: InstallRequirement,
wheel_cache: WheelCache,
) -> str:
"""Return the persistent or temporary cache directory where the built
wheel need to be stored.
"""
cache_available = bool(wheel_cache.cache_dir)
assert req.link
if cache_available and _should_cache(req):
cache_dir = wheel_cache.get_path_for_link(req.link)
else:
cache_dir = wheel_cache.get_ephem_path_for_link(req.link)
return cache_dir


def _verify_one(req: InstallRequirement, wheel_path: str) -> None:
canonical_name = canonicalize_name(req.name or "")
w = Wheel(os.path.basename(wheel_path))
Expand Down Expand Up @@ -316,7 +256,7 @@ def build(
build_successes, build_failures = [], []
for req in requirements:
assert req.name
cache_dir = _get_cache_dir(req, wheel_cache)
cache_dir = wheel_cache.resolve_cache_dir(req)
wheel_file = _build_one(
req,
cache_dir,
Expand Down
21 changes: 20 additions & 1 deletion tests/unit/test_cache.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,32 @@
import os
from pathlib import Path

import pytest
from pip._vendor.packaging.tags import Tag, interpreter_name, interpreter_version

from pip._internal.cache import WheelCache, _hash_dict
from pip._internal.cache import WheelCache, _contains_egg_info, _hash_dict
from pip._internal.models.link import Link
from pip._internal.utils.misc import ensure_dir


@pytest.mark.parametrize(
"s, expected",
[
# Trivial.
("pip-18.0", True),
# Ambiguous.
("foo-2-2", True),
("im-valid", True),
# Invalid.
("invalid", False),
("im_invalid", False),
],
)
def test_contains_egg_info(s: str, expected: bool) -> None:
result = _contains_egg_info(s)
assert result == expected


def test_falsey_path_none() -> None:
wc = WheelCache("")
assert wc.cache_dir is None
Expand Down
26 changes: 4 additions & 22 deletions tests/unit/test_wheel_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,32 +5,14 @@

import pytest

from pip._internal import wheel_builder
from pip._internal import cache, wheel_builder
from pip._internal.models.link import Link
from pip._internal.operations.build.wheel_legacy import format_command_result
from pip._internal.req.req_install import InstallRequirement
from pip._internal.vcs.git import Git
from tests.lib import _create_test_package


@pytest.mark.parametrize(
"s, expected",
[
# Trivial.
("pip-18.0", True),
# Ambiguous.
("foo-2-2", True),
("im-valid", True),
# Invalid.
("invalid", False),
("im_invalid", False),
],
)
def test_contains_egg_info(s: str, expected: bool) -> None:
result = wheel_builder._contains_egg_info(s)
assert result == expected


class ReqMock:
def __init__(
self,
Expand Down Expand Up @@ -128,7 +110,7 @@ def test_should_build_for_wheel_command(req: ReqMock, expected: bool) -> None:
],
)
def test_should_cache(req: ReqMock, expected: bool) -> None:
assert wheel_builder._should_cache(cast(InstallRequirement, req)) is expected
assert cache.should_cache(cast(InstallRequirement, req)) is expected


def test_should_cache_git_sha(tmpdir: Path) -> None:
Expand All @@ -138,12 +120,12 @@ def test_should_cache_git_sha(tmpdir: Path) -> None:
# a link referencing a sha should be cached
url = "git+https://g.c/o/r@" + commit + "#egg=mypkg"
req = ReqMock(link=Link(url), source_dir=repo_path)
assert wheel_builder._should_cache(cast(InstallRequirement, req))
assert cache.should_cache(cast(InstallRequirement, req))

# a link not referencing a sha should not be cached
url = "git+https://g.c/o/r@master#egg=mypkg"
req = ReqMock(link=Link(url), source_dir=repo_path)
assert not wheel_builder._should_cache(cast(InstallRequirement, req))
assert not cache.should_cache(cast(InstallRequirement, req))


def test_format_command_result__INFO(caplog: pytest.LogCaptureFixture) -> None:
Expand Down

0 comments on commit 635539b

Please sign in to comment.