Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

yarn: SBOM components #739

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cachi2/core/checksum.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ def to_sri(self) -> str:
base64_sha = base64.b64encode(bytes_sha).decode("utf-8")
return f"{self.algorithm}-{base64_sha}"

def __str__(self) -> str:
    """Render the checksum as ``<algorithm>:<hexdigest>``."""
    algorithm, hexdigest = self.algorithm, self.hexdigest
    return f"{algorithm}:{hexdigest}"

@classmethod
def from_sri(cls, sri: str) -> "ChecksumInfo":
"""Convert the input Subresource Integrity value to ChecksumInfo."""
Expand Down
4 changes: 2 additions & 2 deletions cachi2/core/package_managers/npm.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,8 +358,8 @@ def get_purl(
else: # dep_type == "https"
qualifiers = {"download_url": resolved_url}
if integrity:
algorithm, digest = ChecksumInfo.from_sri(integrity)
qualifiers["checksum"] = f"{algorithm}:{digest}"
checksum = ChecksumInfo.from_sri(integrity)
qualifiers["checksum"] = str(checksum)

return PackageURL(
type="npm",
Expand Down
30 changes: 26 additions & 4 deletions cachi2/core/package_managers/yarn_classic/main.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
import logging
from typing import Iterable

from cachi2.core.errors import PackageManagerError, PackageRejected
from cachi2.core.models.input import Request
from cachi2.core.models.output import Component, EnvironmentVariable, RequestOutput
from cachi2.core.models.property_semantics import PropertySet
from cachi2.core.package_managers.yarn.utils import (
VersionsRange,
extract_yarn_version_from_env,
run_yarn_cmd,
)
from cachi2.core.package_managers.yarn_classic.project import Project
from cachi2.core.package_managers.yarn_classic.resolver import resolve_packages
from cachi2.core.package_managers.yarn_classic.resolver import YarnClassicPackage, resolve_packages
from cachi2.core.rooted_path import RootedPath

log = logging.getLogger(__name__)
Expand All @@ -28,22 +30,42 @@ def _ensure_mirror_dir_exists(output_dir: RootedPath) -> None:
for package in request.yarn_classic_packages:
package_path = request.source_dir.join_within_root(package.path)
_ensure_mirror_dir_exists(request.output_dir)
_resolve_yarn_project(Project.from_source_dir(package_path), request.output_dir)
components.extend(
_resolve_yarn_project(Project.from_source_dir(package_path), request.output_dir)
)

return RequestOutput.from_obj_list(
components, _generate_build_environment_variables(), project_files=[]
)


def _resolve_yarn_project(project: Project, output_dir: RootedPath) -> list[Component]:
    """Process a request for a single yarn source directory.

    Runs the repository and corepack yarn version checks, fetches the
    dependencies, resolves the project's packages and turns them into SBOM
    components.

    :param project: the yarn project to process
    :param output_dir: output directory used to build the prefetch environment
    :return: the SBOM components created from the resolved packages
    """
    log.info(f"Fetching the yarn dependencies at the subpath {project.source_dir}")

    _verify_repository(project)
    prefetch_env = _get_prefetch_environment_variables(output_dir)
    _verify_corepack_yarn_version(project.source_dir, prefetch_env)
    _fetch_dependencies(project.source_dir, prefetch_env)

    # Resolve exactly once: the caller extends its component list with the
    # return value, so the resolved packages must be kept and converted.
    packages = resolve_packages(project)
    return _create_sbom_component(packages)


def _create_sbom_component(packages: Iterable[YarnClassicPackage]) -> list[Component]:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor naming nitpick: this function creates multiple components, so it would be better named _create_sbom_components. The docstring seems to agree with me on this one.

Another controversial naming scheme to consider is this: _sbom_components_created_from.
Then in the invocation spot you'll have:

return _sbom_components_created_from(packages)

which reads mostly like a regular English sentence (except for the odd punctuation).
I am absolutely not insisting on the second one; it is just a readability idea.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be _create_sbom_components (probably just a typo).

"""Create SBOM components from the given yarn packages."""
result = []
for package in packages:
properties = PropertySet(npm_development=package.dev).to_properties()
result.append(
Component(
name=package.name,
purl=package.purl,
version=package.version,
properties=properties,
)
)

return result


def _fetch_dependencies(source_dir: RootedPath, env: dict[str, str]) -> None:
Expand Down
79 changes: 79 additions & 0 deletions cachi2/core/package_managers/yarn_classic/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
from typing import Iterable, Optional, Union
from urllib.parse import urlparse

from packageurl import PackageURL
from pyarn.lockfile import Package as PYarnPackage
from pydantic import BaseModel

from cachi2.core.checksum import ChecksumInfo
from cachi2.core.errors import PackageRejected, UnexpectedFormat
from cachi2.core.package_managers.npm import NPM_REGISTRY_CNAMES
from cachi2.core.package_managers.yarn_classic.project import PackageJson, Project, YarnLock
Expand All @@ -15,6 +17,7 @@
extract_workspace_metadata,
)
from cachi2.core.rooted_path import RootedPath
from cachi2.core.scm import RepoID

# https://github.com/yarnpkg/yarn/blob/7cafa512a777048ce0b666080a24e80aae3d66a9/src/resolvers/exotics/git-resolver.js#L15-L17
GIT_HOSTS = frozenset(("github.com", "gitlab.com", "bitbucket.com", "bitbucket.org"))
Expand All @@ -26,6 +29,9 @@
re.compile(r"^https?:.+\.git#.+"),
)

DEFAULT_NPM_REGISTRY = "https://registry.npmjs.org"
ALTERNATIVE_NPM_REGISTRY = "https://registry.yarnpkg.com"


class _BasePackage(BaseModel):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we make this inherit from ABC and have purl as an abstract method?

"""A base Yarn 1.x package."""
Expand All @@ -47,26 +53,99 @@ class _RelpathMixin(BaseModel):
class RegistryPackage(_BasePackage, _UrlMixin):
"""A Yarn 1.x package from the registry."""

@property
def purl(self) -> str:
"""Return package URL."""
qualifiers = {}

if self.url != DEFAULT_NPM_REGISTRY:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this mean that self.url can be anything? If not, could it be constrained to a union of literals?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We do have a check for that:
https://github.com/containerbuildsystem/cachi2/blob/main/cachi2/core/package_managers/yarn_classic/resolver.py#L184

In other words, if a package is a RegistryPackage, it comes from one of those npm registries.

qualifiers = {"repository_url": ALTERNATIVE_NPM_REGISTRY}

if self.integrity:
checksum = ChecksumInfo.from_sri(self.integrity)
qualifiers["checksum"] = str(checksum)

return PackageURL(
type="npm",
name=self.name,
version=self.version,
qualifiers=qualifiers,
).to_string()


class GitPackage(_BasePackage, _UrlMixin):
    """A Yarn 1.x package from a git repo."""

    @property
    def purl(self) -> str:
        """Build the npm purl, carrying the git origin in a ``vcs_url`` qualifier.

        The commit id is taken from the fragment component of ``self.url``.
        """
        commit_id = urlparse(self.url).fragment
        vcs_url = RepoID(origin_url=self.url, commit_id=commit_id).as_vcs_url_qualifier()
        package_url = PackageURL(
            type="npm",
            name=self.name,
            version=self.version,
            qualifiers={"vcs_url": vcs_url},
        )
        return package_url.to_string()


class UrlPackage(_BasePackage, _UrlMixin):
"""A Yarn 1.x package from a http/https URL."""

@property
def purl(self) -> str:
"""Return package URL."""
qualifiers = {"download_url": self.url}
return PackageURL(
type="npm",
name=self.name,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For some of the non-registry packages, we may want to read their respective package.json files from the cached tarballs to get their true names. As a user, I can call these non-registry dependencies whatever I want in my project's package.json, and the name could be completely different from the package's real name:

"dependencies": {
  "not-fecha": "https://github.com/taylorhakes/fecha.git"
}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does the filename in the cache come from package.json or from the URL?

version=self.version,
qualifiers=qualifiers,
).to_string()


class FilePackage(_BasePackage, _RelpathMixin):
    """A Yarn 1.x package from a local file path."""

    @property
    def purl(self) -> str:
        """Build the npm purl, using the package's relative path as the subpath."""
        purl_fields = {
            "type": "npm",
            "name": self.name,
            "version": self.version,
            "subpath": str(self.relpath),
        }
        return PackageURL(**purl_fields).to_string()


class WorkspacePackage(_BasePackage, _RelpathMixin):
    """A Yarn 1.x local workspace package."""

    @property
    def purl(self) -> str:
        """Build the npm purl, using the workspace's relative path as the subpath."""
        purl_fields = {
            "type": "npm",
            "name": self.name,
            "version": self.version,
            "subpath": str(self.relpath),
        }
        return PackageURL(**purl_fields).to_string()


class LinkPackage(_BasePackage, _RelpathMixin):
    """A Yarn 1.x local link package."""

    @property
    def purl(self) -> str:
        """Build the npm purl, using the linked package's relative path as the subpath."""
        purl_fields = {
            "type": "npm",
            "name": self.name,
            "version": self.version,
            "subpath": str(self.relpath),
        }
        return PackageURL(**purl_fields).to_string()


YarnClassicPackage = Union[
FilePackage,
Expand Down
Loading
Loading