Skip to content

Commit

Permalink
Handle cargo workspaces in git repositories
Browse files Browse the repository at this point in the history
Crates downloaded from crates.io can just be put into the
vendored-sources directory.
However a referenced git repository might contain multiple crates in a
so-called "workspace".
If we put such a workspace into the vendor folder, cargo fails with
> found a virtual manifest at [...]Cargo.toml instead of a package manifest

This solution uses a separate folder for sources downloaded from git
repositories and checks them for workspaces.
If they are "regular", i.e. don't contain a workspace, they are moved into
the same folder as the vendored sources from crates.io.
Otherwise they are left in the git-vendor directory.

Additionally refine the config.toml entry for sources vendored from git.
This allows overriding a specific git URL and revision with the path to
the extracted repository.
Otherwise we would need entries for each crate in workspaces but then
cargo tries to verify the revision and fails with
> Unable to update https://github.com/[...]?rev=[...]
> Caused by: can't checkout from 'https://github.com/[...]': you are in the offline mode (--offline)
  • Loading branch information
Flamefire committed Oct 14, 2024
1 parent 3cd59c8 commit f00d1ce
Showing 1 changed file with 158 additions and 46 deletions.
204 changes: 158 additions & 46 deletions easybuild/easyblocks/generic/cargo.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
@author: Mikael Oehman (Chalmers University of Technology)
@author: Alex Domingo (Vrije Universiteit Brussel)
@author: Alexander Grund (TU Dresden)
"""

import os
Expand All @@ -37,10 +38,10 @@
from easybuild.tools.build_log import EasyBuildError, print_warning
from easybuild.framework.easyconfig import CUSTOM
from easybuild.framework.extensioneasyblock import ExtensionEasyBlock
from easybuild.tools.filetools import extract_file, change_dir
from easybuild.tools.filetools import extract_file
from easybuild.tools.run import run_cmd
from easybuild.tools.config import build_option
from easybuild.tools.filetools import compute_checksum, mkdir, write_file
from easybuild.tools.filetools import compute_checksum, mkdir, move_file, read_file, write_file, CHECKSUM_TYPE_SHA256
from easybuild.tools.toolchain.compiler import OPTARCH_GENERIC

CRATESIO_SOURCE = "https://crates.io/api/v1/crates"
Expand All @@ -54,14 +55,74 @@
"""

CONFIG_TOML_PATCH_GIT = """
[patch."{repo}"]
{crates}
CONFIG_TOML_SOURCE_GIT = """
[source."{url}?rev={rev}"]
git = "{url}"
rev = "{rev}"
replace-with = "vendored-sources"
"""
CONFIG_TOML_PATCH_GIT_CRATES = """{0} = {{ path = "{1}" }}

CONFIG_TOML_SOURCE_GIT_WORKSPACE = """
[source."real-{url}?rev={rev}"]
directory = "{workspace_dir}"
[source."{url}?rev={rev}"]
git = "{url}"
rev = "{rev}"
replace-with = "real-{url}?rev={rev}"
"""

CARGO_CHECKSUM_JSON = '{{"files": {{}}, "package": "{chksum}"}}'
CARGO_CHECKSUM_JSON = '{{"files": {{}}, "package": "{checksum}"}}'


def get_workspace_members(crate_dir):
    """Find all members of a cargo workspace in crate_dir.

    (Minimally) parse the Cargo.toml file located in crate_dir.

    :param crate_dir: folder containing the Cargo.toml file to inspect
    :return: list with one path per workspace member (crate_dir joined with the member name),
             or None if the Cargo.toml file has no '[workspace]' section
    :raises EasyBuildError: if the '[workspace]' section has no 'members' array
                            or if an entry of that array looks invalid
    """
    cargo_toml = os.path.join(crate_dir, 'Cargo.toml')

    # We are looking for this:
    # [workspace]
    # members = [
    #     "reqwest-middleware",
    #     "reqwest-tracing",
    #     "reqwest-retry",
    # ]
    # or the single-line form:
    # members = ["reqwest-middleware", "reqwest-tracing"]

    lines = [line.strip() for line in read_file(cargo_toml).splitlines()]
    try:
        start_idx = lines.index('[workspace]')
    except ValueError:
        return None

    # Find "members = [" and concatenate the value, stop at end of section or file
    member_str = None
    for line in lines[start_idx + 1:]:
        if line.startswith('#'):
            continue  # Skip comments
        # Any table header ([name], [dotted.name], [[array-of-tables]]) ends the [workspace] section
        if re.match(r'\[[^\]]+\]', line):
            break
        if member_str is None:
            m = re.match(r'members\s*=\s*\[', line)
            if not m:
                continue
            member_str = ''
            # Continue with the remainder of this line: the array may be closed on the same line
            line = line[m.end():]
        if line.endswith(']'):
            member_str += line[:-1].strip()
            break
        member_str += line

    if member_str is None:
        # Without this check the parsing below would fail with an unhelpful AttributeError
        raise EasyBuildError('Failed to parse %s: Found no members in the [workspace] section', cargo_toml)

    # Split at commas after removing possibly trailing ones and remove the quotes (basic or literal strings)
    members = [member.strip().strip('"\'') for member in member_str.rstrip(',').split(',')]
    # Sanity check that we didn't pick up anything unexpected.
    # re.match only anchors at the start, so this rejects empty entries
    # and entries starting with an unexpected character.
    invalid_members = [member for member in members if not re.match(r'(\w|-)+', member)]
    if invalid_members:
        raise EasyBuildError('Failed to parse %s: Found seemingly invalid members: %s',
                             cargo_toml, ', '.join(invalid_members))
    return [os.path.join(crate_dir, m) for m in members]


class Cargo(ExtensionEasyBlock):
Expand Down Expand Up @@ -122,7 +183,6 @@ def __init__(self, *args, **kwargs):
"""Constructor for Cargo easyblock."""
super(Cargo, self).__init__(*args, **kwargs)
self.cargo_home = os.path.join(self.builddir, '.cargo')
self.vendor_dir = os.path.join(self.builddir, 'easybuild_vendor')
env.setvar('CARGO_HOME', self.cargo_home)
env.setvar('RUSTC', 'rustc')
env.setvar('RUSTDOC', 'rustdoc')
Expand Down Expand Up @@ -165,67 +225,119 @@ def extract_step(self):
"""
Unpack the source files and populate them with required .cargo-checksum.json if offline
"""
mkdir(self.vendor_dir)
vendor_dir = os.path.join(self.builddir, 'easybuild_vendor')
mkdir(vendor_dir)
# Sources from git repositories might contain multiple crates/folders in a so-called "workspace".
# If we put such a workspace into the vendor folder, cargo fails with
# "found a virtual manifest at [...]Cargo.toml instead of a package manifest".
# Hence we put those in a separate folder and only move "regular" crates into the vendor folder.
git_vendor_dir = os.path.join(self.builddir, 'easybuild_vendor_git')
mkdir(git_vendor_dir)

vendor_crates = {self.crate_src_filename(*crate): crate for crate in self.crates}
git_sources = {crate[2]: [] for crate in self.crates if len(crate) == 4}
# Track git sources for building the cargo config and avoiding duplicated folders
git_sources = {}

for src in self.src:
extraction_dir = self.builddir
# Check for git crates, `git_key` will be set to a true-ish value for those
try:
crate_name, _, git_repo, rev = vendor_crates[src['name']]
except (ValueError, KeyError):
git_key = None
else:
git_key = (git_repo, rev)
self.log.debug("Sources of %s(%s) belong to git repo: %s rev %s",
crate_name, src['name'], git_repo, rev)
# Do a sanity check that sources for the same repo and revision are the same
try:
previous_source = git_sources[git_key]
except KeyError:
git_sources[git_key] = src
else:
previous_checksum = previous_source['checksum']
current_checksum = src['checksum']
if previous_checksum and current_checksum and previous_checksum != current_checksum:
raise EasyBuildError("Sources for the same git repository need to be identical."
"Mismatch found for %s rev %s in %s vs %s",
git_repo, rev, previous_source['name'], src['name'])
self.log.info("Source %s already extracted to %s by %s. Skipping extraction.",
src['name'], previous_source['finalpath'], previous_source['name'])
src['finalpath'] = previous_source['finalpath']
continue

is_vendor_crate = src['name'] in vendor_crates
# Extract dependency crates into vendor subdirectory, separate from sources of main package
if src['name'] in vendor_crates:
extraction_dir = self.vendor_dir
if is_vendor_crate:
extraction_dir = git_vendor_dir if git_key else vendor_dir
else:
extraction_dir = self.builddir

self.log.info("Unpacking source of %s", src['name'])
existing_dirs = set(os.listdir(extraction_dir))
crate_dir = None
src_dir = extract_file(src['path'], extraction_dir, cmd=src['cmd'],
extra_options=self.cfg['unpack_options'], change_into_dir=False)
new_extracted_dirs = set(os.listdir(extraction_dir)) - existing_dirs

if len(new_extracted_dirs) == 1:
# Expected crate tarball with 1 folder
crate_dir = new_extracted_dirs.pop()
src_dir = os.path.join(extraction_dir, crate_dir)
self.log.debug("Unpacked sources of %s into: %s", src['name'], src_dir)
elif len(new_extracted_dirs) == 0:
if len(new_extracted_dirs) == 0:
# Extraction went wrong
raise EasyBuildError("Unpacking sources of '%s' failed", src['name'])
# Expected crate tarball with 1 folder
# TODO: properly handle case with multiple extracted folders
# this is currently in a grey area, might still be used by cargo
if len(new_extracted_dirs) == 1:
src_dir = os.path.join(extraction_dir, new_extracted_dirs.pop())
self.log.debug("Unpacked sources of %s into: %s", src['name'], src_dir)

change_dir(src_dir)
self.src[self.src.index(src)]['finalpath'] = src_dir

if self.cfg['offline'] and crate_dir:
# Create checksum file for extracted sources required by vendored crates.io sources
self.log.info('creating .cargo-checksums.json file for : %s', crate_dir)
chksum = compute_checksum(src['path'], checksum_type='sha256')
chkfile = os.path.join(extraction_dir, crate_dir, '.cargo-checksum.json')
write_file(chkfile, CARGO_CHECKSUM_JSON.format(chksum=chksum))
# Add path to extracted sources for any crate from a git repo
try:
crate_name, _, crate_repo, _ = vendor_crates[src['name']]
except (ValueError, KeyError):
pass
else:
self.log.debug("Sources of %s belong to git repo: %s", src['name'], crate_repo)
git_src_dir = (crate_name, src_dir)
git_sources[crate_repo].append(git_src_dir)
if is_vendor_crate and self.cfg['offline']:
# Create checksum file for extracted sources required by vendored crates

# By default there is only a single crate
crate_dirs = [src_dir]
# For git sources determine the folders that contain crates by taking workspaces into account
if git_key:
member_dirs = get_workspace_members(src_dir)
if member_dirs:
crate_dirs = member_dirs

try:
checksum = src[CHECKSUM_TYPE_SHA256]
except KeyError:
checksum = compute_checksum(src['path'], checksum_type=CHECKSUM_TYPE_SHA256)
for crate_dir in crate_dirs:
self.log.info('creating .cargo-checksums.json file for %s', os.path.basename(crate_dir))
chkfile = os.path.join(src_dir, crate_dir, '.cargo-checksum.json')
write_file(chkfile, CARGO_CHECKSUM_JSON.format(checksum=checksum))
# Move non-workspace git crates to the vendor folder
if git_key and member_dirs is None:
src_dir = os.path.join(vendor_dir, os.path.basename(crate_dirs[0]))
move_file(crate_dirs[0], src_dir)

src['finalpath'] = src_dir

if self.cfg['offline']:
self.log.info("Setting vendored crates dir for offline operation")
config_toml = os.path.join(self.cargo_home, 'config.toml')
# Replace crates-io with vendored sources using build dir wide toml file in CARGO_HOME
# because the rust source subdirectories might differ with python packages
self.log.debug("Writting config.toml entry for vendored crates from crate.io")
write_file(config_toml, CONFIG_TOML_SOURCE_VENDOR.format(vendor_dir=self.vendor_dir), append=True)

# also vendor sources from other git sources (could be many crates for one git source)
for git_repo, repo_crates in git_sources.items():
self.log.debug("Writting config.toml entry for git repo: %s", git_repo)
config_crates = ''.join([CONFIG_TOML_PATCH_GIT_CRATES.format(*crate) for crate in repo_crates])
write_file(config_toml, CONFIG_TOML_PATCH_GIT.format(repo=git_repo, crates=config_crates), append=True)
write_file(config_toml, CONFIG_TOML_SOURCE_VENDOR.format(vendor_dir=vendor_dir), append=True)

# Tell cargo about the vendored git sources to avoid it failing with:
# Unable to update https://github.com/[...]
# can't checkout from 'https://github.com/[...]]': you are in the offline mode (--offline)
for (git_repo, rev), src in git_sources.items():
self.log.debug("Writting config.toml entry for git repo: %s rev %s", git_repo, rev)
src_dir = src['finalpath']
if os.path.dirname(src_dir) == vendor_dir:
# Non-workspace sources are in vendor_dir
write_file(config_toml,
CONFIG_TOML_SOURCE_GIT.format(url=git_repo, rev=rev),
append=True)
else:
# Workspace sources stay in their own separate folder.
# We cannot have a `directory = "<dir>"` entry where a folder containing a workspace is inside
write_file(config_toml,
CONFIG_TOML_SOURCE_GIT_WORKSPACE.format(url=git_repo, rev=rev, workspace_dir=src_dir),
append=True)

# Use environment variable since it would also be passed along to builds triggered via python packages
env.setvar('CARGO_NET_OFFLINE', 'true')
Expand Down

0 comments on commit f00d1ce

Please sign in to comment.