-
-
Notifications
You must be signed in to change notification settings - Fork 288
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Export a lockfile in an ad-hoc JSON graph format. #2047
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,9 +3,10 @@ | |
|
||
from __future__ import absolute_import, print_function | ||
|
||
import json | ||
import sys | ||
from argparse import Action, ArgumentError, ArgumentParser, ArgumentTypeError, _ActionsContainer | ||
from collections import OrderedDict | ||
from collections import OrderedDict, defaultdict | ||
from operator import attrgetter | ||
|
||
from pex import pex_warnings | ||
|
@@ -65,6 +66,7 @@ class Value(Enum.Value): | |
pass | ||
|
||
PIP = Value("pip") | ||
GRAPH = Value("graph") | ||
PEP_665 = Value("pep-665") | ||
|
||
|
||
|
@@ -237,8 +239,12 @@ def _add_export_arguments(cls, export_parser): | |
choices=ExportFormat.values(), | ||
type=ExportFormat.for_value, | ||
help=( | ||
"The format to export the lock to. Currently only the {pip!r} requirements file " | ||
"format using `--hash` is supported.".format(pip=ExportFormat.PIP) | ||
"The format to export the lock to. Export results may be a subset of the full " | ||
"lockfile, targeting a specific interpreter and platform. Currently only " | ||
"`{pip!r}` - a Pip requirements file format using `--hash`, and `{graph!r} - " | ||
"an ad-hoc adjacency graph format, are supported.".format( | ||
pip=ExportFormat.PIP, graph=ExportFormat.GRAPH | ||
) | ||
), | ||
) | ||
export_parser.add_argument( | ||
|
@@ -492,20 +498,22 @@ def dump_with_terminating_newline(out): | |
|
||
def _export(self): | ||
# type: () -> Result | ||
if self.options.format != ExportFormat.PIP: | ||
if self.options.format not in [ExportFormat.PIP, ExportFormat.GRAPH]: | ||
return Error( | ||
"Only the {pip!r} lock format is supported currently.".format(pip=ExportFormat.PIP) | ||
"Only the `{pip!r}` and `{graph!r}` formats are supported currently.".format( | ||
pip=ExportFormat.PIP, graph=ExportFormat.GRAPH | ||
) | ||
) | ||
|
||
lockfile_path, lock_file = self._load_lockfile() | ||
targets = target_options.configure(self.options).resolve_targets() | ||
resolved_targets = targets.unique_targets() | ||
if len(resolved_targets) > 1: | ||
return Error( | ||
"A lock can only be exported for a single target in the {pip!r} format.\n" | ||
"A lock can only be exported for a single target in the {export_fmt!r} format.\n" | ||
"There were {count} targets selected:\n" | ||
"{targets}".format( | ||
pip=ExportFormat.PIP, | ||
export_fmt=self.options.format, | ||
count=len(resolved_targets), | ||
targets="\n".join( | ||
"{index}. {target}".format(index=index, target=target) | ||
|
@@ -535,13 +543,13 @@ def _export(self): | |
resolved_subset.resolved for resolved_subset in subset_result.subsets | ||
) | ||
pex_warnings.warn( | ||
"Only a single lock can be exported in the {pip!r} format.\n" | ||
"Only a single lock can be exported in the {export_fmt!r} format.\n" | ||
"There were {count} locks stored in {lockfile} that were applicable for the " | ||
"selected target: {target}; so using the most specific lock with platform " | ||
"{platform}.".format( | ||
count=len(subset_result.subsets), | ||
lockfile=lockfile_path, | ||
pip=ExportFormat.PIP, | ||
export_fmt=self.options.format, | ||
target=target, | ||
platform=resolved.source.platform_tag, | ||
) | ||
|
@@ -555,25 +563,41 @@ def _export(self): | |
downloaded_artifact.artifact.fingerprint | ||
) | ||
|
||
with self.output(self.options) as output: | ||
pins = fingerprints_by_pin.keys() # type: Iterable[Pin] | ||
if self.options.sort_by == ExportSortBy.PROJECT_NAME: | ||
pins = sorted(pins, key=attrgetter("project_name.normalized")) | ||
for pin in pins: | ||
fingerprints = fingerprints_by_pin[pin] | ||
output.write( | ||
"{project_name}=={version} \\\n" | ||
" {hashes}\n".format( | ||
project_name=pin.project_name, | ||
version=pin.version.raw, | ||
hashes=" \\\n ".join( | ||
"--hash={algorithm}:{hash}".format( | ||
algorithm=fingerprint.algorithm, hash=fingerprint.hash | ||
) | ||
for fingerprint in fingerprints | ||
), | ||
if self.options.format == ExportFormat.PIP: | ||
with self.output(self.options) as output: | ||
pins = fingerprints_by_pin.keys() # type: Iterable[Pin] | ||
if self.options.sort_by == ExportSortBy.PROJECT_NAME: | ||
pins = sorted(pins, key=attrgetter("project_name.normalized")) | ||
for pin in pins: | ||
fingerprints = fingerprints_by_pin[pin] | ||
output.write( | ||
"{project_name}=={version} \\\n" | ||
" {hashes}\n".format( | ||
project_name=pin.project_name, | ||
version=pin.version.raw, | ||
hashes=" \\\n ".join( | ||
"--hash={algorithm}:{hash}".format( | ||
algorithm=fingerprint.algorithm, hash=fingerprint.hash | ||
) | ||
for fingerprint in fingerprints | ||
), | ||
) | ||
) | ||
) | ||
else: | ||
vertices = set() | ||
edges = defaultdict(list) | ||
for src_pin, dst_pins in resolved.adjacency_list.items(): | ||
dst_pin_strs = sorted(str(dst_pin) for dst_pin in dst_pins) | ||
vertices.add(str(src_pin)) | ||
vertices.update(dst_pin_strs) | ||
edges[str(src_pin)] = dst_pin_strs | ||
graph = { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The single target selected above on 508-510 is controlled by command line flags like --python, --python-path, --interpreter-constraint, --platform and --complete-platform, but none of these need be present either. It seems like some node to store information about the rendered graph context would make sense; i.e.: for a universal lock, this would give the context for environment marker activation / deactivation assuming you include activation information in the graph and for a multi-lock lock file, it also indicates which lock is being talked about (say the CPython 3.7 locked resolve from a lock file containing 3 locks, 1 for 3.7, 1 for 3.8 and 1 for 3.9. |
||
"vertices": sorted(vertices), | ||
"edges": edges, | ||
} | ||
with self.output(self.options) as output: | ||
json.dump(graph, output) | ||
|
||
return Ok() | ||
|
||
def _update(self): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -345,6 +345,7 @@ def create( | |
target, # type: Target | ||
direct_requirements, # type: Iterable[Requirement] | ||
resolved_artifacts, # type: Iterable[_ResolvedArtifact] | ||
adjacency_list, # type: Dict[Pin, Set[Pin]] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A LockedResolve already has all the information needed. There should be no need to add graph rendering-specific data or methods to it. I think this is just a sign a (stripped-down) model for exported graph data is needed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If this is the case, we could derive the graph entirely in Pants, no? But it seemed to me (naively?) that this logic had to be here because request_resolve() is where we know who actually requires who, in the context of a target. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yup. Just be ready to break / fix when Pex changes its private, undocumented format.
I think naively - maybe. So how will Pants use the graph info? If the lock is universal, which it is in Pants case, will I think Pants really ought to have what it wants to do sorted. The sorting can probably best be done today using the lock file json directly. Then, once sorted, a solid idea of the requirements for a stable PEX graph export format will be in hand and we can make sure it works for the not-pants cases, like non-universal locks with multiple resolves contained within and not just 1 universal resolve, etc. |
||
source, # type: LockedResolve | ||
): | ||
# type: (...) -> Resolved | ||
|
@@ -382,6 +383,7 @@ def create( | |
return cls( | ||
target_specificity=sum(target_specificities) / len(target_specificities), | ||
downloadable_artifacts=tuple(downloadable_artifacts), | ||
adjacency_list=adjacency_list, | ||
source=source, | ||
) | ||
|
||
|
@@ -396,6 +398,7 @@ def most_specific(cls, resolves): | |
|
||
target_specificity = attr.ib() # type: float | ||
downloadable_artifacts = attr.ib() # type: Tuple[DownloadableArtifact, ...] | ||
adjacency_list = attr.ib() # type: Dict[Pin, Set[Pin]] | ||
source = attr.ib(eq=False) # type: LockedResolve | ||
|
||
|
||
|
@@ -526,6 +529,7 @@ def resolve( | |
# 1. Gather all required projects and their requirers. | ||
required = OrderedDict() # type: OrderedDict[ProjectName, List[_ResolveRequest]] | ||
to_be_resolved = deque() # type: Deque[_ResolveRequest] | ||
project_adjacency_list = defaultdict(set) # type Dict[ProjectName, Set[ProjectName]] | ||
|
||
def request_resolve(requests): | ||
# type: (Iterable[_ResolveRequest]) -> None | ||
|
@@ -541,6 +545,14 @@ def request_resolve(requests): | |
resolve_request = to_be_resolved.popleft() | ||
project_name = resolve_request.project_name | ||
required.setdefault(project_name, []).append(resolve_request) | ||
# Ensure that projects with no requirements appear in the list. | ||
project_adjacency_list[project_name].update([]) | ||
|
||
if len(resolve_request.required_by) > 1: | ||
# NB: resolve_request.required_by[-1] is project_name itself. | ||
project_adjacency_list[resolve_request.required_by[-2].project_name].add( | ||
project_name | ||
) | ||
|
||
if not transitive: | ||
continue | ||
|
@@ -726,9 +738,26 @@ def attributed_reason(reason): | |
uniqued_resolved_artifacts.append(resolved_artifact) | ||
seen.add(resolved_artifact.ranked_artifact.artifact) | ||
|
||
# TODO: I assume each project name is unique in uniqued_resolved_artifacts? | ||
project_name_to_pin = { | ||
resolved_artifact.locked_requirement.pin.project_name: resolved_artifact.locked_requirement.pin | ||
for resolved_artifact in uniqued_resolved_artifacts | ||
} | ||
|
||
def _pin(proj_name): | ||
return project_name_to_pin[proj_name] | ||
|
||
pin_adjacency_list = { | ||
_pin(src_project_name): { | ||
_pin(dst_project_name) for dst_project_name in dst_project_names | ||
} | ||
for src_project_name, dst_project_names in project_adjacency_list.items() | ||
} | ||
|
||
return Resolved.create( | ||
target=target, | ||
direct_requirements=requirements, | ||
resolved_artifacts=uniqued_resolved_artifacts, | ||
adjacency_list=pin_adjacency_list, | ||
source=self, | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
FYI: The current bit of Pex producing graphs is here: https://github.com/pantsbuild/pex/blob/97a2497e0938ece709310a03e7e41b5c26992952/pex/tools/commands/graph.py#L30-L83
That does take markers into account to good effect in the rendered graph, showing which requirements are not active in the present PEX being run by the present interpreter on the present machine. Of course the format is the dot adjacency list format and the side-band information about activation is stored in labels and colors.
If it's still the case that Pants doesn't want to define a format and Pex must my requirements are: