Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Export a lockfile in an ad-hoc JSON graph format. #2047

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 51 additions & 27 deletions pex/cli/commands/lock.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@

from __future__ import absolute_import, print_function

import json
import sys
from argparse import Action, ArgumentError, ArgumentParser, ArgumentTypeError, _ActionsContainer
from collections import OrderedDict
from collections import OrderedDict, defaultdict
from operator import attrgetter

from pex import pex_warnings
Expand Down Expand Up @@ -65,6 +66,7 @@ class Value(Enum.Value):
pass

PIP = Value("pip")
GRAPH = Value("graph")
PEP_665 = Value("pep-665")


Expand Down Expand Up @@ -237,8 +239,12 @@ def _add_export_arguments(cls, export_parser):
choices=ExportFormat.values(),
type=ExportFormat.for_value,
help=(
"The format to export the lock to. Currently only the {pip!r} requirements file "
"format using `--hash` is supported.".format(pip=ExportFormat.PIP)
"The format to export the lock to. Export results may be a subset of the full "
"lockfile, targeting a specific interpreter and platform. Currently only "
"`{pip!r}` - a Pip requirements file format using `--hash`, and `{graph!r} - "
"an ad-hoc adjacency graph format, are supported.".format(
pip=ExportFormat.PIP, graph=ExportFormat.GRAPH
)
),
)
export_parser.add_argument(
Expand Down Expand Up @@ -492,20 +498,22 @@ def dump_with_terminating_newline(out):

def _export(self):
# type: () -> Result
if self.options.format != ExportFormat.PIP:
if self.options.format not in [ExportFormat.PIP, ExportFormat.GRAPH]:
return Error(
"Only the {pip!r} lock format is supported currently.".format(pip=ExportFormat.PIP)
"Only the `{pip!r}` and `{graph!r}` formats are supported currently.".format(
pip=ExportFormat.PIP, graph=ExportFormat.GRAPH
)
)

lockfile_path, lock_file = self._load_lockfile()
targets = target_options.configure(self.options).resolve_targets()
resolved_targets = targets.unique_targets()
if len(resolved_targets) > 1:
return Error(
"A lock can only be exported for a single target in the {pip!r} format.\n"
"A lock can only be exported for a single target in the {export_fmt!r} format.\n"
"There were {count} targets selected:\n"
"{targets}".format(
pip=ExportFormat.PIP,
export_fmt=self.options.format,
count=len(resolved_targets),
targets="\n".join(
"{index}. {target}".format(index=index, target=target)
Expand Down Expand Up @@ -535,13 +543,13 @@ def _export(self):
resolved_subset.resolved for resolved_subset in subset_result.subsets
)
pex_warnings.warn(
"Only a single lock can be exported in the {pip!r} format.\n"
"Only a single lock can be exported in the {export_fmt!r} format.\n"
"There were {count} locks stored in {lockfile} that were applicable for the "
"selected target: {target}; so using the most specific lock with platform "
"{platform}.".format(
count=len(subset_result.subsets),
lockfile=lockfile_path,
pip=ExportFormat.PIP,
export_fmt=self.options.format,
target=target,
platform=resolved.source.platform_tag,
)
Expand All @@ -555,25 +563,41 @@ def _export(self):
downloaded_artifact.artifact.fingerprint
)

with self.output(self.options) as output:
pins = fingerprints_by_pin.keys() # type: Iterable[Pin]
if self.options.sort_by == ExportSortBy.PROJECT_NAME:
pins = sorted(pins, key=attrgetter("project_name.normalized"))
for pin in pins:
fingerprints = fingerprints_by_pin[pin]
output.write(
"{project_name}=={version} \\\n"
" {hashes}\n".format(
project_name=pin.project_name,
version=pin.version.raw,
hashes=" \\\n ".join(
"--hash={algorithm}:{hash}".format(
algorithm=fingerprint.algorithm, hash=fingerprint.hash
)
for fingerprint in fingerprints
),
if self.options.format == ExportFormat.PIP:
with self.output(self.options) as output:
pins = fingerprints_by_pin.keys() # type: Iterable[Pin]
if self.options.sort_by == ExportSortBy.PROJECT_NAME:
pins = sorted(pins, key=attrgetter("project_name.normalized"))
for pin in pins:
fingerprints = fingerprints_by_pin[pin]
output.write(
"{project_name}=={version} \\\n"
" {hashes}\n".format(
project_name=pin.project_name,
version=pin.version.raw,
hashes=" \\\n ".join(
"--hash={algorithm}:{hash}".format(
algorithm=fingerprint.algorithm, hash=fingerprint.hash
)
for fingerprint in fingerprints
),
)
)
)
else:
vertices = set()
Copy link
Member

@jsirois jsirois Feb 9, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI: The current bit of Pex producing graphs is here: https://github.com/pantsbuild/pex/blob/97a2497e0938ece709310a03e7e41b5c26992952/pex/tools/commands/graph.py#L30-L83
That does take markers into account to good effect in the rendered graph, showing which requirements are not active in the present PEX being run by the present interpreter on the present machine. Of course the format is the dot adjacency list format and the side-band information about activation is stored in labels and colors.

If it's still the case that Pants doesn't want to define a format and Pex must, then my requirements are:

  • Embedding a version for the format: allow an escape hatch for backwards incompatible evolution (but with the intent to try hard to never actually use it)
  • Eventually merge the existing model / logic driving the tool example above with this: one kicks out dot, the other JSON.

edges = defaultdict(list)
for src_pin, dst_pins in resolved.adjacency_list.items():
dst_pin_strs = sorted(str(dst_pin) for dst_pin in dst_pins)
vertices.add(str(src_pin))
vertices.update(dst_pin_strs)
edges[str(src_pin)] = dst_pin_strs
graph = {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The single target selected above on 508-510 is controlled by command line flags like --python, --python-path, --interpreter-constraint, --platform and --complete-platform, but none of these need be present either. It seems like some node to store information about the rendered graph context would make sense; i.e., for a universal lock, this would give the context for environment marker activation / deactivation (assuming you include activation information in the graph), and for a multi-lock lock file, it also indicates which lock is being talked about (say, the CPython 3.7 locked resolve from a lock file containing 3 locks: 1 for 3.7, 1 for 3.8 and 1 for 3.9).

"vertices": sorted(vertices),
"edges": edges,
}
with self.output(self.options) as output:
json.dump(graph, output)

return Ok()

def _update(self):
Expand Down
29 changes: 29 additions & 0 deletions pex/resolve/locked_resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,7 @@ def create(
target, # type: Target
direct_requirements, # type: Iterable[Requirement]
resolved_artifacts, # type: Iterable[_ResolvedArtifact]
adjacency_list, # type: Dict[Pin, Set[Pin]]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A LockedResolve already has all the information needed. There should be no need to add graph rendering-specific data or methods to it. I think this is just a sign a (stripped-down) model for exported graph data is needed.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is the case, we could derive the graph entirely in Pants, no? But it seemed to me (naively?) that this logic had to be here because request_resolve() is where we know who actually requires who, in the context of a target.

Copy link
Member

@jsirois jsirois Feb 10, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is the case, we could derive the graph entirely in Pants, no?

Yup. Just be ready to break / fix when Pex changes its private, undocumented format.

But it seemed to me (naively?) that this logic had to be here because request_resolve() is where we know who actually requires who, in the context of a target.

I think naively - maybe. So how will Pants use the graph info? If the lock is universal, which it is in Pants case, will ./pants dependencies ... include this info now and if so, what the heck will that pick for a target? Will it solve for a local interpreter that matches the constraints of all targets requested and use that if success and fail otherwise? If so and success, you already know the target up in Pants and can annotate the graph display with that info; i.e. this is the dependency graph when Python 3.7 is used, but the results may be different for 3.8 and 3.9 (and the repo is like Pants itself and supports >=3.7,<3.10).

I think Pants really ought to have what it wants to do sorted. The sorting can probably best be done today using the lock file json directly. Then, once sorted, a solid idea of the requirements for a stable PEX graph export format will be in hand and we can make sure it works for the not-pants cases, like non-universal locks with multiple resolves contained within and not just 1 universal resolve, etc.

source, # type: LockedResolve
):
# type: (...) -> Resolved
Expand Down Expand Up @@ -382,6 +383,7 @@ def create(
return cls(
target_specificity=sum(target_specificities) / len(target_specificities),
downloadable_artifacts=tuple(downloadable_artifacts),
adjacency_list=adjacency_list,
source=source,
)

Expand All @@ -396,6 +398,7 @@ def most_specific(cls, resolves):

target_specificity = attr.ib() # type: float
downloadable_artifacts = attr.ib() # type: Tuple[DownloadableArtifact, ...]
adjacency_list = attr.ib() # type: Dict[Pin, Set[Pin]]
source = attr.ib(eq=False) # type: LockedResolve


Expand Down Expand Up @@ -526,6 +529,7 @@ def resolve(
# 1. Gather all required projects and their requirers.
required = OrderedDict() # type: OrderedDict[ProjectName, List[_ResolveRequest]]
to_be_resolved = deque() # type: Deque[_ResolveRequest]
project_adjacency_list = defaultdict(set) # type: Dict[ProjectName, Set[ProjectName]]

def request_resolve(requests):
# type: (Iterable[_ResolveRequest]) -> None
Expand All @@ -541,6 +545,14 @@ def request_resolve(requests):
resolve_request = to_be_resolved.popleft()
project_name = resolve_request.project_name
required.setdefault(project_name, []).append(resolve_request)
# Ensure that projects with no requirements appear in the list.
project_adjacency_list[project_name].update([])

if len(resolve_request.required_by) > 1:
# NB: resolve_request.required_by[-1] is project_name itself.
project_adjacency_list[resolve_request.required_by[-2].project_name].add(
project_name
)

if not transitive:
continue
Expand Down Expand Up @@ -726,9 +738,26 @@ def attributed_reason(reason):
uniqued_resolved_artifacts.append(resolved_artifact)
seen.add(resolved_artifact.ranked_artifact.artifact)

# TODO: I assume each project name is unique in uniqued_resolved_artifacts?
project_name_to_pin = {
resolved_artifact.locked_requirement.pin.project_name: resolved_artifact.locked_requirement.pin
for resolved_artifact in uniqued_resolved_artifacts
}

def _pin(proj_name):
    # type: (ProjectName) -> Pin
    # Translate a resolved project name into its version-pinned Pin via the
    # closure-captured project_name_to_pin mapping; raises KeyError if the
    # project was not among the uniqued resolved artifacts.
    return project_name_to_pin[proj_name]

pin_adjacency_list = {
_pin(src_project_name): {
_pin(dst_project_name) for dst_project_name in dst_project_names
}
for src_project_name, dst_project_names in project_adjacency_list.items()
}

return Resolved.create(
target=target,
direct_requirements=requirements,
resolved_artifacts=uniqued_resolved_artifacts,
adjacency_list=pin_adjacency_list,
source=self,
)