Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add py_image_layer #402

Merged
merged 7 commits into from
Oct 4, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ module(

# Lower-bound versions of direct dependencies.
# When bumping, add a comment explaining what's required from the newer release.
bazel_dep(name = "aspect_bazel_lib", version = "1.40.0")
bazel_dep(name = "aspect_bazel_lib", version = "2.9.1") # py_image_layer requires 2.x for the `tar` rule.
alexeagle marked this conversation as resolved.
Show resolved Hide resolved
bazel_dep(name = "bazel_skylib", version = "1.4.2")
bazel_dep(name = "rules_python", version = "0.29.0")
bazel_dep(name = "platforms", version = "0.0.7")
Expand Down
8 changes: 8 additions & 0 deletions docs/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,14 @@ stardoc_with_diff_test(
bzl_library_target = "//py/private:py_pex_binary",
)

stardoc_with_diff_test(
name = "py_image_layer",
bzl_library_target = "//py:defs",
thesayyn marked this conversation as resolved.
Show resolved Hide resolved
symbol_names = [
"py_image_layer",
],
)

stardoc_with_diff_test(
name = "venv",
bzl_library_target = "//py/private:py_venv",
Expand Down
102 changes: 102 additions & 0 deletions docs/py_image_layer.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions py/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ bzl_library(
"//py/private:py_wheel",
"//py/private:virtual",
"//py/private:py_pex_binary",
"//py/private:py_image_layer",
"@aspect_bazel_lib//lib:utils",
],
)
5 changes: 4 additions & 1 deletion py/defs.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,13 @@ python.toolchain(python_version = "3.9", is_default = True)
load("@aspect_bazel_lib//lib:utils.bzl", "propagate_common_rule_attributes")
load("//py/private:py_binary.bzl", _py_binary = "py_binary", _py_test = "py_test")
load("//py/private:py_executable.bzl", "determine_main")
load("//py/private:py_image_layer.bzl", _py_image_layer = "py_image_layer")
load("//py/private:py_library.bzl", _py_library = "py_library")
load("//py/private:py_pex_binary.bzl", _py_pex_binary = "py_pex_binary")
load("//py/private:py_pytest_main.bzl", _py_pytest_main = "py_pytest_main")
load("//py/private:py_unpacked_wheel.bzl", _py_unpacked_wheel = "py_unpacked_wheel")
load("//py/private:virtual.bzl", _resolutions = "resolutions")
load("//py/private:py_venv.bzl", _py_venv = "py_venv")
load("//py/private:virtual.bzl", _resolutions = "resolutions")

py_pex_binary = _py_pex_binary
py_pytest_main = _py_pytest_main
Expand All @@ -54,6 +55,8 @@ py_test_rule = _py_test
py_library = _py_library
py_unpacked_wheel = _py_unpacked_wheel

py_image_layer = _py_image_layer

resolutions = _resolutions

def _py_binary_or_test(name, rule, srcs, main, deps = [], resolutions = {}, **kwargs):
Expand Down
8 changes: 8 additions & 0 deletions py/private/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,14 @@ exports_files(
visibility = ["//docs:__pkg__"],
)

# Exposes py_image_layer.bzl as a bzl_library target. Its only dependency is
# bazel-lib's tar library, because py_image_layer.bzl loads `mtree_spec` and
# `tar` from @aspect_bazel_lib//lib:tar.bzl.
bzl_library(
name = "py_image_layer",
srcs = ["py_image_layer.bzl"],
deps = [
"@aspect_bazel_lib//lib:tar",
],
)

bzl_library(
name = "py_binary",
srcs = ["py_binary.bzl"],
Expand Down
138 changes: 138 additions & 0 deletions py/private/py_image_layer.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
"py_image_layer"
thesayyn marked this conversation as resolved.
Show resolved Hide resolved

load("@aspect_bazel_lib//lib:tar.bzl", "mtree_spec", "tar")

# Layer groups that py_image_layer applies in addition to any user-supplied
# `layer_groups`. Keys are layer names; values are awk regular expressions
# matched against the (rooted) path of every mtree entry.
default_layer_groups = {
    # Third-party dependencies: external pip-like repositories whose runfiles
    # directory name starts with "pip_deps".
    "packages": "\\.runfiles/pip_deps.*",
    # The python interpreter: external repositories whose runfiles directory
    # name starts with "python" and contains a "-". A workspace path such as
    #   `/hello_world/hello_world_bin.runfiles/_main/python_app`
    # is therefore not matched.
    # NOTE(review): a bzlmod-style path such as
    #   `/hello_world/hello_world_bin.runfiles/rules_python~0.21.0~python~python3_9_aarch64-unknown-linux-gnu/bin/python3`
    # has `rules_python~...` directly after `.runfiles/`, which this pattern
    # does not appear to match. Confirm against real runfiles layouts.
    "interpreter": "\\.runfiles/python.*-.*/",
}

def py_image_layer(name, py_binary, root = None, layer_groups = {}, compress = "gzip", tar_args = ["--options", "gzip:!timestamp"], **kwargs):
    """Produce a separate tar output for each layer of a python app

    > Requires `awk` to be installed on the host machine/rbe runner.

    For better performance, it is recommended to split the output of a py_binary into multiple layers.
    This can be done by grouping files into layers based on their path by using the `layer_groups` attribute.

    The matching order for layer groups is as follows:
        1. `layer_groups` are checked first.
        2. If no match is found for `layer_groups`, the `default layer groups` are checked.
        3. Any remaining files are placed into the default layer.

    A `layer_groups` entry that reuses the name of a default layer group
    replaces the default regex for that group.

    The default layer groups are:
    ```
    {
        "packages": "\\.runfiles/pip_deps.*", # contains third-party deps
        "interpreter": "\\.runfiles/python.*-.*/", # contains the python interpreter
    }
    ```

    A py_binary that uses `torch` and `numpy` can use the following layer groups:

    ```
    oci_image(
        tars = py_image_layer(
            name = "my_app",
            py_binary = ":my_app_bin",
            layer_groups = {
                "torch": "pip_deps_torch.*",
                "numpy": "pip_deps_numpy.*",
            }
        )
    )
    ```


    Args:
        name: base name for targets
        py_binary: a py_binary target
        root: Path to where the layers should be rooted. Must start with `/`. If not specified, the layers will be rooted at the workspace root.
        layer_groups: Additional layer groups to create. They are used to group files into layers based on their path. In the form of: ```{"<name>": "regex_to_match_against_file_paths"}```. The name `default` is reserved for the catch-all layer.
        compress: Compression algorithm to use. Default is gzip. See: https://github.com/bazel-contrib/bazel-lib/blob/main/docs/tar.md#tar_rule
        tar_args: Additional arguments to pass to the tar rule. Default is `["--options", "gzip:!timestamp"]`. See: https://github.com/bazel-contrib/bazel-lib/blob/main/docs/tar.md#tar_rule
        **kwargs: attribute that apply to all targets expanded by the macro

    Returns:
        A list of target names, one per layer: the user-defined groups first,
        then the default layer groups, then the catch-all `default` layer.
    """
    if root != None and not root.startswith("/"):
        fail("root path must start with '/' but got '{root}', expected '/{root}'".format(root = root))

    # An unset root must not be formatted into the awk program as the literal
    # string "None"; "/" keeps entries at the top of the archive ("./<path>").
    # NOTE(review): the prefix is concatenated as-is, so a non-"/" root is
    # presumably expected to end with "/". Confirm against mtree_spec output.
    path_prefix = root if root != None else "/"

    # User-defined groups come first so their regexes are tried before the
    # defaults. A user entry that reuses a default name keeps the user's regex.
    groups = dict(layer_groups)
    for default_name, default_regex in default_layer_groups.items():
        groups.setdefault(default_name, default_regex)

    # "default" is the implicit catch-all layer. A user group with the same
    # name would declare the same genrule output and tar target twice.
    if "default" in groups:
        fail("'default' is a reserved layer group name, please rename the layer group")

    group_names = groups.keys() + ["default"]

    # Produce the manifest for a tar file of our py_binary, but don't tar it up yet, so we can split
    # into fine-grained layers for better pull, push and remote cache performance.
    mtree_spec(
        name = name + ".manifest",
        srcs = [py_binary],
        **kwargs
    )

    # Every per-group manifest starts with the "#mtree" header. Writing it in
    # the awk BEGIN block also guarantees each declared output exists even
    # when no entry matches that group.
    mtree_begin_blocks = "\n".join([
        'print "#mtree" >> "$(RULEDIR)/%s.%s.manifest.spec";' % (name, gn)
        for gn in group_names
    ])

    # When an mtree entry matches a layer group, it will be moved into the mtree
    # for that group. `next` stops at the first match, so dict order is priority.
    ifs = "\n".join([
        """\
    if ($$1 ~ "%s") {
        print $$0 >> "$(RULEDIR)/%s.%s.manifest.spec";
        next
    }""" % (regex, name, gn)
        for (gn, regex) in groups.items()
    ])

    # `$$` is the genrule escape for a literal `$` seen by awk.
    # NOTE(review): the ".whl" pattern has an unescaped dot, so it matches any
    # character followed by "whl" anywhere in the path.
    cmd = """\
awk < $< 'BEGIN {
    %s
}
{
    # Exclude .whl files from container images
    if ($$1 ~ ".whl") {
        next
    }
    # Move everything under the specified root
    sub(/^/, ".%s")
    # Match by regexes and write to the destination.
    %s
    # Every line that did not match the layer groups will go into the default layer.
    print $$0 >> "$(RULEDIR)/%s.default.manifest.spec"
}'
""" % (mtree_begin_blocks, path_prefix, ifs, name)

    native.genrule(
        name = "_{}_manifests".format(name),
        srcs = [name + ".manifest"],
        outs = [
            "{}.{}.manifest.spec".format(name, group_name)
            for group_name in group_names
        ],
        cmd = cmd,
        **kwargs
    )

    # Finally create layers using the tar rule
    result = []
    for group_name in group_names:
        tar_target = "_{}_{}".format(name, group_name)
        tar(
            name = tar_target,
            srcs = [py_binary],
            mtree = "{}.{}.manifest.spec".format(name, group_name),
            compress = compress,
            args = tar_args,
            **kwargs
        )
        result.append(tar_target)

    return result
7 changes: 4 additions & 3 deletions py/repositories.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,12 @@ def rules_py_dependencies():
url = "https://github.com/bazelbuild/bazel-skylib/archive/refs/tags/1.5.0.tar.gz",
)

# py_image_layer requires 2.x for the `tar` rule.
http_archive(
name = "aspect_bazel_lib",
sha256 = "6e6f8ac3c601d6df25810cd51e51d85831e3437e873b152c5c4ecd3b96964bc8",
strip_prefix = "bazel-lib-1.42.3",
url = "https://github.com/aspect-build/bazel-lib/archive/refs/tags/v1.42.3.tar.gz",
sha256 = "f93d386d8d0b0149031175e81df42a488be4267c3ca2249ba5321c23c60bc1f0",
strip_prefix = "bazel-lib-2.9.1",
url = "https://github.com/bazel-contrib/bazel-lib/releases/download/v2.9.1/bazel-lib-v2.9.1.tar.gz",
)

http_archive(
Expand Down
5 changes: 4 additions & 1 deletion py/toolchains.bzl
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
"""Declare toolchains"""

load("@aspect_bazel_lib//lib:repositories.bzl", "register_tar_toolchains")
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_file")
load("//py/private/toolchain:autodetecting.bzl", _register_autodetecting_python_toolchain = "register_autodetecting_python_toolchain")
load("//py/private/toolchain:repo.bzl", "prerelease_toolchains_repo", "toolchains_repo")
load("//py/private/toolchain:tools.bzl", "TOOLCHAIN_PLATFORMS", "prebuilt_tool_repo")
load("//tools:version.bzl", "IS_PRERELEASE")


register_autodetecting_python_toolchain = _register_autodetecting_python_toolchain

DEFAULT_TOOLS_REPOSITORY = "rules_py_tools"
Expand All @@ -19,6 +19,9 @@ def rules_py_toolchains(name = DEFAULT_TOOLS_REPOSITORY, register = True, is_pre
register: whether to call the register_toolchains, should be True for WORKSPACE and False for bzlmod.
is_prerelease: True iff there are no pre-built tool binaries for this version of rules_py
"""

register_tar_toolchains(register = register)

if is_prerelease:
prerelease_toolchains_repo(name = name)
if register:
Expand Down