Skip to content

Commit

Permalink
Merge pull request #188 from infosiftr/reproducible-rootfs
Browse files Browse the repository at this point in the history
Adjust tarball creation to be reproducible
  • Loading branch information
tianon authored Mar 1, 2024
2 parents e704abc + 7e39d61 commit 6ded0a4
Show file tree
Hide file tree
Showing 64 changed files with 751 additions and 73 deletions.
13 changes: 12 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jobs:
run: |
strategy="$(GENERATE_STACKBREW_LIBRARY='.github/workflows/fake-gsl.sh' "$BASHBREW_SCRIPTS/github-actions/generate.sh")"
strategy="$(.github/workflows/munge-build.sh -c <<<"$strategy")"
strategy="$(.github/workflows/munge-debian-unstable.sh -c <<<"$strategy")"
strategy="$(.github/workflows/munge-unstable.sh -c <<<"$strategy")"
EOF="EOF-$RANDOM-$RANDOM-$RANDOM"
echo "strategy<<$EOF" >> "$GITHUB_OUTPUT"
Expand All @@ -47,8 +47,11 @@ jobs:
strategy: ${{ fromJson(needs.generate-jobs.outputs.strategy) }}
name: ${{ matrix.name }}
runs-on: ${{ matrix.os }}
env:
BASHBREW_ARCH: amd64 # TODO consider using "$BASHBREW_SCRIPTS/bashbrew-host-arch.sh" ? (would make it harder to force i386 in our matrix too, so explicit is probably better)
steps:
- uses: actions/checkout@v3
- uses: docker-library/bashbrew@HEAD # build.sh needs bashbrew
- name: Prepare Environment
run: ${{ matrix.runs.prepare }}
- name: Pull Dependencies
Expand All @@ -61,3 +64,11 @@ jobs:
run: ${{ matrix.runs.test }}
- name: '"docker images"'
run: ${{ matrix.runs.images }}
- name: Git Diff # see "munge-build.sh"
run: |
if git diff --exit-code */*/Dockerfile.builder; then # see "hack-unstable.sh" (and "munge-unstable.sh")
git diff --exit-code
else
# for unstable builds, let's leave this in but purely informational (instead of causing CI to fail)
git diff
fi
10 changes: 9 additions & 1 deletion .github/workflows/munge-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@ set -Eeuo pipefail

jq '
.matrix.include |= map(
.runs.build = "./build.sh " + (.meta.entries[].directory | @sh) + "\n" + (.runs.build | sub(" --file [^ ]+ "; " "))
.runs.build = (
[
"dir=" + (.meta.entries[].directory | @sh),
"rm -rf \"$dir/$BASHBREW_ARCH\"", # make sure our OCI directory is clean so we can "git diff --exit-code" later
"./build.sh \"$dir\"",
(.runs.build | sub(" --file [^ ]+ "; " ")),
empty
] | join("\n")
)
)
' "$@"
24 changes: 0 additions & 24 deletions .github/workflows/munge-debian-unstable.sh

This file was deleted.

19 changes: 19 additions & 0 deletions .github/workflows/munge-unstable.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env bash
set -Eeuo pipefail

# see also "hack-unstable.sh"
jq '
.matrix.include += [
.matrix.include[]
| select(.name | test(" [(].+[)]") | not) # ignore any existing munged builds
| select(.os | startswith("windows-") | not)
| select(.meta.froms | any(startswith("debian:") or startswith("alpine:")))
| .name += " (unstable)"
| .runs.prepare += ([
"./hack-unstable.sh " + (.meta.entries[].directory | @sh),
"if git diff --exit-code; then exit 1; fi", # trust, but verify (if hack-unstable did not modify anything, we want to bail quickly)
empty
] | map("\n" + .) | add)
| .runs.pull = "" # pulling images does not make sense here (we just changed them)
]
' "$@"
30 changes: 24 additions & 6 deletions Dockerfile-builder.template
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ RUN set -eux; \
musl-dev \
patch \
tzdata \
# busybox's tar ironically does not maintain mtime of directories correctly (which we need for SOURCE_DATE_EPOCH / reproducibility)
tar \
;
{{ ) else ( -}}
FROM debian:bookworm-slim
Expand Down Expand Up @@ -234,22 +236,36 @@ RUN set -eux; \
curl -fL -o busybox.tar.bz2 "https://busybox.net/downloads/$tarball"; \
echo "$BUSYBOX_SHA256 *busybox.tar.bz2" | sha256sum -c -; \
gpg --batch --verify busybox.tar.bz2.sig busybox.tar.bz2; \
mkdir -p /usr/src/busybox; \
tar -xf busybox.tar.bz2 -C /usr/src/busybox --strip-components 1; \
rm busybox.tar.bz2*
# Alpine... 😅
mkdir -p /usr/src; \
tar -xf busybox.tar.bz2 -C /usr/src "busybox-$BUSYBOX_VERSION"; \
mv "/usr/src/busybox-$BUSYBOX_VERSION" /usr/src/busybox; \
rm busybox.tar.bz2*; \
\
# save the tarball's filesystem timestamp persistently (in case building busybox modifies it) so we can use it for reproducible rootfs later
SOURCE_DATE_EPOCH="$(stat -c '%Y' /usr/src/busybox | tee /usr/src/busybox.SOURCE_DATE_EPOCH)"; \
date="$(date -d "@$SOURCE_DATE_EPOCH" '+%Y%m%d%H%M.%S')"; \
touch -t "$date" /usr/src/busybox.SOURCE_DATE_EPOCH; \
# for logging validation/edification
date --date "@$SOURCE_DATE_EPOCH" --rfc-2822

WORKDIR /usr/src/busybox

RUN set -eux; \
\
# build date/time gets embedded in the BusyBox binary -- SOURCE_DATE_EPOCH should override that
SOURCE_DATE_EPOCH="$(cat /usr/src/busybox.SOURCE_DATE_EPOCH)"; \
export SOURCE_DATE_EPOCH; \
# (has to be set in the config stage for making sure "AUTOCONF_TIMESTAMP" is embedded correctly)
\
setConfs=' \
CONFIG_AR=y \
CONFIG_FEATURE_AR_CREATE=y \
CONFIG_FEATURE_AR_LONG_FILENAMES=y \
# CONFIG_LAST_SUPPORTED_WCHAR: see https://github.com/docker-library/busybox/issues/13 (UTF-8 input)
CONFIG_LAST_SUPPORTED_WCHAR=0 \
{{ if env.variant == "glibc" then ( -}}
# As long as we rely on libnss (see below), we have to have libc.so anyhow, so we've removed CONFIG_STATIC here... :cry:
# As long as we rely on libnss (see below), we have to have libc.so anyhow, so we've removed CONFIG_STATIC here... 😭
{{ ) else ( -}}
CONFIG_STATIC=y \
{{ ) end -}}
Expand Down Expand Up @@ -352,15 +368,17 @@ RUN set -eux; \
done; \
{{ ) elif env.variant == "musl" then ( -}}
# copy simplified getconf port from Alpine
aportsVersion="v$(cat /etc/alpine-release)"; \
# https://github.com/alpinelinux/aports/commits/HEAD/main/musl/getconf.c
curl -fsSL \
"https://github.com/alpinelinux/aports/raw/$aportsVersion/main/musl/getconf.c" \
"https://github.com/alpinelinux/aports/raw/48b16204aeeda5bc1f87e49c6b8e23d9abb07c73/main/musl/getconf.c" \
-o /usr/src/getconf.c \
; \
echo 'd87d0cbb3690ae2c5d8cc218349fd8278b93855dd625deaf7ae50e320aad247c */usr/src/getconf.c' | sha256sum -c -; \
gcc -o rootfs/bin/getconf -static -Os /usr/src/getconf.c; \
{{ ) else "" end -}}
chroot rootfs /bin/getconf _NPROCESSORS_ONLN; \
\
# TODO make this create symlinks instead so the output tarball is cleaner (but "-s" outputs absolute symlinks which is kind of annoying to deal with -- we should also consider letting busybox determine the "install paths"; see "busybox --list-full")
chroot rootfs /bin/busybox --install /bin
# install a few extra files from buildroot (/etc/passwd, etc)
Expand Down
4 changes: 3 additions & 1 deletion Dockerfile.template
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# this isn't used for the official published images anymore, but is included for backwards compatibility
# see https://github.com/docker-library/bashbrew/issues/51
FROM scratch
ADD busybox.tar.xz /
ADD busybox.tar.gz /
CMD ["sh"]
4 changes: 4 additions & 0 deletions apply-templates.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,15 @@ for version; do
variants="$(jq -r '.[env.version].variants | map(@sh) | join(" ")' versions.json)"
eval "variants=( $variants )"

# TODO somehow make sure this deletes any content we're not generating (without accidentally deleting potentialy generated tarballs for the things we *do* care about)

for variant in "${variants[@]}"; do
export variant

echo "processing $version/$variant ..."

mkdir -p "$version/$variant"

{
generated_warning
gawk -f "$jqt" Dockerfile-builder.template
Expand Down
125 changes: 122 additions & 3 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,131 @@ if [ "$#" -eq 0 ]; then
eval "set -- $dirs"
fi

[ -n "$BASHBREW_ARCH" ]
platformString="$(bashbrew cat --format '{{ ociPlatform arch }}' <(echo 'Maintainers: empty hack (@example)'))"
platform="$(bashbrew cat --format '{{ ociPlatform arch | json }}' <(echo 'Maintainers: empty hack (@example)'))"

for dir; do
base="busybox:${dir////-}"
variant="$(basename "$dir")"
base="busybox:${dir////-}-$BASHBREW_ARCH"

froms="$(awk 'toupper($1) == "FROM" { print $2 }' "$dir/Dockerfile.builder")"
for from in "$froms"; do
if ! bashbrew remote arches --json "$from" | jq -e '.arches | has(env.BASHBREW_ARCH)' > /dev/null; then
echo >&2 "warning: '$base' is 'FROM $from' which does not support '$BASHBREW_ARCH'; skipping"
continue 2
fi
done

(
set -x
docker build -t "$base-builder" -f "$dir/Dockerfile.builder" "$dir"
docker run --rm "$base-builder" tar cC rootfs . | xz -T0 -z9 > "$dir/busybox.tar.xz"

# TODO save the output of "bashbrew remote arches" above so we can "--build-context" here?
docker buildx build \
--progress=plain \
--platform "$platformString" \
--pull \
--load \
--tag "$base-builder" \
--file "$dir/Dockerfile.builder" \
"$dir"

oci="$dir/$BASHBREW_ARCH"
rm -rf "$oci"
mkdir "$oci" "$oci/blobs" "$oci/blobs/sha256"

docker run --rm "$base-builder" \
tar \
--create \
--directory rootfs \
--numeric-owner \
--transform 's,^./,,' \
--sort name \
--mtime /usr/src/busybox.SOURCE_DATE_EPOCH --clamp-mtime \
. \
> "$oci/rootfs.tar"

# if we gzip separately, we can calculate the diffid without decompressing
diffId="$(sha256sum "$oci/rootfs.tar" | cut -d' ' -f1)"
diffId="sha256:$diffId"

# we need to use the container's gzip so it's more likely reproducible over time (and using busybox's own gzip is a cute touch 😀)
docker run -i --rm "$base-builder" chroot rootfs gzip -c < "$oci/rootfs.tar" > "$oci/rootfs.tar.gz"
rm "$oci/rootfs.tar"
rootfs="$(sha256sum "$oci/rootfs.tar.gz" | cut -d' ' -f1)"
ln -svfT --relative "$oci/rootfs.tar.gz" "$oci/blobs/sha256/$rootfs"
rootfsSize="$(stat --format '%s' --dereference "$oci/blobs/sha256/$rootfs")"
rootfs="sha256:$rootfs"

SOURCE_DATE_EPOCH="$(docker run --rm "$base-builder" cat /usr/src/busybox.SOURCE_DATE_EPOCH)"
createdBy="$(docker run --rm --env variant="$variant" "$base-builder" sh -euc '. /etc/os-release && echo "BusyBox $BUSYBOX_VERSION ($variant)${BUILDROOT_VERSION:+, Buildroot $BUILDROOT_VERSION}, ${NAME%% *} ${VERSION_ID:-$VERSION_CODENAME}"')"
jq -n --tab --arg SOURCE_DATE_EPOCH "$SOURCE_DATE_EPOCH" --arg diffId "$diffId" --arg createdBy "$createdBy" --argjson platform "$platform" '
($SOURCE_DATE_EPOCH | tonumber | strftime("%Y-%m-%dT%H:%M:%SZ")) as $created
| {
config: {
Cmd: [ "sh" ],
},
created: $created,
history: [ {
created: $created,
created_by: $createdBy,
} ],
rootfs: {
type: "layers",
diff_ids: [ $diffId ],
},
} + $platform
' > "$oci/image-config.json"
config="$(sha256sum "$oci/image-config.json" | cut -d' ' -f1)"
ln -svfT --relative "$oci/image-config.json" "$oci/blobs/sha256/$config"
configSize="$(stat --format '%s' --dereference "$oci/blobs/sha256/$config")"
config="sha256:$config"

version="$(cut <<<"$createdBy" -d' ' -f2)" # a better way to scrape the BusyBox version? maybe this is fine (want to avoid yet another container run)
jq -n --tab --arg version "$version" --arg variant "$variant" --arg config "$config" --arg configSize "$configSize" --arg rootfs "$rootfs" --arg rootfsSize "$rootfsSize" '
{
schemaVersion: 2,
mediaType: "application/vnd.oci.image.manifest.v1+json",
config: {
mediaType: "application/vnd.oci.image.config.v1+json",
digest: $config,
size: ($configSize | tonumber),
},
layers: [ {
mediaType: "application/vnd.oci.image.layer.v1.tar+gzip",
digest: $rootfs,
size: ($rootfsSize | tonumber),
} ],
annotations: {
"org.opencontainers.image.url": "https://github.com/docker-library/busybox",
"org.opencontainers.image.version": ($version + "-" + $variant),
},
}
' > "$oci/image-manifest.json"
manifest="$(sha256sum "$oci/image-manifest.json" | cut -d' ' -f1)"
ln -svfT --relative "$oci/image-manifest.json" "$oci/blobs/sha256/$manifest"
manifestSize="$(stat --format '%s' --dereference "$oci/blobs/sha256/$manifest")"
manifest="sha256:$manifest"

jq -nc '{ imageLayoutVersion:"1.0.0" }' > "$oci/oci-layout"
jq -n --tab --arg version "$version" --arg variant "$variant" --arg manifest "$manifest" --arg manifestSize "$manifestSize" --argjson platform "$platform" '
{
schemaVersion: 2,
mediaType: "application/vnd.oci.image.index.v1+json",
manifests: [ {
mediaType: "application/vnd.oci.image.manifest.v1+json",
digest: $manifest,
size: ($manifestSize | tonumber),
platform: $platform,
annotations: {
"org.opencontainers.image.ref.name": ("busybox:" + $version + "-" + $variant),
"io.containerd.image.name": ("busybox:" + $version + "-" + $variant),
},
} ],
}
' > "$oci/index.json"

ln -svfT --relative "$oci/rootfs.tar.gz" "$dir/busybox.tar.gz"
docker build -t "$base-test" "$dir"
docker run --rm "$base-test" sh -xec 'true'

Expand Down
23 changes: 16 additions & 7 deletions generate-stackbrew-library.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ Maintainers: Tianon Gravi <[email protected]> (@tianon),
Joseph Ferguson <[email protected]> (@yosifkit)
GitRepo: $gitHubUrl.git
GitCommit: $selfCommit
Builder: oci-import
File: index.json
EOH
for arch in "${arches[@]}"; do
commit="${archCommits[$arch]}"
Expand Down Expand Up @@ -97,6 +99,7 @@ for version; do
fi
versionAliases+=( latest )

actualArches=()
declare -A archLatestDir=()
for variant in "${variants[@]}"; do
dir="$version/$variant"
Expand All @@ -107,25 +110,31 @@ for version; do
variantArches=()
for arch in "${arches[@]}"; do
archCommit="${archCommits[$arch]}"
if wget --quiet --spider -O /dev/null -o /dev/null "$rawGitUrl/$archCommit/$dir/busybox.tar.xz"; then
if wget --quiet --spider -O /dev/null -o /dev/null "$rawGitUrl/$archCommit/$dir/$arch/rootfs.tar.gz"; then
variantArches+=( "$arch" )
: "${archLatestDir[$arch]:=$dir}" # record the first supported directory per architecture for "latest" and friends
if [ -z "${archLatestDir[$arch]:-}" ]; then
# record the first supported directory per architecture for "latest" and friends
archLatestDir["$arch"]="$dir/$arch"
actualArches+=( "$arch" )
fi
fi
done

if _tags "${variantAliases[@]}"; then
if [ "${#variantArches[@]}" -gt 0 ] && _tags "${variantAliases[@]}"; then
cat <<-EOE
Architectures: $(join ', ' "${variantArches[@]}")
Directory: $dir
EOE
for arch in "${variantArches[@]}"; do
echo "$arch-Directory: $dir/$arch"
done
fi
done

if _tags "${versionAliases[@]}"; then
if [ "${#actualArches[@]}" -gt 0 ] && _tags "${versionAliases[@]}"; then
cat <<-EOE
Architectures: $(join ', ' "${arches[@]}")
Architectures: $(join ', ' "${actualArches[@]}")
EOE
for arch in "${arches[@]}"; do
for arch in "${actualArches[@]}"; do
archDir="${archLatestDir[$arch]}"
cat <<-EOA
${arch}-Directory: $archDir
Expand Down
14 changes: 14 additions & 0 deletions hack-unstable.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/env bash
set -Eeuo pipefail

if [ "$#" -eq 0 ]; then
set -- */*/
fi

set -x

# This is used to modify "Dockerfile.builder" for architectures that are not (yet) supported by stable releases (notably, riscv64).
sed -ri \
-e 's/^(FROM debian:)[^ -]+/\1unstable/g' \
-e 's/^(FROM alpine:)[^ -]+/\1edge/g' \
"${@/%//Dockerfile.builder}"
4 changes: 3 additions & 1 deletion latest-1/glibc/Dockerfile

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 6ded0a4

Please sign in to comment.