From 7853bcd7eb6ce79fd0512eac3d721b7630fd2ba9 Mon Sep 17 00:00:00 2001 From: DolceTriade Date: Mon, 28 Oct 2024 13:52:18 -0700 Subject: [PATCH] tar: make all gzip compression hermetic (#75) We weren't passing the flag to make gzip hermetic. Without the flag to ignore timestamp, the shasum will differ based on the system time. Also, DEFAULT_ARGS contained "--options=gzip:...", which is invalid when using any other compression scheme. Therefore, remove that option from DEFAULT_ARGS and provide an alternate function called `add_default_compression_args` to add per-compression type default arguments which can do things like make the compression more hermetic. --- distroless/private/flatten.bzl | 2 ++ distroless/private/group.bzl | 2 +- distroless/private/home.bzl | 2 +- distroless/private/locale.sh | 4 ++-- distroless/private/os_release.bzl | 2 +- distroless/private/passwd.bzl | 2 +- distroless/private/tar.bzl | 17 ++++++++++++++--- 7 files changed, 22 insertions(+), 9 deletions(-) diff --git a/distroless/private/flatten.bzl b/distroless/private/flatten.bzl index 2c6346a..7d3afca 100644 --- a/distroless/private/flatten.bzl +++ b/distroless/private/flatten.bzl @@ -11,8 +11,10 @@ def _flatten_impl(ctx): output = ctx.actions.declare_file(ctx.attr.name + ext) args = ctx.actions.args() + args.add_all(tar_lib.DEFAULT_ARGS) args.add("--create") tar_lib.common.add_compression_args(ctx.attr.compress, args) + tar_lib.add_default_compression_args(ctx.attr.compress, args) args.add("--file", output) args.add_all(ctx.files.tars, format_each = "@%s") diff --git a/distroless/private/group.bzl b/distroless/private/group.bzl index a47be27..5752977 100644 --- a/distroless/private/group.bzl +++ b/distroless/private/group.bzl @@ -53,7 +53,7 @@ def group(name, entries, time = "0.0", mode = "0644", **kwargs): name = name, srcs = [":%s_content" % name], mtree = mtree.content(), - args = tar_lib.DEFAULT_ARGS, + args = tar_lib.DEFAULT_ARGS + tar_lib.DEFAULT_COMPRESSION_ARGS["gzip"], compress = "gzip", **common_kwargs ) diff --git a/distroless/private/home.bzl b/distroless/private/home.bzl index a038d24..9902f10 100644 --- a/distroless/private/home.bzl +++ b/distroless/private/home.bzl @@ -28,7 +28,7 @@ def home(name, dirs, **kwargs): tar( name = name, mtree = mtree.content(), - args = tar_lib.DEFAULT_ARGS, + args = tar_lib.DEFAULT_ARGS + tar_lib.DEFAULT_COMPRESSION_ARGS["gzip"], compress = "gzip", **kwargs ) diff --git a/distroless/private/locale.sh b/distroless/private/locale.sh index 40c9bdd..9d0130f 100755 --- a/distroless/private/locale.sh +++ b/distroless/private/locale.sh @@ -10,8 +10,8 @@ shift 4 # TODO: there must be a better way to manipulate tars! # "$bsdtar" -cf $out --posix --no-same-owner --options="" $@ "@$package_path" # "$bsdtar" -cf to.mtree $@ --format=mtree --options '!gname,!uname,!sha1,!nlink' "@$package_path" -# "$bsdtar" --older "0" -Uf $out @to.mtree +# "$bsdtar" --older "0" -Uf $out @to.mtree tmp=$(mktemp -d) "$bsdtar" -xf "$package_path" $@ -C "$tmp" -"$bsdtar" -cf - $@ --format=mtree --options '!gname,!uname,!sha1,!nlink,!time' "@$package_path" | sed 's/$/ time=0.0/' | "$bsdtar" --gzip -cf "$out" -C "$tmp/" @- \ No newline at end of file +"$bsdtar" -cf - $@ --format=mtree --options '!gname,!uname,!sha1,!nlink,!time' "@$package_path" | sed 's/$/ time=0.0/' | "$bsdtar" --gzip --options 'gzip:!timestamp' -cf "$out" -C "$tmp/" @- \ No newline at end of file diff --git a/distroless/private/os_release.bzl b/distroless/private/os_release.bzl index fa6701c..fed92ef 100644 --- a/distroless/private/os_release.bzl +++ b/distroless/private/os_release.bzl @@ -54,7 +54,7 @@ def os_release( name = name, srcs = [":%s_content" % name], mtree = mtree.content(), - args = tar_lib.DEFAULT_ARGS, + args = tar_lib.DEFAULT_ARGS + tar_lib.DEFAULT_COMPRESSION_ARGS["gzip"], compress = "gzip", **common_kwargs ) diff --git a/distroless/private/passwd.bzl b/distroless/private/passwd.bzl index e064bc9..c38a866 100644 --- a/distroless/private/passwd.bzl +++ b/distroless/private/passwd.bzl @@ -63,7 +63,7 @@ def passwd(name, entries, mode = "0644", time = "0.0", **kwargs): name = name, srcs = [":%s_content" % name], mtree = mtree.content(), - args = tar_lib.DEFAULT_ARGS, + args = tar_lib.DEFAULT_ARGS + tar_lib.DEFAULT_COMPRESSION_ARGS["gzip"], compress = "gzip", **common_kwargs ) diff --git a/distroless/private/tar.bzl b/distroless/private/tar.bzl index be94b64..cc3fb13 100644 --- a/distroless/private/tar.bzl +++ b/distroless/private/tar.bzl @@ -11,10 +11,15 @@ DEFAULT_ARGS = [ # TODO: distroless uses gnu archives "--format", "gnutar", - # Gzip timestamps are source of non-hermeticity. disable them - "--options=gzip:!timestamp", ] +DEFAULT_COMPRESSION_ARGS = { + "gzip": [ + # Gzip timestamps are source of non-hermeticity. disable them + "--options=gzip:!timestamp", + ], +} + def _mtree_line(dest, type, content = None, uid = DEFAULT_UID, gid = DEFAULT_GID, time = DEFAULT_TIME, mode = DEFAULT_MODE): # mtree expects paths to start with ./ so normalize paths that starts with # `/` or relative path (without / and ./) @@ -56,7 +61,8 @@ def _build_tar(ctx, mtree, output, inputs = [], compression = "gzip", mnemonic = args = ctx.actions.args() args.add_all(DEFAULT_ARGS) args.add("--create") - args.add(compression, format = "--%s") + tar.common.add_compression_args(compression, args) + _add_default_compression_args(compression, args) args.add("--file", output) args.add(mtree, format = "@%s") @@ -100,9 +106,14 @@ def _create_mtree(ctx = None): content = lambda: content.to_list() + ["#end"], ) +def _add_default_compression_args(compression, args): + args.add_all(DEFAULT_COMPRESSION_ARGS.get(compression, [])) + tar_lib = struct( TOOLCHAIN_TYPE = tar.toolchain_type, DEFAULT_ARGS = DEFAULT_ARGS, + DEFAULT_COMPRESSION_ARGS = DEFAULT_COMPRESSION_ARGS, + add_default_compression_args = _add_default_compression_args, create_mtree = _create_mtree, common = tar.common, )