Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support flat repos [OLD, see #90] #86

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ module(
compatibility_level = 1,
)

bazel_dep(name = "bazel_skylib", version = "1.5.0")
bazel_dep(name = "bazel_skylib", version = "1.7.1")
bazel_dep(name = "aspect_bazel_lib", version = "2.7.9")

bazel_lib_toolchains = use_extension("@aspect_bazel_lib//lib:extensions.bzl", "toolchains")
Expand All @@ -21,7 +21,7 @@ use_repo(bazel_lib_toolchains, "yq_linux_s390x")
use_repo(bazel_lib_toolchains, "yq_windows_amd64")

bazel_dep(name = "gazelle", version = "0.34.0", dev_dependency = True, repo_name = "bazel_gazelle")
bazel_dep(name = "bazel_skylib_gazelle_plugin", version = "1.5.0", dev_dependency = True)
bazel_dep(name = "bazel_skylib_gazelle_plugin", version = "1.7.1", dev_dependency = True)
bazel_dep(name = "buildifier_prebuilt", version = "6.1.2", dev_dependency = True)
bazel_dep(name = "platforms", version = "0.0.10", dev_dependency = True)
bazel_dep(name = "rules_oci", version = "2.0.0-rc0", dev_dependency = True)
Expand Down
12 changes: 6 additions & 6 deletions MODULE.bazel.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions WORKSPACE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,28 @@ load("@bullseye//:packages.bzl", "bullseye_packages")

bullseye_packages()

# bazel run @bullseye_rproject//:lock
deb_index(
name = "bullseye_rproject",
lock = "//examples/debian_flat_repo:bullseye_rproject.lock.json",
manifest = "//examples/debian_flat_repo:bullseye_rproject.yaml",
)

load("@bullseye_rproject//:packages.bzl", "bullseye_rproject_packages")

bullseye_rproject_packages()

# bazel run @nvidia_ubuntu2404_cuda//:lock
deb_index(
name = "nvidia_ubuntu2404_cuda",
lock = "//examples/debian_flat_repo:nvidia_ubuntu2404_cuda.lock.json",
manifest = "//examples/debian_flat_repo:nvidia_ubuntu2404_cuda.yaml",
)

load("@nvidia_ubuntu2404_cuda//:packages.bzl", "nvidia_ubuntu2404_cuda_packages")

nvidia_ubuntu2404_cuda_packages()

# bazel run @apt_security//:lock
deb_index(
name = "apt_security",
Expand Down
5 changes: 4 additions & 1 deletion apt/private/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,10 @@ bzl_library(
name = "package_index",
srcs = ["package_index.bzl"],
visibility = ["//apt:__subpackages__"],
deps = [":util"],
deps = [
":util",
"@bazel_skylib//lib:paths",
],
)

bzl_library(
Expand Down
2 changes: 1 addition & 1 deletion apt/private/lockfile.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def _add_package(lock, package, arch):
"key": k,
"name": package["Package"],
"version": package["Version"],
"url": "%s/%s" % (package["Root"], package["Filename"]),
"url": package["FileUrl"],
"sha256": package["SHA256"],
"arch": arch,
"dependencies": [],
Expand Down
164 changes: 145 additions & 19 deletions apt/private/package_index.bzl
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
"package index"

load("@bazel_skylib//lib:paths.bzl", "paths")
load(":util.bzl", "util")

def _fetch_package_index(rctx, url, dist, comp, arch, integrity):
target_triple = "{dist}/{comp}/{arch}".format(dist = dist, comp = comp, arch = arch)
def _fetch_package_index(rctx, url, arch, dist = None, comp = None, directory = None):
# TODO: validate mutually exclusive args (dist, comp) VS directory

# See https://linux.die.net/man/1/xz and https://linux.die.net/man/1/gzip
# --keep -> keep the original file (Bazel might be still committing the output to the cache)
Expand All @@ -16,23 +17,53 @@ def _fetch_package_index(rctx, url, dist, comp, arch, integrity):

failed_attempts = []

for (ext, cmd) in supported_extensions.items():
output = "{}/Packages.{}".format(target_triple, ext)
dist_url = "{}/dists/{}/{}/binary-{}/Packages.{}".format(url, dist, comp, arch, ext)
for ext, cmd in supported_extensions.items():
index = "Packages"
index_full = "{}.{}".format(index, ext)

if directory == None: # canonical repo
output = "{dist}/{comp}/{arch}/{index}".format(
dist = dist,
comp = comp,
arch = arch,
index = index,
)

index_url = "{url}/dists/{dist}/{comp}/binary-{arch}/{index_full}".format(
url = url,
dist = dist,
comp = comp,
arch = arch,
index_full = index_full,
)
else: # flat repo
output = "{directory}/{arch}/{index}".format(
directory = directory,
arch = arch,
index = index,
)

index_url = "{url}/{directory}/{index_full}".format(
url = url,
directory = directory,
index_full = index_full,
)

output_full = "{}.{}".format(output, ext)

download = rctx.download(
url = dist_url,
output = output,
integrity = integrity,
url = index_url,
output = output_full,
allow_fail = True,
)
decompress_r = None
if download.success:
decompress_r = rctx.execute(cmd + [output])
decompress_r = rctx.execute(cmd + [output_full])
if decompress_r.return_code == 0:
integrity = download.integrity
break

failed_attempts.append((dist_url, download, decompress_r))
failed_attempts.append((index_url, download, decompress_r))

if len(failed_attempts) == len(supported_extensions):
attempt_messages = []
Expand All @@ -51,11 +82,67 @@ def _fetch_package_index(rctx, url, dist, comp, arch, integrity):
{}
""".format(len(failed_attempts), "\n".join(attempt_messages)))

return ("{}/Packages".format(target_triple), integrity)
return (output, integrity)

def _parse_url(url):
    """Splits an absolute URL into its scheme, host and path.

    Args:
        url: an absolute URL containing "://", e.g. "https://example.com/debian".

    Returns:
        A struct with the fields `scheme`, `host` and `path`. `path` always
        starts with "/" and is exactly "/" when the URL has no path component.

    Fails the evaluation (via `fail`) when `url` does not contain "://".
    """
    if "://" not in url:
        fail("Invalid URL: %s" % url)

    scheme, rest = url.split("://", 1)

    if "/" in rest:
        host, tail = rest.split("/", 1)
        path = "/" + tail
    else:
        # No path component: the host is whatever follows the scheme. Using the
        # full `url` here would leak "scheme://" into the host and produce
        # doubled-up URLs such as "https://https://example.com/...".
        host = rest
        path = "/"

    return struct(scheme = scheme, host = host, path = path)

def _make_file_url(pkg, root_url_, directory = None):
root_url = _parse_url(root_url_)

filename = pkg["Filename"]

invalid_filename = not paths.is_normalized(
filename,
look_for_same_level_references = True,
)

if invalid_filename:
# NOTE:
# Although the Debian repo spec for 'Filename' (see
# https://wiki.debian.org/DebianRepository/Format#Filename) clearly
# says that 'Filename' should be relative to the base directory of the
# repo and should be in canonical form (i.e. without '.' or '..') there
# are cases where this is not honored.
#
# In those cases we try to work around this by assuming 'Filename' is
# relative to the sources.list directory/ so we combine them and
# normalize the new 'Filename' path.
#
# Note that, so far, only the NVIDIA CUDA repos needed this workaround
# so maybe this heuristic will break for other repos that don't conform
# to the Debian repo spec.
filename = paths.normalize(paths.join(directory, filename))

file_url = "{}://{}{}".format(
root_url.scheme,
root_url.host,
paths.join(root_url.path, filename),
)

def _parse_package_index(state, contents, arch, root):
return file_url, invalid_filename

def _parse_package_index(state, contents, arch, root_url, directory = None):
last_key = ""
pkg = {}
total_pkgs = 0
out_of_spec = []

for group in contents.split("\n\n"):
for line in group.split("\n"):
if line.strip() == "":
Expand All @@ -82,10 +169,20 @@ def _parse_package_index(state, contents, arch, root):
pkg[key] = value

if len(pkg.keys()) != 0:
pkg["Root"] = root
util.set_dict(state.packages, value = pkg, keys = (arch, pkg["Package"], pkg["Version"]))
pkg["FileUrl"], invalid_filename = _make_file_url(pkg, root_url, directory)

if invalid_filename:
out_of_spec.append(pkg["Package"])

# NOTE: this fixes the arch for multi-arch flat repos
arch_ = arch if pkg["Architecture"] == "all" else pkg["Architecture"]

util.set_dict(state.packages, value = pkg, keys = (arch_, pkg["Package"], pkg["Version"]))
last_key = ""
pkg = {}
total_pkgs += 1

return out_of_spec, total_pkgs

def _package_versions(state, name, arch):
if name not in state.packages[arch]:
Expand All @@ -105,20 +202,49 @@ def _create(rctx, sources, archs):
)

for arch in archs:
for (url, dist, comp) in sources:
for source in sources:
if len(source) == 2: # flat repo
url, directory = source
index = directory
dist, comp = None, None
else:
url, dist, comp = source
index = "%s/%s" % (dist, comp)
directory = None

# We assume that `url` does not contain a trailing forward slash when passing to
# functions below. If one is present, remove it. Some HTTP servers do not handle
# redirects properly when a path contains "//"
# (ie. https://mymirror.com/ubuntu//dists/noble/stable/... may return a 404
# on misconfigured HTTP servers)
url = url.rstrip("/")

rctx.report_progress("Fetching package index: {}/{} for {}".format(dist, comp, arch))
(output, _) = _fetch_package_index(rctx, url, dist, comp, arch, "")
rctx.report_progress("Fetching %s package index: %s" % (arch, index))
output, _ = _fetch_package_index(
rctx,
url,
arch,
dist = dist,
comp = comp,
directory = directory,
)

rctx.report_progress("Parsing %s package index: %s" % (arch, index))

# TODO: this is expensive to perform.
rctx.report_progress("Parsing package index: {}/{} for {}".format(dist, comp, arch))
_parse_package_index(state, rctx.read(output), arch, url)
out_of_spec, total_pkgs = _parse_package_index(
state,
rctx.read(output),
arch,
url,
directory,
)

if out_of_spec:
count = len(out_of_spec)
pct = int(100.0 * count / total_pkgs)
msg = "Warning: {} index {} has {} packages ({}%) with invalid 'Filename' fields"
print(msg.format(arch, index, count, pct))

return struct(
package_versions = lambda **kwargs: _package_versions(state, **kwargs),
Expand Down
28 changes: 21 additions & 7 deletions apt/private/resolve.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,27 @@ def internal_resolve(rctx, yq_toolchain_prefix, manifest, include_transitive):
sources = []

for src in manifest["sources"]:
distr, components = src["channel"].split(" ", 1)
for comp in components.split(" "):
sources.append((
src["url"],
distr,
comp,
))
channel_chunks = src["channel"].split(" ")

if len(channel_chunks) == 1:
# it's a flat repo, see:
# https://wiki.debian.org/DebianRepository/Format#Flat_Repository_Format
# vs the "canonical" repo:
# https://wiki.debian.org/DebianRepository/Format#Overview
directory = channel_chunks[0]

if not directory.endswith("/"):
fail("Debian flat repo directory must end in '/'")

sources.append((src["url"], directory.rstrip("/")))
else:
distr, components = channel_chunks[0], channel_chunks[1:]

if distr.endswith("/"):
fail("Debian distribution ends in '/' but this is not a flat repo")

for comp in components:
sources.append((src["url"], distr, comp))

pkgindex = package_index.new(rctx, sources = sources, archs = manifest["archs"])
pkgresolution = package_resolution.new(index = pkgindex)
Expand Down
Loading
Loading