Skip to content

Commit

Permalink
general purpose archive package, start extracting gpkgs
Browse files Browse the repository at this point in the history
  • Loading branch information
jaredallard committed Mar 23, 2024
1 parent af5042d commit ca230db
Show file tree
Hide file tree
Showing 6 changed files with 332 additions and 10 deletions.
8 changes: 7 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@ module github.com/jaredallard/binhost

go 1.22

require gotest.tools/v3 v3.5.1
// https://github.com/jaredallard/xz/commit/c20e88619f2c09907bb17bf0b18bbe2627ee570a
replace github.com/jamespfennell/xz => github.com/jaredallard/xz v0.0.0-20240323042956-c20e88619f2c

require (
github.com/jamespfennell/xz v0.1.2
gotest.tools/v3 v3.5.1
)

require github.com/google/go-cmp v0.5.9 // indirect
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/jaredallard/xz v0.0.0-20240323042956-c20e88619f2c h1:tKFrBmEbpqNndOwf7ue8rv5ucI8prRlZj+kA4mQLNpw=
github.com/jaredallard/xz v0.0.0-20240323042956-c20e88619f2c/go.mod h1:DhpWvZY1xDkK/6BREFl3c3R/fZh7IBdYq2m7xh4uLl0=
gotest.tools/v3 v3.5.1 h1:EENdUnS3pdur5nybKYIh2Vfgc8IUNBjxDPSjtiJcOzU=
gotest.tools/v3 v3.5.1/go.mod h1:isy3WKz7GK6uNw/sbHzfKBLvlvXwUyV06n6brMxxopU=
94 changes: 94 additions & 0 deletions internal/archive/archive.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// Copyright (C) 2024 Jared Allard
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

// Package archive implements a helper for extracting archives without
// needing to configure the extraction process.
package archive

import (
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strings"
)

// Configures extractors supported by this package and values
// initialized by the init function.
var (
	// extractors lists every Extractor implementation this package
	// offers. init walks this list to build the extensions table, so an
	// implementation that is not listed here can never be selected by
	// Extract.
	extractors = []Extractor{
		&TarExtractor{},
	}

	// extensions maps an archive extension (without a leading ".") to
	// the Extractor that handles it. It is populated by init.
	extensions = map[string]Extractor{}
)

// init fills the extensions lookup table by asking every entry in
// extractors for the extensions it supports. When two extractors report
// the same extension, the one listed later wins.
func init() {
	for _, extractor := range extractors {
		for _, supported := range extractor.Extensions() {
			extensions[supported] = extractor
		}
	}
}

// ExtractOptions contains the options for extracting an archive. It
// describes where the archive comes from (Reader or Path) and which
// format it is in (Extension).
type ExtractOptions struct {
// Reader is the io.Reader to read the archive from. Either [Reader]
// or [Path] must be provided; Extract returns an error when both are
// unset.
Reader io.Reader

// Extension is the extension of the archive to extract (for example
// "tar" or "tar.gz"). A single leading "." is stripped before the
// extension is looked up. This overrides the extension detection from
// [Path] if provided. This is required if [Reader] is provided.
Extension string

// Path is the path to the archive to extract. Either [Reader] or
// [Path] must be provided. When [Extension] is empty and no [Reader]
// is set, the extension is derived from this path.
Path string
}

// Extract extracts an archive to the provided destination.
//
// The archive is read from opts.Reader when it is set; otherwise the
// file at opts.Path is opened and read. The archive format is selected
// by opts.Extension (a leading "." is ignored). When no reader is set
// and opts.Extension is empty, the extension is detected from
// opts.Path, preferring the longest registered extension so that
// "image.tar.xz" resolves to "tar.xz" rather than "xz".
//
// An error is returned when neither a reader nor a path is provided,
// when a reader is provided without an extension, when no extractor is
// registered for the extension, when the file at opts.Path cannot be
// opened, or when the selected extractor fails.
func Extract(opts ExtractOptions, dest string) error {
	if opts.Reader == nil && opts.Path == "" {
		return fmt.Errorf("either reader or path must be provided")
	}

	ext := strings.TrimPrefix(opts.Extension, ".")
	if ext == "" {
		if opts.Reader != nil {
			return fmt.Errorf("extension must be provided when using a reader (set opts.Extension)")
		}

		// If not set, default to the extension of the provided path.
		ext = extensionFromPath(opts.Path)
	}

	extractor, ok := extensions[ext]
	if !ok {
		return fmt.Errorf("unsupported archive extension: %s", ext)
	}

	r := opts.Reader
	if r == nil {
		// Only a path was provided, so the archive has to be opened here;
		// extractors only know how to consume a reader.
		f, err := os.Open(opts.Path)
		if err != nil {
			return fmt.Errorf("failed to open archive: %w", err)
		}
		defer f.Close() //nolint:errcheck // Why: Read-only handle, nothing to flush.
		r = f
	}

	return extractor.Extract(r, ext, dest)
}

// extensionFromPath returns the extension of path without a leading
// ".". Registered multi-part extensions (such as "tar.gz") take
// precedence over the single trailing suffix reported by filepath.Ext;
// the longest registered extension that the file name ends with wins.
func extensionFromPath(path string) string {
	name := filepath.Base(path)
	best := strings.TrimPrefix(filepath.Ext(name), ".")
	for known := range extensions {
		if len(known) > len(best) && strings.HasSuffix(name, "."+known) {
			best = known
		}
	}
	return best
}

// Extractor is an interface for extracting archives. The package-level
// Extract function picks an Extractor by matching the requested
// extension against the values returned from Extensions.
type Extractor interface {
// Extract extracts all files from the provided reader to the
// destination. ext is the extension that selected this extractor,
// which lets an implementation that handles several formats tell
// them apart.
Extract(r io.Reader, ext, dest string) error

// Extensions should return a list of supported extensions for this
// extractor. Entries are compared verbatim against the requested
// extension, so they should be written without a leading "."
// (for example "tar.gz").
Extensions() []string
}
105 changes: 105 additions & 0 deletions internal/archive/tar.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
// Copyright (C) 2024 Jared Allard
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

package archive

import (
"archive/tar"
"errors"
"fmt"
"io"
"os"
"path/filepath"
)

// Compile-time check that *TarExtractor satisfies the Extractor
// interface.
var _ Extractor = (*TarExtractor)(nil)

// TarExtractor extracts tar archives, optionally wrapped in gzip, xz or
// bzip2 compression. It carries no state.
type TarExtractor struct{}

// Extensions reports the archive extensions (without a leading ".")
// that TarExtractor can extract.
func (t *TarExtractor) Extensions() []string {
	supported := []string{
		"tar",
		"tgz",
		"tar.gz",
		"tar.xz",
		"txz",
		"tar.bz2",
	}
	return supported
}

// Extract reads a tar stream from r and writes its entries beneath
// dest. ext selects the decompression applied to r before it is parsed
// as tar and must be one of the values returned by Extensions.
//
// Only directories and regular files are supported; any other entry
// type aborts the extraction with an error. Entries whose name would
// resolve outside of dest (for example through "..") are rejected, as
// archive contents are not trusted. Parent directories of regular files
// are created on demand because archives are not required to carry an
// explicit entry for every directory.
func (t *TarExtractor) Extract(r io.Reader, ext, dest string) error {
	var container io.ReadCloser
	switch ext {
	case "tar":
		container = io.NopCloser(r)
	case "tgz", "tar.gz":
		var err error
		container, err = newGzipReader(r)
		if err != nil {
			return fmt.Errorf("failed to create gzip reader: %w", err)
		}
	case "tar.bz2":
		container = newBzip2Reader(r)
	case "tar.xz", "txz":
		container = newXZReader(r)
	default:
		// This only happens if we're missing a case in the switch statement.
		return fmt.Errorf("unsupported tar extension: %s", ext)
	}
	defer container.Close()

	tr := tar.NewReader(container)
	for {
		h, err := tr.Next()
		if err != nil {
			if errors.Is(err, io.EOF) {
				break
			}

			return fmt.Errorf("failed to read tar header: %w", err)
		}

		path := filepath.Join(dest, h.Name)

		// Refuse entries that would land outside of dest. Join cleans the
		// path, so a name such as "../../etc/passwd" would otherwise be
		// written wherever it points.
		rel, err := filepath.Rel(dest, path)
		if err != nil || !filepath.IsLocal(rel) {
			return fmt.Errorf("illegal path in package (%s): escapes destination", h.Name)
		}

		switch h.Typeflag {
		case tar.TypeDir:
			if err := os.MkdirAll(path, 0755); err != nil {
				return fmt.Errorf("failed to create directory: %w", err)
			}
		case tar.TypeReg:
			// The archive may not contain an entry for the parent directory.
			if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
				return fmt.Errorf("failed to create parent directory: %w", err)
			}

			if err := writeTarFile(path, tr); err != nil {
				return err
			}
		default:
			return fmt.Errorf("unsupported file type in package (%s: %v)", h.Name, h.Typeflag)
		}

		if err := os.Chmod(path, os.FileMode(h.Mode)); err != nil {
			return fmt.Errorf("failed to set file permissions: %w", err)
		}

		if err := os.Chtimes(path, h.AccessTime, h.ModTime); err != nil {
			return fmt.Errorf("failed to set file times: %w", err)
		}

		// TODO(jaredallard): Symlinks, ownership information, etc...
	}

	return nil
}

// writeTarFile creates (or truncates) the file at path and fills it
// with the contents read from src until EOF.
func writeTarFile(path string, src io.Reader) error {
	f, err := os.Create(path)
	if err != nil {
		return fmt.Errorf("failed to create file: %w", err)
	}

	if _, err := io.Copy(f, src); err != nil {
		_ = f.Close() //nolint:errcheck // Why: Best effort to close the file.
		return fmt.Errorf("failed to copy file contents: %w", err)
	}

	if err := f.Close(); err != nil {
		return fmt.Errorf("failed to close file: %w", err)
	}

	return nil
}
39 changes: 39 additions & 0 deletions internal/archive/tar_decompressors.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Copyright (C) 2024 Jared Allard
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

package archive

import (
"compress/bzip2"
"compress/gzip"
"io"

"github.com/jamespfennell/xz"
)

// newGzipReader creates a new gzip reader from the provided reader.
//
// The gzip header is read and validated immediately, so invalid input
// is reported here rather than on the first Read. On failure the
// returned reader is a nil interface value; returning gzip.NewReader's
// results directly would wrap a nil *gzip.Reader in a non-nil
// io.ReadCloser.
func newGzipReader(r io.Reader) (io.ReadCloser, error) {
	zr, err := gzip.NewReader(r)
	if err != nil {
		return nil, err
	}
	return zr, nil
}

// newXZReader wraps the provided reader in an xz decompressor.
func newXZReader(r io.Reader) io.ReadCloser {
	decompressor := xz.NewReader(r)
	return decompressor
}

// newBzip2Reader wraps the provided reader in a bzip2 decompressor.
// The bzip2 reader has no Close method, so the result is given a no-op
// one to satisfy io.ReadCloser.
func newBzip2Reader(r io.Reader) io.ReadCloser {
	decompressor := bzip2.NewReader(r)
	return io.NopCloser(decompressor)
}
94 changes: 85 additions & 9 deletions internal/packages/packages.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,22 +17,98 @@
package packages

import (
"archive/tar"
"fmt"
"io"
"os"
"path/filepath"

"github.com/jaredallard/binhost/internal/archive"
)

// supportedCompressionExtensions is a list of supported compression
// extensions that this package can handle. Entries are written without
// a leading "." and are appended to "<name>.tar." when looking for the
// archives inside an extracted gpkg; they are tried in the order listed.
var supportedCompressionExtensions = []string{"xz", "gz"}

// Package represents a Gentoo gpkg (xpkg is not supported).
type Package struct {
// TODO(jaredallard): Add fields for a package here.
// Name is the name of the package.

// Fields below are populated from the extracted contents of the gpkg.

// imagePath contains the path on disk to the extracted image archive.
imagePath string
// metadataPath contains the path on disk to the extracted metadata archive.
metadataPath string
}

// New creates a new Package from the provided [io.ReadCloser].
// New creates a new Package from the provided [io.ReadCloser]. The
// provided ReadCloser should be streaming the raw contents of a Gentoo
// package (gpkg).
//
// The package will be stored on disk in a temporary directory due to
// the nature of gpkgs being usually a large tarball.
func New(r io.ReadCloser) (*Package, error) {
var p Package
tmpDir, err := os.MkdirTemp("", "binhost-extract-")
if err != nil {
return nil, fmt.Errorf("failed to create temporary directory: %w", err)
}

// Cleanup the temp directory if we fail.
var keepTempDir bool
defer func() {
if !keepTempDir {
os.RemoveAll(tmpDir)
}
}()

if err := archive.Extract(archive.ExtractOptions{
Reader: r,
Extension: "tar", // gpkg files are tar archives.
}, tmpDir); err != nil {
return nil, fmt.Errorf("failed to extract gpkg: %w", err)
}

p, err := packageFromDir(tmpDir)
if err != nil {
return nil, fmt.Errorf("failed to create package from extracted contents: %w", err)
}

keepTempDir = true
return p, nil
}

// packageFromDir creates a Package from the extracted contents of a
// gpkg tar. The supplied directory's Manifest is used to validate the
// contents of the package.
func packageFromDir(dir string) (*Package, error) {
expectedFiles := []string{"Manifest", "gpkg-1"}
expectedArchives := []string{"image", "metadata"}

for _, name := range expectedFiles {
if _, err := os.Stat(filepath.Join(dir, name)); err != nil {
return nil, fmt.Errorf("package missing required file: %s", name)
}
}

archives := make(map[string]string)
for _, name := range expectedArchives {
for _, ext := range supportedCompressionExtensions {
archiveName := name + ".tar." + ext
if _, err := os.Stat(filepath.Join(dir, archiveName)); err == nil {
archives[name] = archiveName
break
}

t := tar.NewReader(r)
// Extract the archive
if err := archive.Extract(archive.ExtractOptions{
Path: filepath.Join(dir, archiveName)}, filepath.Join(dir, name),
); err != nil {
return nil, fmt.Errorf("failed to extract archive %s: %w", archiveName, err)
}
}
if _, ok := archives[name]; !ok {
return nil, fmt.Errorf("package missing required archive: %s", name)
}
}

// TODO(jaredallard): Once we have internet, we should actually do
// something with the tar reader.
_ = t
return &p, nil
}

0 comments on commit ca230db

Please sign in to comment.