Skip to content

Commit

Permalink
add table-converter utility
Browse files Browse the repository at this point in the history
  • Loading branch information
DenKoren committed Oct 10, 2024
1 parent 4183b82 commit c687752
Show file tree
Hide file tree
Showing 8 changed files with 529 additions and 2 deletions.
9 changes: 7 additions & 2 deletions scripts/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@ set -o errexit
set -o nounset
set -o pipefail

: "${BUILD_DIR:="build"}"

script_dir="$(cd "$(dirname "${0}")" && pwd)"
cd "${script_dir}/.."
repo_root="$(pwd)"

: "${BUILD_DIR:="${repo_root}/build"}"

pkg_content_root() {
local _pkg_name="${1}"
Expand All @@ -32,8 +34,9 @@ build_binary() {
printf "## os='%s', arch='%s':\n" "${_os_go}" "${_arch_go}"
env GOOS="${_os_go}" GOARCH="${_arch_go}" \
go build \
-C "$(dirname "./${_go_name}")" \
-o "${_pkg_root}/${_bin_name}${_ext}" \
"./${_go_name}"
"./$(basename "${_go_name}")"
}

additional_file() {
Expand Down Expand Up @@ -102,6 +105,8 @@ add_file() {

rm -rf "${script_dir}/${BUILD_DIR}"

build_binaries "table-converter" "table-converter/table-converter" "table-converter"

build_binaries "runenv-java-stub" "dump-args.go" "bin/java"

build_binaries "runenv-python-stub" "runenv-python-stub.go" "bin/python"
Expand Down
1 change: 1 addition & 0 deletions table-converter/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/table-converter/table-converter
3 changes: 3 additions & 0 deletions table-converter/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
module github.com/milaboratory/small-binaries/table-converter

go 1.22.3
60 changes: 60 additions & 0 deletions table-converter/internal/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package converter

import "errors"

// Fallback column names applied by Config.LoadDefaults when the matching
// Config field is left empty.
const (
// DefaultSamplesColumnName is the fallback for Config.SampleColumnName.
DefaultSamplesColumnName = "Sample"
// DefaultMetricColumnLabel is the fallback for Config.MetricColumnLabel.
DefaultMetricColumnLabel = "Metric"
// DefaultValueColumnLabel is the fallback for Config.ValueColumnLabel.
DefaultValueColumnLabel = "Value"
)

// Config holds the settings of a single table conversion.
type Config struct {
// InputFileName is the path of the table to read; "-" selects standard input.
InputFileName string
// InputFileSeparator is the field delimiter of the input table. When it is
// zero, LoadDefaults tries to detect it from InputFileName.
InputFileSeparator rune

// OutputFileName is the path of the table to write; "-" selects standard output.
OutputFileName string
// OutputFileSeparator is the field delimiter of the output table. When it is
// zero, LoadDefaults copies InputFileSeparator or, if that is still zero,
// tries to detect it from OutputFileName.
OutputFileSeparator rune

// SampleColumnName is the input header that marks the sample column; every
// other input column is treated as a metric.
SampleColumnName string

// MetricColumnLabel is the output header of the column holding metric names.
MetricColumnLabel string
// ValueColumnLabel is the output header of the column holding metric values.
ValueColumnLabel string
}

// LoadDefaults fills in every field of the config that is still unset.
//
// Separators are resolved first: a zero input separator is detected from the
// input file name; a zero output separator inherits the input separator and
// falls back to detection from the output file name only when the input
// separator is still zero. Empty column names/labels get their Default*
// constants.
func (c *Config) LoadDefaults() {
	if c.InputFileSeparator == 0 {
		// The detection error is intentionally dropped here: the separator
		// stays at whatever DetectTableSeparator returned and Validate
		// re-runs the detection to report the failure.
		c.InputFileSeparator, _ = DetectTableSeparator(c.InputFileName)
	}

	switch {
	case c.OutputFileSeparator != 0:
		// Explicitly configured: keep as is.
	case c.InputFileSeparator != 0:
		c.OutputFileSeparator = c.InputFileSeparator
	default:
		// Same as above: the error is surfaced later by Validate.
		c.OutputFileSeparator, _ = DetectTableSeparator(c.OutputFileName)
	}

	if c.SampleColumnName == "" {
		c.SampleColumnName = DefaultSamplesColumnName
	}
	if c.MetricColumnLabel == "" {
		c.MetricColumnLabel = DefaultMetricColumnLabel
	}
	if c.ValueColumnLabel == "" {
		c.ValueColumnLabel = DefaultValueColumnLabel
	}
}

// Validate reports separators that are still unset.
//
// For each separator that is zero, the detection on the matching file name is
// repeated and its error, tagged with "[input]" or "[output]", is collected.
// The collected errors are joined into one; nil is returned when there is
// nothing to report.
func (c *Config) Validate() error {
	var errs []error

	if c.InputFileSeparator == 0 {
		// Record only real failures instead of relying on Wrap to map a nil
		// error to nil.
		if _, err := DetectTableSeparator(c.InputFileName); err != nil {
			errs = append(errs, Wrap(err, "[input]"))
		}
	}

	if c.OutputFileSeparator == 0 {
		if _, err := DetectTableSeparator(c.OutputFileName); err != nil {
			errs = append(errs, Wrap(err, "[output]"))
		}
	}

	return errors.Join(errs...)
}
116 changes: 116 additions & 0 deletions table-converter/internal/converter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
package converter

import (
"encoding/csv"
"fmt"
"io"
"os"
)

// SampleColumnName is the header Convert writes for the first (sample) column
// of the output table.
const SampleColumnName = "Sample"

// Converter reads a delimited table and writes it back out with one row per
// sample/metric pair.
type Converter struct {
// config is the configuration given to New, with defaults already loaded.
config Config
}

// New builds a Converter from conf. The config is taken by value, so the
// caller's copy is left untouched; defaults are loaded into the converter's
// own copy.
func New(conf Config) *Converter {
	converter := &Converter{config: conf}
	converter.config.LoadDefaults()
	return converter
}

// parser returns a CSV reader over input that splits fields on the configured
// input separator.
func (c *Converter) parser(input io.Reader) *csv.Reader {
	csvReader := csv.NewReader(input)
	csvReader.Comma = c.config.InputFileSeparator

	return csvReader
}

// formatter returns a CSV writer over output that joins fields with the
// configured output separator.
func (c *Converter) formatter(output io.Writer) *csv.Writer {
	csvWriter := csv.NewWriter(output)
	csvWriter.Comma = c.config.OutputFileSeparator

	return csvWriter
}

// Convert reads a wide table from input and writes it to output in long form.
//
// The first input record is the header. The column whose header equals the
// configured sample column name identifies the sample; every other column is
// a metric. The output starts with a three-column header (sample, metric
// label, value label) followed by one row per sample/metric pair, in input
// order.
//
// An error is returned when the header cannot be read, the sample column is
// missing, a record cannot be parsed, or writing/flushing the output fails.
// When the input has no metric columns only the output header is written.
func (c *Converter) Convert(input io.Reader, output io.Writer) (err error) {
	reader := c.parser(input)
	writer := c.formatter(output)
	defer func() {
		// csv.Writer buffers its output, so a failure of the underlying
		// writer may only surface at flush time; Flush itself returns
		// nothing and the error has to be fetched via Error.
		writer.Flush()
		if flushErr := writer.Error(); flushErr != nil && err == nil {
			err = Wrap(flushErr, "[output]: failed to flush output")
		}
	}()

	headers, err := reader.Read()
	if err != nil {
		return Wrap(err, "[input]: failed to read header")
	}

	sampleIndex, metricIndices := c.detectColumns(headers)
	if sampleIndex == -1 {
		return fmt.Errorf("sample name column %q not found in input header", c.config.SampleColumnName)
	}

	// NOTE(review): the sample header is the package constant, not
	// c.config.SampleColumnName — confirm this is intended.
	err = writer.Write([]string{SampleColumnName, c.config.MetricColumnLabel, c.config.ValueColumnLabel})
	if err != nil {
		return Wrap(err, "[output]: failed to write output header")
	}

	if len(metricIndices) == 0 {
		return nil
	}

	for {
		record, readErr := reader.Read()
		if readErr == io.EOF {
			break
		}
		if readErr != nil {
			return Wrapf(readErr, "[input]")
		}

		sampleName := record[sampleIndex]
		for i, metricIndex := range metricIndices {
			writeErr := writer.Write([]string{sampleName, headers[metricIndex], record[metricIndex]})
			if writeErr != nil {
				// Metrics are numbered from 1 in the error message.
				return Wrapf(writeErr, "[output]: failed to write metric %d of sample %q", i+1, sampleName)
			}
		}
	}

	return nil
}

// detectColumns splits the header positions into the sample column and the
// metric columns.
//
// sampleIndex is the position of the header equal to the configured sample
// column name, or -1 when there is none; if several headers match, the last
// one wins. metricIndices lists the positions of all non-matching headers in
// their original order.
func (c *Converter) detectColumns(headers []string) (sampleIndex int, metricIndices []int) {
	sampleIndex = -1
	for pos := range headers {
		if headers[pos] != c.config.SampleColumnName {
			metricIndices = append(metricIndices, pos)
			continue
		}
		sampleIndex = pos
	}

	return sampleIndex, metricIndices
}

// Run performs the conversion described by the converter's config.
//
// Input is read from the configured input file, or from standard input when
// the name is "-". Output is written to the configured output file (created
// or truncated), or to standard output when the name is "-". Files opened
// here are closed before Run returns.
//
// It returns an error when a file cannot be opened, the conversion fails, or
// the output file cannot be closed cleanly.
func (c *Converter) Run() (err error) {
	var input io.Reader = os.Stdin
	if c.config.InputFileName != "-" {
		inputFile, openErr := os.Open(c.config.InputFileName)
		if openErr != nil {
			return Wrapf(openErr, "[input]: failed to open input file %q", c.config.InputFileName)
		}
		// Read-only handle: a Close failure cannot lose data, so it is ignored.
		defer inputFile.Close()

		input = inputFile
	}

	var output io.Writer = os.Stdout
	if c.config.OutputFileName != "-" {
		outputFile, createErr := os.Create(c.config.OutputFileName)
		if createErr != nil {
			return Wrapf(createErr, "[output]: failed to open output file %q", c.config.OutputFileName)
		}
		defer func() {
			// A failed Close on a written file can mean data never reached
			// the disk, so report it unless an earlier error already won.
			closeErr := outputFile.Close()
			if closeErr != nil && err == nil {
				err = Wrapf(closeErr, "[output]: failed to close output file %q", c.config.OutputFileName)
			}
		}()

		output = outputFile
	}

	return c.Convert(input, output)
}
Loading

0 comments on commit c687752

Please sign in to comment.