Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parquet test improvements #175

Merged
merged 10 commits into from
Oct 13, 2023
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ Result runIncTest(String name, String operation, String read, String... loadColu
stats = new_table([
double_col("elapsed_nanos", [end_time - begin_time]),
long_col("processed_row_count", [loaded_tbl_size]),
long_col("result_row_count", [result.size]),
long_col("result_row_count", [result.size])
])
""";
return runTest(name + " -Inc", incQuery, operation, read, loadColumns);
Expand Down Expand Up @@ -245,7 +245,8 @@ Result runTest(String name, String query, String operation, String read, String.
var metrics = new Metrics(Timer.now(), "test-runner", "setup", "test");
metrics.set("static_scale_factor", staticFactor);
metrics.set("inc_scale_factor", incFactor);
metrics.set("row count factor", rowCountFactor);
metrics.set("row_count_factor", rowCountFactor);
api.metrics().add(metrics);
}).execute();
api.result().test("deephaven-engine", result.get().elapsedTime(), result.get().loadedRowCount());
return result.get();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
package io.deephaven.benchmark.tests.standard.parquet;

import org.junit.jupiter.api.*;
import org.junit.jupiter.api.MethodOrderer.OrderAnnotation;

/**
 * Standard tests for writing/reading multi-column data with different codec/compression. To save time, the parquet
 * generated by the "write" tests is used by the "read" tests
 */
@TestMethodOrder(OrderAnnotation.class)
class ParquetCodecTest {
    final ParquetTestRunner runner = new ParquetTestRunner(this);
    final String[] usedColumns = {"str10K", "long10K", "int10K", "short10K", "bigDec10K", "intArr5", "intVec5"};

    @BeforeEach
    void setup() {
        runner.setScaleFactors(5, 1);
    }

    @Test
    @Order(1)
    void writeMultiColSnappy() {
        write("ParquetWrite- Snappy Multi Col -Static", "SNAPPY");
    }

    @Test
    @Order(2)
    void readMultiColSnappy() {
        read("ParquetRead- Snappy Multi Col -Static");
    }

    @Test
    @Order(3)
    void writeMultiColZstd() {
        write("ParquetWrite- Zstd Multi Col -Static", "ZSTD");
    }

    @Test
    @Order(4)
    void readMultiColZstd() {
        read("ParquetRead- Zstd Multi Col -Static");
    }

    @Test
    @Order(5)
    void writeMultiColLzo() {
        write("ParquetWrite- Lzo Multi Col -Static", "LZO");
    }

    @Test
    @Order(6)
    void readMultiColLzo() {
        read("ParquetRead- Lzo Multi Col -Static");
    }

    @Test
    @Order(7)
    void writeMultiColLz4Raw() {
        write("ParquetWrite- Lz4Raw Multi Col -Static", "LZ4_RAW");
    }

    @Test
    @Order(8)
    void readMultiColLz4Raw() {
        read("ParquetRead- Lz4Raw Multi Col -Static");
    }

    @Test
    @Order(9)
    void writeMultiColGzip() {
        write("ParquetWrite- Gzip Multi Col -Static", "GZIP");
    }

    @Test
    @Order(10)
    void readMultiColGzip() {
        read("ParquetRead- Gzip Multi Col -Static");
    }

    @Test
    @Order(11)
    void writeMultiColNone() {
        write("ParquetWrite- No Codec Multi Col -Static", "NONE");
    }

    @Test
    @Order(12)
    void readMultiColNone() {
        read("ParquetRead- No Codec Multi Col -Static");
    }

    @Test
    @Order(13)
    void writeMultiColDefaultSnappy() {
        // Uses the engine's default parquet settings instead of the benchmark overrides
        runner.useParquetDefaultSettings();
        write("ParquetWrite- Snappy Multi Col Defaults -Static", "SNAPPY");
    }

    @Test
    @Order(14)
    void readMultiColDefaultSnappy() {
        runner.useParquetDefaultSettings();
        read("ParquetRead- Snappy Multi Col Defaults -Static");
    }

    // Runs a multi-column parquet write benchmark with the given codec over the shared column set
    private void write(String benchName, String codec) {
        runner.runWriteTest(benchName, codec, usedColumns);
    }

    // Runs a read benchmark against the parquet produced by the corresponding write test
    private void read(String benchName) {
        runner.runReadTest(benchName);
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package io.deephaven.benchmark.tests.standard.parquet;

import org.junit.jupiter.api.*;

/**
 * Standard tests for writing single column parquet for different column types.
 */
class ParquetSingleColTest {
    final ParquetTestRunner runner = new ParquetTestRunner(this);

    @Test
    void writeOneStringCol() {
        runner.setScaleFactors(5, 15);
        writeSnappy("ParquetWrite- 1 String Col -Static", "str10K");
    }

    @Test
    void writeOneBigDecimalCol() {
        runner.setScaleFactors(5, 6);
        writeSnappy("ParquetWrite- 1 Big Decimal Col -Static", "bigDec10K");
    }

    @Test
    void writeOneLongCol() {
        runner.setScaleFactors(5, 15);
        writeSnappy("ParquetWrite- 1 Long Col -Static", "long10K");
    }

    @Test
    void writeOneIntCol() {
        runner.setScaleFactors(5, 30);
        writeSnappy("ParquetWrite- 1 Int Col -Static", "int10K");
    }

    @Test
    void writeOneShortCol() {
        runner.setScaleFactors(5, 35);
        writeSnappy("ParquetWrite- 1 Short Col -Static", "short10K");
    }

    @Test
    void writeOneInt1KArrayCol() {
        // Large cell payloads (1K ints per row) use a reduced static scale factor
        runner.setScaleFactors(0.10, 1);
        writeSnappy("ParquetWrite- 1 Array Col of 1K Ints -Static", "intArr1K");
    }

    @Test
    void writeOneInt1KVectorCol() {
        runner.setScaleFactors(0.10, 1);
        writeSnappy("ParquetWrite- 1 Vector Col of 1K Ints -Static", "intVec1K");
    }

    @Test
    void writeOneInt5ArrayCol() {
        runner.setScaleFactors(2, 4);
        writeSnappy("ParquetWrite- 1 Array Col of 5 Ints -Static", "intArr5");
    }

    @Test
    void writeOneInt5VectorCol() {
        runner.setScaleFactors(2, 4);
        writeSnappy("ParquetWrite- 1 Vector Col of 5 Ints -Static", "intVec5");
    }

    @Test
    void writeOneObjectArrayCol() {
        runner.setScaleFactors(2, 2);
        writeSnappy("ParquetWrite- 1 Array Col of 3 Strings and 2 Nulls -Static", "objArr5");
    }

    @Test
    void writeOneObjectVectorCol() {
        runner.setScaleFactors(2, 1);
        // NOTE(review): "3 String" here vs "3 Strings" in the array test above looks like a typo in the
        // benchmark name; left unchanged since it is a recorded benchmark identifier
        writeSnappy("ParquetWrite- 1 Vector Col of 3 String and 2 Nulls -Static", "objVec5");
    }

    // Runs a single-column parquet write benchmark using the SNAPPY codec
    private void writeSnappy(String benchName, String column) {
        runner.runWriteTest(benchName, "SNAPPY", column);
    }

}
Loading