Tag Iterate for worst score offenders (#331)
stanbrub authored Jul 31, 2024
1 parent a4f6938 commit 17ae7de
Showing 18 changed files with 64 additions and 6 deletions.
@@ -24,6 +24,7 @@ void medianBy0Groups() {
}

@Test
@Tag("Iterate")
void medianBy1Group() {
runner.setScaleFactors(12, 11);
var q = "source.median_by(by=['key1'])";
@@ -41,13 +41,15 @@ void readOneStringCol() {

@Test
@Order(5)
@Tag("Iterate")
void writeOneBigDecimalCol() {
runner.setScaleFactors(5, 5);
runner.runParquetWriteTest("ParquetWrite- 1 Big Decimal Col -Static", "NONE", "bigDec10K");
}

@Test
@Order(6)
@Tag("Iterate")
void readOneBigDecimalCol() {
runner.setScaleFactors(5, 5);
runner.runParquetReadTest("ParquetRead- 1 Big Decimal Col -Static");
@@ -19,6 +19,7 @@ void setup() {

@Test
@Order(1)
@Tag("Iterate")
void writeMultiColSnappy() {
runner.runParquetWriteTest("ParquetWrite- Snappy Multi Col -Static", "SNAPPY", usedColumns);
}
@@ -31,6 +32,7 @@ void readMultiColSnappy() {

@Test
@Order(3)
@Tag("Iterate")
void writeMultiColZstd() {
runner.runParquetWriteTest("ParquetWrite- Zstd Multi Col -Static", "ZSTD", usedColumns);
}
@@ -43,6 +45,7 @@ void readMultiColZstd() {

@Test
@Order(5)
@Tag("Iterate")
void writeMultiColLzo() {
runner.runParquetWriteTest("ParquetWrite- Lzo Multi Col -Static", "LZO", usedColumns);
}
@@ -22,34 +22,39 @@ void setup(int rowFactor, int staticFactor, int incFactor) {
}

@Test
@Tag("Iterate")
void select1Calc1ColFormula() {
setup(6, 22, 8);
var q = "source.select(['num2',${calcs}]).sum_by()".replace("${calcs}", calc1col1);
runner.test("Select-Sum- 1 Calc Using 1 Col", 1, q, "num2");
}

@Test
@Tag("Iterate")
void select1Calc2ColsFormula() {
setup(6, 14, 7);
var q = "source.select(['num1','num2',${calcs}]).sum_by()".replace("${calcs}", calc1cols2);
runner.test("Select-Sum- 1 Calc Using 2 Cols", 1, q, "num1", "num2");
}

@Test
@Tag("Iterate")
void select2Calcs2ColsFormula() {
setup(6, 12, 6);
var q = "source.select(['num1','num2',${calcs}]).sum_by()".replace("${calcs}", calc2cols2);
runner.test("Select-Sum- 2 Calcs Using 2 Cols", 1, q, "num1", "num2");
}

@Test
@Tag("Iterate")
void update1Calc1ColsFormula() {
setup(6, 32, 20);
var q = "source.update([${calcs}]).sum_by()".replace("${calcs}", calc1col1);
runner.test("Update-Sum- 1 Calc Using 1 Col", 1, q, "num2");
}

@Test
@Tag("Iterate")
void update1Calc2ColsFormula() {
setup(6, 22, 16);
var q = "source.update([${calcs}]).sum_by()".replace("${calcs}", calc1cols2);
@@ -86,20 +91,23 @@ void view2Calcs2ColsFormula() {
}

@Test
@Tag("Iterate")
void updateView1Calc1ColFormula() {
setup(6, 37, 35);
var q = "source.update_view([${calcs}]).sum_by()".replace("${calcs}", calc1col1);
runner.test("UpdateView-Sum- 1 Calc Using 1 Col", 1, q, "num2");
}

@Test
@Tag("Iterate")
void updateView1Calc2ColsFormula() {
setup(6, 22, 20);
var q = "source.update_view([${calcs}]).sum_by()".replace("${calcs}", calc1cols2);
runner.test("UpdateView-Sum- 1 Calc Using 2 Cols", 1, q, "num1", "num2");
}

@Test
@Tag("Iterate")
void updateView2Calcs2ColsFormula() {
setup(6, 17, 17);
var q = "source.update_view([${calcs}]).sum_by()".replace("${calcs}", calc2cols2);
@@ -16,6 +16,7 @@ public class EmMaxTickTest {
final Setup setup = new Setup(runner);

@Test
@Tag("Iterate")
void emMaxTick0Group1Col() {
setup.factors(5, 14, 12);
setup.emTick0Groups("emmax_tick");
@@ -15,6 +15,7 @@ public class EmMaxTimeTest {
final Setup setup = new Setup(runner);

@Test
@Tag("Iterate")
void emMaxTime0Group1Col() {
setup.factors(5, 10, 8);
setup.emTime0Groups("emmax_time");
@@ -15,6 +15,7 @@ public class EmMinTickTest {
final Setup setup = new Setup(runner);

@Test
@Tag("Iterate")
void emMinTick0Group1Col() {
setup.factors(6, 14, 11);
setup.emTick0Groups("emmin_tick");
@@ -16,6 +16,7 @@ public class EmMinTimeTest {
final Setup setup = new Setup(runner);

@Test
@Tag("Iterate")
void emMinTime0Group1Col() {
setup.factors(5, 10, 8);
setup.emTime0Groups("emmin_time");
@@ -16,6 +16,7 @@ public class EmStdTickTest {
final Setup setup = new Setup(runner);

@Test
@Tag("Iterate")
void emStdTick0Group1Col() {
setup.factors(6, 18, 10);
setup.emTick0Groups("emstd_tick");
@@ -15,6 +15,7 @@ public class EmStdTimeTest {
final Setup setup = new Setup(runner);

@Test
@Tag("Iterate")
void emStdTime0Group1Col() {
setup.factors(5, 12, 10);
setup.emTime0Groups("emstd_time");
@@ -15,6 +15,7 @@ public class EmsTickTest {
final Setup setup = new Setup(runner);

@Test
@Tag("Iterate")
void emsTick0Group1Col() {
setup.factors(6, 18, 14);
setup.emTick0Groups("ems_tick");
@@ -15,6 +15,7 @@ public class EmsTimeTest {
final Setup setup = new Setup(runner);

@Test
@Tag("Iterate")
void emsTime0Group1Col() {
setup.factors(5, 11, 8);
setup.emTime0Groups("ems_time");
@@ -8,6 +8,7 @@
* Standard tests for the whereIn table operation. Filters rows of data from the source table where the rows match
* column values in the filter table.
*/
@Tag("Iterate")
public class WhereInTest {
final StandardTestRunner runner = new StandardTestRunner(this);

@@ -28,7 +29,6 @@ void setup() {
}

@Test
@Tag("Iterate")
void whereIn1Filter() {
runner.setScaleFactors(135, 100);
var q = "source.where_in(where_filter, cols=['key1 = set1'])";
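(Editorial note: for readers unfamiliar with the operation under test, a minimal where_in sketch follows. The tables below are made up for illustration; the benchmark's real source and filter tables come from the test setup. where_not_in, covered by the next file, is the converse and drops the matching rows instead.)

    from deephaven import empty_table

    # Hypothetical tables: key1/num1/set1 mirror the column names used in the
    # benchmark queries, but the data here is illustrative only.
    source = empty_table(1000).update(['key1 = ii % 10', 'num1 = ii'])
    where_filter = empty_table(3).update(['set1 = ii'])

    # Keep only source rows whose key1 value appears in where_filter.set1
    result = source.where_in(where_filter, cols=['key1 = set1'])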
@@ -8,6 +8,7 @@
* Standard tests for the whereNotIn table operation. Filters rows of data from the source table where the rows match
* column values in the filter table. Note: These benchmarks do the converse of the ones in WhereInTest
*/
@Tag("Iterate")
public class WhereNotInTest {
final StandardTestRunner runner = new StandardTestRunner(this);

@@ -28,7 +29,6 @@ void setup() {
}

@Test
@Tag("Iterate")
void whereNotIn1Filter() {
runner.setScaleFactors(80, 75);
var q = "source.where_not_in(where_filter, cols=['key1 = set1'])";
@@ -8,6 +8,7 @@
* Standard tests for the whereOneOf table operation. Filters rows of data from the source table where the rows match
* column values in the filter table.
*/
@Tag("Iterate")
public class WhereOneOfTest {
final StandardTestRunner runner = new StandardTestRunner(this);

@@ -18,7 +19,6 @@ void setup() {
}

@Test
@Tag("Iterate")
void whereOneOf1Filter() {
runner.setScaleFactors(365, 300);
var q = """
@@ -7,6 +7,7 @@
/**
* Standard tests for the where table operation. Filters rows of data from the source table.
*/
@Tag("Iterate")
public class WhereTest {
final StandardTestRunner runner = new StandardTestRunner(this);

@@ -17,7 +18,6 @@ void setup() {
}

@Test
@Tag("Iterate")
void where1Filter() {
runner.setScaleFactors(330, 310);
var q = """
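(Editorial note: the where and where_one_of tests above filter on formula expressions rather than a filter table; their queries are collapsed in this view, so the filters below are hypothetical examples reusing the sketch tables from the where_in note.)

    # where: a row must satisfy every filter (logical AND)
    anded = source.where(['key1 = 7', 'num1 % 2 == 0'])

    # where_one_of: a row must satisfy at least one filter (logical OR)
    ored = source.where_one_of(['key1 = 7', 'num1 % 2 == 0'])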
@@ -18,7 +18,7 @@ def normalize_name(name):
# Get the latest GCloud run_ids for the benchmark category up to max_runs
def get_remote_children(parent_uri, category, max_runs=10):
run_ids = []
search_uri = parent_uri + '?delimiter=/&prefix=' + category + '/' + '&max-keys=100000'
search_uri = parent_uri + '?delimiter=/&prefix=' + category + '/' + '&max-keys=10000'
with urlopen(search_uri) as r:
text = r.read().decode()
for run_id in re.findall('<Prefix>{}/([^/><]+)/</Prefix>'.format(category), text, re.MULTILINE):
@@ -45,6 +45,7 @@ def get_children(storage_uri, category, max_runs):
def get_run_paths(storage_uri, category, actor_filter, set_filter, max_sets):
set_matcher = re.compile(set_filter)
actor_matcher = re.compile(actor_filter)
run_matcher = re.compile('run-[0-9A-Za-z]+')
benchmark_sets = []
for actor in get_children(storage_uri, category, 1000):
if actor_matcher.match(actor):
@@ -56,7 +57,8 @@ def get_run_paths(storage_uri, category, actor_filter, set_filter, max_sets):
benchmark_runs = []
for set_path in benchmark_sets:
for run_id in get_children(storage_uri, category + '/' + set_path, 1000):
benchmark_runs.append(set_path + '/' + run_id)
if run_matcher.match(run_id):
benchmark_runs.append(set_path + '/' + run_id)
return benchmark_runs

# Cache an HTTP url into a local directory and return the local path
@@ -152,6 +154,7 @@ def get_default_set_filter(category):
print('Running:', {'storage_uri':storage_uri,'category':category,'max_sets':max_sets,'history_runs':history_runs,
'actor_filter':actor_filter,'set_filter':set_filter,'platform_props':platform_props,'metric_props':metric_props})


run_ids = get_run_paths(storage_uri, category, actor_filter, set_filter, max_sets)
bench_results = merge_run_tables(storage_uri, run_ids, category, 'benchmark-results.csv', convert_result)
bench_metrics = merge_run_tables(storage_uri, run_ids, category, 'benchmark-metrics.csv', convert_metric)
@@ -0,0 +1,33 @@
from deephaven.updateby import rolling_group_tick
from urllib.request import urlopen; import os

score_threshold = 10.0  # absolute z-score at or above this value counts as a hit

root = 'file:///nfs' if os.path.exists('/nfs/deephaven-benchmark') else 'https://storage.googleapis.com'
with urlopen(root + '/deephaven-benchmark/benchmark_tables.dh.py') as r:
    benchmark_storage_uri_arg = root + '/deephaven-benchmark'
    benchmark_max_sets_arg = 100
    benchmark_category_arg = 'nightly'
    exec(r.read().decode(), globals(), locals())

def count_threshold_hits(scores) -> int:
    scores = array('d', scores)
    hits = 0
    for score in scores:
        if score <= -score_threshold or score >= score_threshold:
            hits = hits + 1
    return hits

op_day5 = rolling_group_tick(cols=['op_rates=op_rate','set_op_rates=set_op_rates'], rev_ticks=6, fwd_ticks=-1)
scores = bench_results_sets.sort_descending(['benchmark_name','set_id']) \
    .update_by(ops=[op_day5], by="benchmark_name") \
    .where(['op_rates.size() > 2']) \
    .update(['set_op_rates=(long[])merge_arrays(`long`,set_op_rates)','score=zscore(op_rate, op_rates)',
        'var5d=rstd(op_rates)']) \
    .group_by(['benchmark_name']) \
    .update(['op_rate=avg(op_rate)','var5d=avg(var5d)','score_min=min(score)','score_max=max(score)',
        'score_avg=avg(score)','hits=count_threshold_hits(score)']) \
    .view(['Benchmark=benchmark_name','Rate=op_rate','Var5d=var5d','ScoreMin=score_min',"ScoreMax=score_max",
        'ScoreAvg=score_avg','Hits=hits'
    ])
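(Editorial note: bench_results_sets, merge_arrays, zscore, and rstd are not defined in this file; they are provided by the benchmark_tables.dh.py script exec'd above. As a rough illustration only, an assumed definition of the score column follows: a z-score of the latest rate against the rolling window of earlier rates collected by rolling_group_tick. This is a sketch, not the actual helper.)

    import statistics

    # Illustrative sketch of the assumed zscore(rate, rates) behavior: how many
    # standard deviations the latest rate sits from the window mean. A run at or
    # beyond +/- score_threshold would be counted as a hit.
    def zscore_sketch(rate, rates):
        mean = statistics.mean(rates)
        std = statistics.stdev(rates)  # needs at least two samples
        return 0.0 if std == 0 else (rate - mean) / std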
