feat: Javadoc and Source Generation #367

Merged 5 commits on Oct 18, 2024
5 changes: 5 additions & 0 deletions .github/scripts/make-release-distro.sh
@@ -8,6 +8,8 @@ set -o nounset

# Create a tar file with the given version using the git project located in the
# working directory.
#
# ex. .github/scripts/make-release-distro.sh 0.33.4 .github/distro

if [[ $# != 2 ]]; then
    echo "$0: Missing release version or distro source argument"
@@ -35,8 +37,11 @@ cp ${DISTRO_SOURCE}/* ${DISTRO_DEST}
rm ${DISTRO_DEST}/dependency-pom.xml
cp target/${ARTIFACT}.jar ${DISTRO_DEST}/libs/
cp target/${ARTIFACT}-tests.jar ${DISTRO_DEST}/libs/
cp target/${ARTIFACT}-sources.jar ${DISTRO_DEST}/libs/
cp target/${ARTIFACT}-javadoc.jar ${DISTRO_DEST}/libs/
echo "VERSION=${RELEASE_VERSION}" > ${DISTRO_DEST}/.env

cd ${DISTRO_DEST}
tar cvzf ../${ARTIFACT}.tar * .env
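
With the two added cp lines, the release tar now bundles the sources and javadoc jars under libs/ next to the main and tests jars. A minimal local check might look like the sketch below; the 0.33.4 version is just the example from the script's usage comment, and the tar name and location are inferred from the script (ARTIFACT.tar is written one level above DISTRO_DEST).

    # Assumes mvn install has already produced the jars under target/.
    # Build a local distro for an example version, then list the tar to
    # confirm the sources and javadoc jars were packed into libs/.
    .github/scripts/make-release-distro.sh 0.33.4 .github/distro
    tar -tzf deephaven-benchmark-0.33.4.tar | grep 'libs/'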


2 changes: 2 additions & 0 deletions .github/workflows/publish-benchmarks.yml
@@ -103,6 +103,8 @@ jobs:
            deephaven-benchmark-${{env.VERSION}}.tar
            deephaven-benchmark-${{env.VERSION}}-results.tar
            release-notes.md
            deephaven-benchmark-${{env.VERSION}}-sources.jar
            deephaven-benchmark-${{env.VERSION}}-javadoc.jar

      - name: Publish Github Release
        if: ${{ github.ref_name == 'main' }}
36 changes: 35 additions & 1 deletion pom.xml
@@ -105,7 +105,41 @@
        </executions>
      </plugin>
      <plugin>
        <!-- For spotless to work on Windows, Set git config global 'core.autocrlf' to true -->
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-source-plugin</artifactId>
        <version>3.3.1</version>
        <executions>
          <execution>
            <id>attach-sources</id>
            <phase>install</phase>
            <goals>
              <goal>jar-no-fork</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-javadoc-plugin</artifactId>
        <version>3.10.1</version>
        <configuration>
          <excludePackageNames>*.connect,*.controller,*.generator,*.jfr,*.metric,*.run,*.util</excludePackageNames>
          <show>public</show>
          <nohelp>true</nohelp>
        </configuration>
        <executions>
          <execution>
            <id>attach-javadocs</id>
            <phase>install</phase>
            <goals>
              <goal>jar</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <!-- For spotless to work on Windows, Set git config global
             'core.autocrlf' to true -->
        <groupId>com.diffplug.spotless</groupId>
        <artifactId>spotless-maven-plugin</artifactId>
        <version>2.43.0</version>
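
Both new plugins bind their jar goals to the install phase, so a plain mvn install should now attach the extra artifacts next to the main jar. A rough local sanity check (a sketch; the file names assume the deephaven-benchmark artifact id plus the plugins' default -sources and -javadoc classifiers):

    # Run the full build; attach-sources (jar-no-fork) and attach-javadocs (jar)
    # execute during the install phase and drop their jars into target/.
    mvn install
    ls target/*-sources.jar target/*-javadoc.jar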
@@ -17,12 +17,12 @@
* A wrapper for the Bench api that allows running tests for the purpose of comparing Deephaven to other products that
* perform similar operations. It allows running Deephaven operations or using Deephaven as an agent to run command line
* python tests in the same environment (e.g. Docker).
* <p/>
* <p>
* One of two initializers must be called to set up which type of tests is desired; {@code initDeephaven()} or
* {@code initPython()}. Deephaven tests run queries inside of Deephaven like the standard benchmarks. Python tests use
* Deephaven as an agent to run python scripts from the command line by first installing required pip modules in a
* python virtual environment and then running each test from there.
* <p/>
* <p>
* Note: This runner requires test ordering, so it follows that tests in a single test class are meant to be run as a
* group. This violates the standard Benchmark convention that every test be able to be run by itself. This is done for
* practical purposes, though it is not ideal.
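
The <p/> to <p> swaps in this and the following Javadoc blocks keep the new javadoc jar build clean: recent JDK doclint treats a self-closing <p/> as invalid HTML and typically reports it as an error, which would fail the attach-javadocs execution. A quick way to check the docs before the install phase (a sketch; the exact doclint message varies by JDK):

    # Regenerate the API docs with the new plugin configuration; with <p/> still
    # present, doclint would typically complain about self-closing elements,
    # while <p> passes cleanly.
    mvn javadoc:javadoc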
@@ -10,9 +10,9 @@
* Product comparison tests for the average by group operation. Tests read the same parquet data. To avoid an unfair
* advantage where some products may partition or group data during the read, parquet read time is included in the
* benchmark results.
* <p/>
* <p>
* Each test calculates two new average columns and groups by a string and an integer.
* <p/>
* <p>
* Data generation only happens in the first tests, the Deephaven test. Tests can be run individually, but only after
* the desired data has been generated.
*/
@@ -10,9 +10,9 @@
* Product comparison tests for the distinct (or select distinct) group operation. Tests read the same parquet data. To
* avoid an unfair advantage where some products may partition or group data during the read, parquet read time is
* included in the benchmark results.
* <p/>
* <p>
* Each test produces a table result that contains rows unique according to a string and an integer.
* <p/>
* <p>
* Data generation only happens in the first tests, the Deephaven test. Tests can be run individually, but only after
* the desired data has been generated.
*/
@@ -10,10 +10,10 @@
* Product comparison tests for filter (where) operations. Tests read the same parquet data. To avoid an unfair
* advantage where some products may partition or group data during the read, parquet read time is included in the
* benchmark results.
* <p/>
* <p>
* Each test produces a table result filtered by three criteria; value is an exact string, value > an integer, value <
* an integer.
* <p/>
* <p>
* Data generation only happens in the first tests, the Deephaven test. Tests can be run individually, but only after
* the desired data has been generated.
*/
@@ -9,11 +9,11 @@
* Product comparison tests for iterating and summing table columns. Tests read the same parquet data. To avoid an
* unfair advantage where some products may partition or group data during the read, parquet read time is included in
* the benchmark results.
* <p/>
* <p>
* Each test produces a table result containing one row with one column that is the total of the result of the sum of
* two columns for each row. ex. sum((r1c1 + r1c2)..(rNc1 + rNc2)). This is achieved without creating an extra column to
* hold the column sums.
* <p/>
* <p>
* Data generation only happens in the first test, the Deephaven test. Tests can be run individually, but only after the
* desired data has been generated.
*/
@@ -10,9 +10,9 @@
* Product comparison tests for inner join operations. Tests read the same parquet data. To avoid an unfair advantage
* where some products may partition or group data during the read, parquet read time is included in the benchmark
* results.
* <p/>
* <p>
* Each test produces a table that is the result of two tables intersected by a string and an integer.
* <p/>
* <p>
* Data generation only happens in the first tests, the Deephaven test. Tests can be run individually, but only after
* the desired data has been generated.
*/
@@ -9,9 +9,9 @@
/**
* Product comparison tests for sort operations. Tests read the same parquet data. To avoid an unfair advantage where
* some products may partition or group data during the read, parquet read time is included in the benchmark results.
* <p/>
* <p>
* Each test sorts a table by a string and an integer.
* <p/>
* <p>
* Data generation only happens in the first tests, the Deephaven test. Tests can be run individually, but only after
* the desired data has been generated.
*/
@@ -14,7 +14,7 @@
* boilerplate logic like imports, parquet reads, time measurement logic, etc. Each <code>test</code> runs two tests;
* one reading from a static parquet, and the other exercising ticking tables through the
* <code>AutotuningIncrementalReleaseFilter</code>.
* <p/>
* <p>
* Note: This class is for running tests in the <code>experimental</code> package. It will change as new experiments are
* added and may require external setup (i.e. parquet files) to work.
*/
@@ -112,7 +112,7 @@ public void table(String name, long rowCount) {
* Run the benchmark test according to the operation and the columns loaded from the source table. The name will
* show in the benchmark result output. The expected row count, since tests can scale, is an upper bound what result
* row count is expected.
* <p/>
* <p>
* This method assembles and runs two queries according to the settings provided previously: static and incremental
* release. Both runs are expected to produce the same resulting row count.
*
@@ -12,7 +12,7 @@
* This tests a sort operation based on generated rows of data. The expected row count is achieved by fully generating
* the data to a parquet file or partially generating the data and merging that by <code>tableFactor</code> to get row
* count.
* <p/>
* <p>
* Note: For best results, use base and row counts that are highly divisible and clear like 1,000,000 so that the
* <code>tableFactor</code> is a whole number.
*/
@@ -44,7 +44,7 @@ public void setup() {
* </ol>
* This test is identical to {@link #joinTwoTablesFromKafkaStream_Shorthand} except without the use of
* <code>bench_api_</code> functions for Kafka consumers and table waiting.
* <p/>
* <p>
* Properties (e.g. ${kafka.consumer.addr}) are automatically filled in during query execution.
*/
@Test
@@ -116,7 +116,7 @@ with exclusive_lock(table):
* </ol>
* This test is identical to {@link #joinTwoTablesFromKafkaStream_Longhand} except without the use of
* <code>bench_api_</code> functions for Kafka consumers and table waiting.
* <p/>
* <p>
* Properties (e.g. ${kafka.consumer.addr}) are automatically filled in during query execution.
*/
@Test
@@ -51,7 +51,7 @@ public Bench api() {
* Generate the given pre-defined tables according to the default data distribution defined by the
* <code>default.data.distribution</code> property. The first table name provided will be the main
* <code>source</code> table.
* <p/>
* <p>
* This method should only be called once per test.
*
* @param names the table names
@@ -81,7 +81,7 @@ public void table(String name, String distribution) {
/**
* Generate a pre-defined table and set a column grouping for the resulting table. The given table name will be used
* as the main table used by subsequent queries.
* <p/>
* <p>
*
* @param name the table name to generate
* @param groups
@@ -163,7 +163,7 @@ public void test(String name, String operation, String... loadColumns) {
* <li>If static test duration <code>&lt; scale.elapsed.time.target</code>, scale row count and do it again</li>
* <li>Run test with auto increment release filter according to the previously determined row count</li>
* <li>Assert that both static and incremental result tables have the same number of rows</li>
* <p/>
* <p>
*
* @param name the name of the test as it will show in the result file
* @param expectedRowCount the max row count expected from the operation regardless of scale, or zero if the count
@@ -7,7 +7,7 @@
/**
* Standard tests for the groupBy table operation. Ungroups column content. It is the inverse of groupBy.
* Ungroup unwraps columns containing Deephaven arrays or vectors.
* <p/>
* <p>
* Note: These tests do group then ungroup, since the data generator does not support arrays
*/
public class UngroupTest {
@@ -6,10 +6,10 @@

/**
* Standard tests for running user-defined functions. These tests are meant to be compared, and so use the same data.
* <p/>
* <p>
* Note: When scaling row count, vector size should not get bigger. That would cause more than one axis change and
* invalidate any expected comparisons.
* <p/>
* <p>
* Note: The "No Hints" tests have casts to make them equivalent to the hints tests, otherwise the return value would
* always be a PyObject and not really the same test. They use two formulas to achieve this, otherwise vectorization
* would not happen on "No Hints" benchmarks.
@@ -7,7 +7,7 @@
/**
* Standard tests for the updateBy table operation. Calculates a tick-based exponential moving maximum for specified
* columns and places the result into a new column for each row.
* <p/>
* <p>
* Note: This test must contain benchmarks and <code>decay_ticks</code> that are comparable to
* <code>EmMaxTimeTest</code>
*/
@@ -7,7 +7,7 @@
/**
* Standard tests for the updateBy table operation. Calculates a time-based exponential moving maximum for specified
* columns and places the result into a new column for each row.
* <p/>
* <p>
* Note: This test must contain benchmarks and <code>decay_time</code> that are comparable to <code>EmMaxTickTest</code>
*/
public class EmMaxTimeTest {
@@ -7,7 +7,7 @@
/**
* Standard tests for the updateBy table operation. Calculates a tick-based exponential moving minimum for specified
* columns and places the result into a new column for each row. *
* <p/>
* <p>
* Note: This test must contain benchmarks and <code>decay_time</code> that are comparable to <code>EmMinTickTest</code>
*/
public class EmMinTickTest {
@@ -7,7 +7,7 @@
/**
* Standard tests for the updateBy table operation. Calculates a time-based exponential moving minimum for specified
* columns and places the result into a new column for each row.
* <p/>
* <p>
* Note: This test must contain benchmarks and <code>decay_ticks</code> that are comparable to
* <code>EmMinTimeTest</code>
*/
@@ -7,7 +7,7 @@
/**
* Standard tests for the updateBy table operation. Calculates a tick-based exponential moving standard deviation for
* specified columns and places the result into a new column for each row. *
* <p/>
* <p>
* Note: This test must contain benchmarks and <code>decay_ticks</code> that are comparable to
* <code>EmStdTimeTest</code>
*/
@@ -7,7 +7,7 @@
/**
* Standard tests for the updateBy table operation. Calculates a time-based exponential moving standard deviation for
* specified columns and places the result into a new column for each row. *
* <p/>
* <p>
* Note: This test must contain benchmarks and <code>decay_time</code> that are comparable to <code>EmStdTickTest</code>
*/
public class EmStdTimeTest {
@@ -7,7 +7,7 @@
/**
* Standard tests for the updateBy table operation. Calculates a tick-based exponential moving average for specified
* columns and places the result into a new column for each row.
* <p/>
* <p>
* Note: This test must contain benchmarks and <code>decay_ticks</code> that are comparable to <code>EmaTimeTest</code>
*/
public class EmaTickTest {
@@ -7,7 +7,7 @@
/**
* Standard tests for the updateBy table operation. Calculates a time-based exponential moving average for specified
* columns and places the result into a new column for each row.
* <p/>
* <p>
* Note: This test must contain benchmarks and <code>decay_time</code> that are comparable to <code>EmaTickTest</code>
*/
public class EmaTimeTest {
@@ -7,7 +7,7 @@
/**
* Standard tests for the updateBy table operation. Calculates a tick-based exponential moving sum for specified columns
* and places the result into a new column for each row. *
* <p/>
* <p>
* Note: This test must contain benchmarks and <code>decay_ticks</code> that are comparable to <code>EmsTimeTest</code>
*/
public class EmsTickTest {
@@ -7,7 +7,7 @@
/**
* Standard tests for the updateBy table operation. Calculates a time-based exponential moving average for specified
* columns and places the result into a new column for each row. *
* <p/>
* <p>
* Note: This test must contain benchmarks and <code>decay_time</code> that are comparable to <code>EmsTickTest</code>
*/
public class EmsTimeTest {
@@ -7,7 +7,7 @@
/**
* Standard tests for the updateBy table operation. Defines a tick-based rolling average. The result table contains
* additional columns with windowed rolling averages for each specified column in the source table. *
* <p/>
* <p>
* Note: This test must contain benchmarks and <code>rev_ticks/fwd_ticks</code> that are comparable to
* <code>RollingAvgTimeTest</code>
*/
@@ -7,7 +7,7 @@
/**
* Standard tests for the updateBy table operation. Defines a time-based rolling average. The result table contains
* additional columns with windowed rolling averages for each specified column in the source table. *
* <p/>
* <p>
* Note: This test must contain benchmarks and <code>rev_time/fwd_time</code> that are comparable to
* <code>RollingAvgTickTest</code>
*/
@@ -7,7 +7,7 @@
/**
* Standard tests for the updateBy table operation. Defines a tick-based rolling count. The result table contains
* additional columns with windowed rolling count1 for each specified column in the source table.
* <p/>
* <p>
* Note: This test must contain benchmarks and <code>rev_ticks/fwd_ticks</code> that are comparable to
* <code>RollingCountTimeTest</code>
*/
@@ -7,7 +7,7 @@
/**
* Standard tests for the updateBy table operation. Defines a time-based rolling count. The result table contains
* additional columns with windowed rolling counts for each specified column in the source table. *
* <p/>
* <p>
* Note: This test must contain benchmarks and <code>rev_time/fwd_time</code> that are comparable to
* <code>RollingCountTickTest</code>
*/
@@ -7,7 +7,7 @@
/**
* Standard tests for the updateBy table operation. Defines a tick-based rolling group. The result table contains
* additional columns with windowed rolling groups for each specified column in the source table. *
* <p/>
* <p>
* Note: This test must contain benchmarks and <code>rev_ticks/fwd_ticks</code> that are comparable to
* <code>RollingGroupTimeTest</code>
*/