diff --git a/README.md b/README.md index 937fd14..e7961af 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ However, with some Reflection magic it is possible to do without this. This is w Here is an example: ```groovy -@Grab('se.alipsa.groovy:data-utils:1.0.0') +@Grab('se.alipsa.groovy:data-utils:1.0.2') @Grab('org.postgresql:postgresql:42.4.0') import se.alipsa.groovy.datautil.SqlUtil @@ -31,7 +31,7 @@ data-utils is available from maven central Groovy: ```groovy -implementation "se.alipsa.groovy:data-utils:1.0.1" +implementation "se.alipsa.groovy:data-utils:1.0.2" ``` Maven: @@ -39,12 +39,15 @@ Maven: se.alipsa.groovy data-utils - 1.0.1 + 1.0.2 ``` ## Version history +### 1.0.2, 2022-08-17 +- Add TableUtil with support for frequency tables + ### 1.0.1, 2022-07-25 - Upgrade to Groovy 4.0.4 - Build script fixes diff --git a/build.gradle b/build.gradle index b1c457b..388b696 100644 --- a/build.gradle +++ b/build.gradle @@ -13,14 +13,17 @@ repositories { dependencies { def groovyVersion = "[4.0.4, )" + def tablesawVersion = "[0.43.1, )" implementation "org.apache.groovy:groovy:${groovyVersion}" implementation "org.apache.groovy:groovy-sql:${groovyVersion}" + implementation "tech.tablesaw:tablesaw-core:${tablesawVersion}" testImplementation 'org.junit.jupiter:junit-jupiter:5.8.2' testImplementation 'com.h2database:h2:2.1.214' + testImplementation "org.slf4j:slf4j-simple:1.7.36" } group = 'se.alipsa.groovy' -version = '1.0.1' +version = '1.0.2' description = 'Java FX GUI for Groovy based data science analysis and applications' java.sourceCompatibility = JavaVersion.VERSION_17 diff --git a/src/main/groovy/se/alipsa/groovy/datautil/TableUtil.groovy b/src/main/groovy/se/alipsa/groovy/datautil/TableUtil.groovy new file mode 100644 index 0000000..85f6122 --- /dev/null +++ b/src/main/groovy/se/alipsa/groovy/datautil/TableUtil.groovy @@ -0,0 +1,45 @@ +package se.alipsa.groovy.datautil + +import tech.tablesaw.api.ColumnType +import tech.tablesaw.api.Row +import tech.tablesaw.api.Table +import tech.tablesaw.columns.Column + +import java.math.RoundingMode +import java.util.concurrent.atomic.AtomicInteger + +class TableUtil { + + static Table frequency(Column column) { + Map freq = new HashMap<>() + column.forEach(v -> { + freq.computeIfAbsent(v, k -> new AtomicInteger(0)).incrementAndGet() + }); + int size = column.size() + def table = Table.create(column.name()) + def valueCol = ColumnType.STRING.create("Value") + def freqCol = ColumnType.INTEGER.create("Frequency") + def percentCol = ColumnType.DOUBLE.create("Percent") + table.addColumns(valueCol, freqCol, percentCol) + for (Map.Entry entry : freq.entrySet()) { + Row row = table.appendRow() + row.setString("Value", String.valueOf(entry.getKey())) + int numOccurrence = entry.getValue().intValue() + row.setInt("Frequency", numOccurrence) + row.setDouble("Percent", round(numOccurrence * 100.0 / size, 2)) + } + return table.sortDescendingOn("Frequency") + } + + static Table frequency(Table table, String columnName) { + return frequency(table.column(columnName)) + } + + static double round(double value, int numDecimals) { + if (numDecimals < 0) throw new IllegalArgumentException("numDecimals cannot be a negative number: was " + numDecimals) + + BigDecimal bd = BigDecimal.valueOf(value) + bd = bd.setScale(numDecimals, RoundingMode.HALF_UP) + return bd.doubleValue() + } +} diff --git a/src/test/groovy/test/alipsa/groovy/datautil/TableUtilTest.groovy b/src/test/groovy/test/alipsa/groovy/datautil/TableUtilTest.groovy new file mode 100644 index 0000000..ace795b --- /dev/null +++ b/src/test/groovy/test/alipsa/groovy/datautil/TableUtilTest.groovy @@ -0,0 +1,26 @@ +package test.alipsa.groovy.datautil + +import org.junit.jupiter.api.Assertions +import se.alipsa.groovy.datautil.TableUtil; + +import static tech.tablesaw.api.ColumnType.* + +import org.junit.jupiter.api.Test +import tech.tablesaw.api.* +import tech.tablesaw.io.csv.CsvReadOptions + +public class TableUtilTest { + + @Test + void testFrequency() { + def csv = getClass().getResource("/glaciers.csv") + CsvReadOptions.Builder builder = CsvReadOptions.builder(csv) + .separator(',' as Character) + .columnTypes([INTEGER, DOUBLE, INTEGER] as ColumnType[]) + + def glaciers = Table.read().usingOptions(builder.build()) + def freq = TableUtil.frequency(glaciers, "Number of observations") + Assertions.assertEquals(20, freq.size()) + Assertions.assertEquals(31, freq.get(0, 1)) + } +} diff --git a/src/test/resources/glaciers.csv b/src/test/resources/glaciers.csv new file mode 100644 index 0000000..386ec13 --- /dev/null +++ b/src/test/resources/glaciers.csv @@ -0,0 +1,71 @@ +Year,Mean cumulative mass balance,Number of observations +1945,0, +1946,-1.13,1 +1947,-3.19,1 +1948,-3.19,1 +1949,-3.82,3 +1950,-4.887,3 +1951,-5.217,3 +1952,-5.707,3 +1953,-6.341,7 +1954,-6.825,6 +1955,-6.575,7 +1956,-6.814,7 +1957,-6.989,9 +1958,-7.693,9 +1959,-8.325,11 +1960,-8.688,14 +1961,-8.935,15 +1962,-9.109,20 +1963,-9.567,22 +1964,-9.699,22 +1965,-9.298,24 +1966,-9.436,27 +1967,-9.303,29 +1968,-9.219,31 +1969,-9.732,31 +1970,-10.128,32 +1971,-10.288,32 +1972,-10.441,32 +1973,-10.538,32 +1974,-10.613,32 +1975,-10.534,33 +1976,-10.633,35 +1977,-10.682,37 +1978,-10.754,37 +1979,-11.127,37 +1980,-11.318,36 +1981,-11.394,35 +1982,-11.849,36 +1983,-11.846,37 +1984,-11.902,37 +1985,-12.238,37 +1986,-12.782,37 +1987,-12.795,37 +1988,-13.26,37 +1989,-13.343,37 +1990,-13.687,37 +1991,-14.255,37 +1992,-14.501,36 +1993,-14.695,37 +1994,-15.276,37 +1995,-15.486,37 +1996,-15.89,37 +1997,-16.487,37 +1998,-17.31,37 +1999,-17.697,37 +2000,-17.727,37 +2001,-18.032,37 +2002,-18.726,37 +2003,-19.984,37 +2004,-20.703,37 +2005,-21.405,37 +2006,-22.595,37 +2007,-23.255,37 +2008,-23.776,37 +2009,-24.459,37 +2010,-25.158,37 +2011,-26.294,37 +2012,-26.93,36 +2013,-27.817,31 +2014,-28.652,24