From c384e2bfa679cb3fa1835eebefe4bcb07014e1a0 Mon Sep 17 00:00:00 2001 From: eemhu <125959687+eemhu@users.noreply.github.com> Date: Thu, 4 Jan 2024 11:54:32 +0200 Subject: [PATCH] fix remaining references to old Array(Array(ByteType)) instead of Array(BinaryType) (#25) * change BloomFilterAggregator to use Array(BinaryType) instead of Array(Array(ByteType)) * replace Array(Array(ByteType)) UDF call with Array(BinaryType) in TokenizerTest, fix ByteArrayListAsStringListUDF to use WrappedArray to match it --- .../dpf_03/ByteArrayListAsStringListUDF.java | 18 +++++------------- src/test/scala/TokenizerTest.scala | 2 +- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/src/main/scala/com/teragrep/functions/dpf_03/ByteArrayListAsStringListUDF.java b/src/main/scala/com/teragrep/functions/dpf_03/ByteArrayListAsStringListUDF.java index 556fb88..d8f55c8 100644 --- a/src/main/scala/com/teragrep/functions/dpf_03/ByteArrayListAsStringListUDF.java +++ b/src/main/scala/com/teragrep/functions/dpf_03/ByteArrayListAsStringListUDF.java @@ -54,25 +54,17 @@ import java.util.ArrayList; import java.util.List; -public class ByteArrayListAsStringListUDF implements UDF1>, List> { +public class ByteArrayListAsStringListUDF implements UDF1, List> { @Override - public List call(WrappedArray> wrappedArrayWrappedArray) throws Exception { + public List call(WrappedArray wrappedByteArray) { List rv = new ArrayList<>(); - Iterator> listIterator = wrappedArrayWrappedArray.iterator(); + Iterator listIterator = wrappedByteArray.iterator(); while (listIterator.hasNext()) { - WrappedArray boxedBytes = listIterator.next(); - int dataLength = boxedBytes.length(); - byte[] unboxedBytes = new byte[dataLength]; - - Iterator stringIterator = boxedBytes.iterator(); - for (int i = 0; i < dataLength; i++) { - unboxedBytes[i] = stringIterator.next(); - } - - rv.add(new String(unboxedBytes, StandardCharsets.UTF_8)); + byte[] bytes = listIterator.next(); + rv.add(new String(bytes, StandardCharsets.UTF_8)); } return rv; diff --git a/src/test/scala/TokenizerTest.scala b/src/test/scala/TokenizerTest.scala index 8efd5c7..11fa922 100644 --- a/src/test/scala/TokenizerTest.scala +++ b/src/test/scala/TokenizerTest.scala @@ -91,7 +91,7 @@ class TokenizerTest { var rowDataset = rowMemoryStream.toDF // create Scala udf for tokenizer - val tokenizerUDF = functions.udf(new TokenizerUDF, DataTypes.createArrayType(DataTypes.createArrayType(ByteType, false), false)) + val tokenizerUDF = functions.udf(new TokenizerUDF, DataTypes.createArrayType(DataTypes.BinaryType, false)) // register tokenizer udf sparkSession.udf.register("tokenizer_udf", tokenizerUDF)