
Commit

fix remaining references to old Array(Array(ByteType)) instead of Array(BinaryType) (#25)

* change BloomFilterAggregator to use Array(BinaryType) instead of Array(Array(ByteType))

* replace Array(Array(ByteType)) UDF call with Array(BinaryType) in TokenizerTest, fix ByteArrayListAsStringListUDF to use WrappedArray<byte[]> to match it
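
For context, a minimal Scala sketch (not part of the commit) of the two return types involved; the old array<array<byte>> schema reaches a Java UDF as boxed WrappedArray<WrappedArray<Byte>>, while array<binary> maps each element to a plain byte[]:

import org.apache.spark.sql.types.{ByteType, DataTypes}

// old: array<array<byte>> -> UDF receives WrappedArray[WrappedArray[java.lang.Byte]] (boxed bytes)
val oldReturnType = DataTypes.createArrayType(DataTypes.createArrayType(ByteType, false), false)
// new: array<binary>      -> UDF receives WrappedArray[Array[Byte]] (raw byte arrays)
val newReturnType = DataTypes.createArrayType(DataTypes.BinaryType, false)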
eemhu authored Jan 4, 2024
1 parent f9cf288 commit c384e2b
Showing 2 changed files with 6 additions and 14 deletions.
@@ -54,25 +54,17 @@
 import java.util.ArrayList;
 import java.util.List;
 
-public class ByteArrayListAsStringListUDF implements UDF1<WrappedArray<WrappedArray<Byte>>, List<String>> {
+public class ByteArrayListAsStringListUDF implements UDF1<WrappedArray<byte[]>, List<String>> {
 
 
     @Override
-    public List<String> call(WrappedArray<WrappedArray<Byte>> wrappedArrayWrappedArray) throws Exception {
+    public List<String> call(WrappedArray<byte[]> wrappedByteArray) {
         List<String> rv = new ArrayList<>();
 
-        Iterator<WrappedArray<Byte>> listIterator = wrappedArrayWrappedArray.iterator();
+        Iterator<byte[]> listIterator = wrappedByteArray.iterator();
         while (listIterator.hasNext()) {
-            WrappedArray<Byte> boxedBytes = listIterator.next();
-            int dataLength = boxedBytes.length();
-            byte[] unboxedBytes = new byte[dataLength];
-
-            Iterator<Byte> stringIterator = boxedBytes.iterator();
-            for (int i = 0; i < dataLength; i++) {
-                unboxedBytes[i] = stringIterator.next();
-            }
-
-            rv.add(new String(unboxedBytes, StandardCharsets.UTF_8));
+            byte[] bytes = listIterator.next();
+            rv.add(new String(bytes, StandardCharsets.UTF_8));
         }
 
         return rv;
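
Below is a hypothetical usage sketch of the rewritten UDF, assuming the test's sparkSession plus a DataFrame tokensDf with an array<binary> column named "tokens" (these names are illustrative, not taken from the commit):

import org.apache.spark.sql.functions
import org.apache.spark.sql.types.DataTypes

// wrap the Java UDF1 with an explicit Spark SQL return type (array<string>)
val byteArraysAsStrings = functions.udf(new ByteArrayListAsStringListUDF, DataTypes.createArrayType(DataTypes.StringType, false))
// register it for SQL use; the registered name is illustrative
sparkSession.udf.register("bytes_as_strings_udf", byteArraysAsStrings)
// decode the array<binary> tokens into readable strings
val readable = tokensDf.withColumn("tokens_str", byteArraysAsStrings(functions.col("tokens")))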
2 changes: 1 addition & 1 deletion src/test/scala/TokenizerTest.scala
@@ -91,7 +91,7 @@ class TokenizerTest {
     var rowDataset = rowMemoryStream.toDF
 
     // create Scala udf for tokenizer
-    val tokenizerUDF = functions.udf(new TokenizerUDF, DataTypes.createArrayType(DataTypes.createArrayType(ByteType, false), false))
+    val tokenizerUDF = functions.udf(new TokenizerUDF, DataTypes.createArrayType(DataTypes.BinaryType, false))
     // register tokenizer udf
     sparkSession.udf.register("tokenizer_udf", tokenizerUDF)
 
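
As a follow-up sketch on the test side, assuming a batch DataFrame df with an input column "_raw" (both are assumptions; the diff above only shows the UDF declaration): with the return type declared as array<binary>, collected rows expose each token as a plain Array[Byte] that can be decoded directly.

import java.nio.charset.StandardCharsets

// apply the registered tokenizer over the assumed "_raw" column
val tokenized = df.selectExpr("tokenizer_udf(_raw) AS tokens")
// each element of the array<binary> column comes back as Array[Byte]
tokenized.collect().foreach { row =>
  val tokens = row.getSeq[Array[Byte]](0).map(bytes => new String(bytes, StandardCharsets.UTF_8))
  println(tokens.mkString(", "))
}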
