From c384e2bfa679cb3fa1835eebefe4bcb07014e1a0 Mon Sep 17 00:00:00 2001
From: eemhu <125959687+eemhu@users.noreply.github.com>
Date: Thu, 4 Jan 2024 11:54:32 +0200
Subject: [PATCH] replace remaining references to old Array(Array(ByteType))
 with Array(BinaryType) (#25)

* change BloomFilterAggregator to use Array(BinaryType) instead of Array(Array(ByteType))

* replace the Array(Array(ByteType)) UDF return type with Array(BinaryType) in TokenizerTest, and change ByteArrayListAsStringListUDF to take WrappedArray<byte[]> to match
---
 .../dpf_03/ByteArrayListAsStringListUDF.java   | 18 +++++-------------
 src/test/scala/TokenizerTest.scala             |  2 +-
 2 files changed, 6 insertions(+), 14 deletions(-)
diff --git a/src/main/scala/com/teragrep/functions/dpf_03/ByteArrayListAsStringListUDF.java b/src/main/scala/com/teragrep/functions/dpf_03/ByteArrayListAsStringListUDF.java
index 556fb88..d8f55c8 100644
--- a/src/main/scala/com/teragrep/functions/dpf_03/ByteArrayListAsStringListUDF.java
+++ b/src/main/scala/com/teragrep/functions/dpf_03/ByteArrayListAsStringListUDF.java
@@ -54,25 +54,17 @@
 import java.util.ArrayList;
 import java.util.List;
 
-public class ByteArrayListAsStringListUDF implements UDF1<WrappedArray<WrappedArray<Byte>>, List<String>> {
+public class ByteArrayListAsStringListUDF implements UDF1<WrappedArray<byte[]>, List<String>> {
 
 
     @Override
-    public List<String> call(WrappedArray<WrappedArray<Byte>> wrappedArrayWrappedArray) throws Exception {
+    public List<String> call(WrappedArray<byte[]> wrappedByteArray) {
         List<String> rv = new ArrayList<>();
 
-        Iterator<WrappedArray<Byte>> listIterator = wrappedArrayWrappedArray.iterator();
+        Iterator<byte[]> listIterator = wrappedByteArray.iterator();
         while (listIterator.hasNext()) {
-            WrappedArray<Byte> boxedBytes = listIterator.next();
-            int dataLength = boxedBytes.length();
-            byte[] unboxedBytes = new byte[dataLength];
-
-            Iterator<Byte> stringIterator = boxedBytes.iterator();
-            for (int i = 0; i < dataLength; i++) {
-                unboxedBytes[i] = stringIterator.next();
-            }
-
-            rv.add(new String(unboxedBytes, StandardCharsets.UTF_8));
+            byte[] bytes = listIterator.next();
+            rv.add(new String(bytes, StandardCharsets.UTF_8));
         }
 
         return rv;
diff --git a/src/test/scala/TokenizerTest.scala b/src/test/scala/TokenizerTest.scala
index 8efd5c7..11fa922 100644
--- a/src/test/scala/TokenizerTest.scala
+++ b/src/test/scala/TokenizerTest.scala
@@ -91,7 +91,7 @@ class TokenizerTest {
     var rowDataset = rowMemoryStream.toDF
 
     // create Scala udf for tokenizer
-    val tokenizerUDF = functions.udf(new TokenizerUDF, DataTypes.createArrayType(DataTypes.createArrayType(ByteType, false), false))
+    val tokenizerUDF = functions.udf(new TokenizerUDF, DataTypes.createArrayType(DataTypes.BinaryType, false))
     // register tokenizer udf
     sparkSession.udf.register("tokenizer_udf", tokenizerUDF)