
Commit

fix remaining references to old Array(Array(ByteType)) instead of Array(BinaryType) (#25)

* change BloomFilterAggregator to use Array(BinaryType) instead of Array(Array(ByteType))

* replace Array(Array(ByteType)) UDF call with Array(BinaryType) in TokenizerTest, fix ByteArrayListAsStringListUDF to use WrappedArray<byte[]> to match it
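
For context, a minimal Scala sketch (not part of the commit) of the two return types involved; the old array<array<byte>> schema reaches a Java UDF as boxed WrappedArray<WrappedArray<Byte>>, while array<binary> maps each element to a plain byte[]:

import org.apache.spark.sql.types.{ByteType, DataTypes}

// old: array<array<byte>> -> UDF receives WrappedArray[WrappedArray[java.lang.Byte]] (boxed bytes)
val oldReturnType = DataTypes.createArrayType(DataTypes.createArrayType(ByteType, false), false)
// new: array<binary>      -> UDF receives WrappedArray[Array[Byte]] (raw byte arrays)
val newReturnType = DataTypes.createArrayType(DataTypes.BinaryType, false)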
eemhu authored Jan 4, 2024
1 parent f9cf288 commit c384e2b
Showing 2 changed files with 6 additions and 14 deletions.
@@ -54,25 +54,17 @@
 import java.util.ArrayList;
 import java.util.List;
 
-public class ByteArrayListAsStringListUDF implements UDF1<WrappedArray<WrappedArray<Byte>>, List<String>> {
+public class ByteArrayListAsStringListUDF implements UDF1<WrappedArray<byte[]>, List<String>> {
 
 
     @Override
-    public List<String> call(WrappedArray<WrappedArray<Byte>> wrappedArrayWrappedArray) throws Exception {
+    public List<String> call(WrappedArray<byte[]> wrappedByteArray) {
         List<String> rv = new ArrayList<>();
 
-        Iterator<WrappedArray<Byte>> listIterator = wrappedArrayWrappedArray.iterator();
+        Iterator<byte[]> listIterator = wrappedByteArray.iterator();
         while (listIterator.hasNext()) {
-            WrappedArray<Byte> boxedBytes = listIterator.next();
-            int dataLength = boxedBytes.length();
-            byte[] unboxedBytes = new byte[dataLength];
-
-            Iterator<Byte> stringIterator = boxedBytes.iterator();
-            for (int i = 0; i < dataLength; i++) {
-                unboxedBytes[i] = stringIterator.next();
-            }
-
-            rv.add(new String(unboxedBytes, StandardCharsets.UTF_8));
+            byte[] bytes = listIterator.next();
+            rv.add(new String(bytes, StandardCharsets.UTF_8));
         }
 
         return rv;
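
Below is a hypothetical usage sketch of the rewritten UDF, assuming the test's sparkSession plus a DataFrame tokensDf with an array<binary> column named "tokens" (these names are illustrative, not taken from the commit):

import org.apache.spark.sql.functions
import org.apache.spark.sql.types.DataTypes

// wrap the Java UDF1 with an explicit Spark SQL return type (array<string>)
val byteArraysAsStrings = functions.udf(new ByteArrayListAsStringListUDF, DataTypes.createArrayType(DataTypes.StringType, false))
// register it for SQL use; the registered name is illustrative
sparkSession.udf.register("bytes_as_strings_udf", byteArraysAsStrings)
// decode the array<binary> tokens into readable strings
val readable = tokensDf.withColumn("tokens_str", byteArraysAsStrings(functions.col("tokens")))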
2 changes: 1 addition & 1 deletion src/test/scala/TokenizerTest.scala
@@ -91,7 +91,7 @@ class TokenizerTest {
     var rowDataset = rowMemoryStream.toDF
 
     // create Scala udf for tokenizer
-    val tokenizerUDF = functions.udf(new TokenizerUDF, DataTypes.createArrayType(DataTypes.createArrayType(ByteType, false), false))
+    val tokenizerUDF = functions.udf(new TokenizerUDF, DataTypes.createArrayType(DataTypes.BinaryType, false))
     // register tokenizer udf
     sparkSession.udf.register("tokenizer_udf", tokenizerUDF)
 
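
As a follow-up sketch on the test side, assuming a batch DataFrame df with an input column "_raw" (both are assumptions; the diff above only shows the UDF declaration): with the return type declared as array<binary>, collected rows expose each token as a plain Array[Byte] that can be decoded directly.

import java.nio.charset.StandardCharsets

// apply the registered tokenizer over the assumed "_raw" column
val tokenized = df.selectExpr("tokenizer_udf(_raw) AS tokens")
// each element of the array<binary> column comes back as Array[Byte]
tokenized.collect().foreach { row =>
  val tokens = row.getSeq[Array[Byte]](0).map(bytes => new String(bytes, StandardCharsets.UTF_8))
  println(tokens.mkString(", "))
}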
