Added support for dynamically getting delta table features #428

Open · wants to merge 4 commits into main
1 change: 1 addition & 0 deletions .gitignore
@@ -41,3 +41,4 @@ target/
*.crc
demo/jars/*
demo/notebook/.ipynb_checkpoints/*
/.history
3 changes: 3 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,3 @@
{
Contributor:
Can you update the .gitignore with .vscode/* as well?

Author:
Absolutely!

"java.compile.nullAnalysis.mode": "automatic"
}
@@ -32,6 +32,8 @@
import lombok.ToString;

import org.apache.hadoop.conf.Configuration;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.catalyst.expressions.Literal;
import org.apache.spark.sql.types.StructField;
@@ -65,12 +67,16 @@
import org.apache.xtable.model.storage.TableFormat;
import org.apache.xtable.spi.sync.ConversionTarget;

// to support getting the min reader and writer versions dynamically
import io.delta.tables.DeltaTable;

public class DeltaConversionTarget implements ConversionTarget {
private static final String MIN_READER_VERSION = String.valueOf(1);
// private static final String MIN_READER_VERSION = String.valueOf(1);
// gets access to generated columns.
private static final String MIN_WRITER_VERSION = String.valueOf(4);
// private static final String MIN_WRITER_VERSION = String.valueOf(4);

private DeltaLog deltaLog;
private DeltaTable deltaTable;
private DeltaSchemaExtractor schemaExtractor;
private DeltaPartitionExtractor partitionExtractor;
private DeltaDataFileUpdatesExtractor dataFileUpdatesExtractor;
@@ -79,6 +85,9 @@ public class DeltaConversionTarget implements ConversionTarget {
private int logRetentionInHours;
private TransactionState transactionState;

private String minReaderVersion;
private String minWriterVersion;

public DeltaConversionTarget() {}

public DeltaConversionTarget(PerTableConfig perTableConfig, SparkSession sparkSession) {
@@ -121,6 +130,9 @@ private void _init(
DeltaPartitionExtractor partitionExtractor,
DeltaDataFileUpdatesExtractor dataFileUpdatesExtractor) {
DeltaLog deltaLog = DeltaLog.forTable(sparkSession, tableDataPath);
DeltaTable deltaTable = DeltaTable.forPath(sparkSession, tableDataPath);
minReaderVersion = String.valueOf(1);
minWriterVersion = String.valueOf(4);
boolean deltaTableExists = deltaLog.tableExists();
if (!deltaTableExists) {
deltaLog.ensureLogDirectoryExist();
@@ -129,6 +141,7 @@
this.partitionExtractor = partitionExtractor;
this.dataFileUpdatesExtractor = dataFileUpdatesExtractor;
this.deltaLog = deltaLog;
this.deltaTable = deltaTable;
this.tableName = tableName;
this.logRetentionInHours = logRetentionInHours;
}
@@ -268,9 +281,23 @@ private void commitTransaction() {
}

private Map<String, String> getConfigurationsForDeltaSync() {

Contributor:
We can test this here.

I would expect we can add the timestamp_ntz and see that the delta table has the correct versions set for min reader/writer?
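A minimal sketch of that test, assuming a SparkSession with the Delta extensions enabled; the table location is hypothetical, and the expected (3, 7) protocol follows from timestamp_ntz being a Delta table feature:

import static org.junit.jupiter.api.Assertions.assertEquals;

import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

void timestampNtzShouldBumpProtocolVersions(SparkSession spark) {
  // Hypothetical scratch location for the test table.
  String path = "/tmp/xtable_ntz_test";
  spark.sql("CREATE TABLE delta.`" + path + "` (id INT, ts TIMESTAMP_NTZ) USING DELTA");
  Row versions =
      spark.sql("DESCRIBE DETAIL delta.`" + path + "`")
          .select("minReaderVersion", "minWriterVersion")
          .first();
  // timestamp_ntz is a table feature, so Delta should require reader 3 / writer 7.
  assertEquals(3, (int) versions.getAs("minReaderVersion"));
  assertEquals(7, (int) versions.getAs("minWriterVersion"));
}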

// detail() returns a single-row Dataset describing the table; the DeltaTable
// instance is created in _init() and kept in a private field.

// Limit the result to the attributes needed.
Dataset<Row> record = deltaTable.detail().select("minWriterVersion", "minReaderVersion");
Contributor:
Can you get this information from deltaLog.snapshot().metadata()?

Author (@ForeverAngry, May 3, 2024):
I don't think so. I looked through the API for deltaLog.snapshot().metadata() and did not find a reference to it. The only place I found it was here: https://docs.delta.io/latest/delta-utility.html

Contributor:
It would be good to see if we can just let the Delta Lake library automatically set the versions for us. Can you try that as well? If I remember correctly, it looked like it would auto-update them for us.
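For reference, a sketch of reading the versions from the snapshot's protocol action rather than its metadata, assuming delta-core's Snapshot exposes protocol() as it does in recent releases; the versions live on Protocol, which would explain why metadata() has no reference to them:

import org.apache.spark.sql.delta.DeltaLog;
import org.apache.spark.sql.delta.actions.Protocol;

// metadata() only carries schema, partition columns, and table configuration;
// the reader/writer requirements live on the protocol action.
Protocol protocol = deltaLog.snapshot().protocol();
String minReaderVersion = String.valueOf(protocol.minReaderVersion());
String minWriterVersion = String.valueOf(protocol.minWriterVersion());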


// Collect the first row and extract the values (detail() yields exactly one row)
Row row = record.first();

minWriterVersion = row.getAs("minWriterVersion").toString();
minReaderVersion = row.getAs("minReaderVersion").toString();

Map<String, String> configMap = new HashMap<>();
configMap.put(DeltaConfigs.MIN_READER_VERSION().key(), MIN_READER_VERSION);
configMap.put(DeltaConfigs.MIN_WRITER_VERSION().key(), MIN_WRITER_VERSION);
configMap.put(DeltaConfigs.MIN_READER_VERSION().key(), minReaderVersion);
configMap.put(DeltaConfigs.MIN_WRITER_VERSION().key(), minWriterVersion);
configMap.put(TableSyncMetadata.XTABLE_METADATA, metadata.toJson());
// Sets retention for the Delta Log, does not impact underlying files in the table
configMap.put(
@@ -42,6 +42,7 @@
import org.apache.xtable.model.schema.InternalField;
import org.apache.xtable.model.schema.InternalSchema;
import org.apache.xtable.model.schema.InternalType;

import org.apache.xtable.schema.SchemaUtils;

/**
@@ -60,6 +61,10 @@ public class DeltaSchemaExtractor {
private static final String DELTA_COLUMN_MAPPING_ID = "delta.columnMapping.id";
private static final DeltaSchemaExtractor INSTANCE = new DeltaSchemaExtractor();

// Timestamps in Delta are microsecond precision by default
private static final Map<InternalSchema.MetadataKey, Object> DEFAULT_TIMESTAMP_PRECISION_METADATA = Collections.singletonMap(
InternalSchema.MetadataKey.TIMESTAMP_PRECISION, InternalSchema.MetadataValue.MICROS);

public static DeltaSchemaExtractor getInstance() {
return INSTANCE;
}
@@ -86,7 +91,6 @@ private DataType convertFieldType(InternalField field) {
case INT:
return DataTypes.IntegerType;
case LONG:
case TIMESTAMP_NTZ:
return DataTypes.LongType;
case BYTES:
case FIXED:
@@ -99,6 +103,8 @@
return DataTypes.DateType;
case TIMESTAMP:
return DataTypes.TimestampType;
case TIMESTAMP_NTZ:
return DataTypes.TimestampNTZType;
case DOUBLE:
return DataTypes.DoubleType;
case DECIMAL:
@@ -183,10 +189,11 @@ private InternalSchema toInternalSchema(
case "timestamp":
type = InternalType.TIMESTAMP;
// Timestamps in Delta are microsecond precision by default
metadata =
Collections.singletonMap(
InternalSchema.MetadataKey.TIMESTAMP_PRECISION,
InternalSchema.MetadataValue.MICROS);
metadata = DEFAULT_TIMESTAMP_PRECISION_METADATA;
break;
case "timestamp_ntz":
type = InternalType.TIMESTAMP_NTZ;
metadata = DEFAULT_TIMESTAMP_PRECISION_METADATA;
break;
case "struct":
StructType structType = (StructType) dataType;
@@ -57,7 +57,7 @@
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

import org.apache.spark.sql.delta.DeltaConfigs;
Contributor:
@ForeverAngry Can you remove these changes to this file if they are no longer required?

import org.apache.spark.sql.delta.GeneratedColumn;

import scala.collection.JavaConverters;
@@ -406,6 +406,8 @@ private void validateDeltaTable(
internalDataFiles.size(), count, "Number of files from DeltaScan don't match expectation");
}



private InternalSnapshot buildSnapshot(InternalTable table, InternalDataFile... dataFiles) {
return InternalSnapshot.builder()
.table(table)
@@ -508,4 +510,5 @@ private static SparkSession buildSparkSession() {
.set("spark.master", "local[2]");
return SparkSession.builder().config(sparkConf).getOrCreate();
}

}