Core terminal update #121

Merged 14 commits on Oct 29, 2024

2 changes: 0 additions & 2 deletions .github/workflows/test_file_reader.yml
@@ -27,8 +27,6 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install pyyaml
pip install toml
pip install .
- name: Test reader
run: |
2 changes: 0 additions & 2 deletions .github/workflows/test_file_writer.yml
@@ -26,8 +26,6 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install pyyaml
pip install toml
pip install .
- name: Test reader
run: |
2 changes: 0 additions & 2 deletions .github/workflows/test_plugin.yml
@@ -27,8 +27,6 @@ jobs:
python -m pip install --upgrade pip
pip install -r requirements.txt
python -m pip install opencv-python
pip install pyyaml
pip install toml
pip install .
pip install graphviz
sudo apt-get install graphviz
2 changes: 0 additions & 2 deletions .github/workflows/test_sqlite.yml
@@ -26,8 +26,6 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install pyyaml
pip install toml
pip install .
- name: Test reader
run: |
134 changes: 85 additions & 49 deletions dsi/backends/sqlite.py
@@ -20,7 +20,7 @@
# Holds table name and data properties

class DataType:
name = "TABLENAME" # Note: using the word DEFAULT outputs a syntax error
name = "" # Note: using the word DEFAULT outputs a syntax error
properties = {}
units = {}

@@ -33,7 +33,7 @@ class Artifact:
An Artifact is a generic construct that defines the schema for metadata that
defines the tables inside of SQL
"""
name = "TABLENAME"
name = ""
properties = {}


@@ -73,7 +73,7 @@ def check_type(self, text):
# Note 1: 'add column types' to be implemented.
# Note 2: TABLENAME is the default name for all tables created which might cause issues when creating multiple Sqlite files.

def put_artifact_type(self, types, isVerbose=False):
def put_artifact_type(self, types, foreign_query = None, isVerbose=False):
"""
Primary class for defining metadata Artifact schema.

@@ -82,10 +82,17 @@ def put_artifact_type(self, types, isVerbose=False):

`return`: none
"""

col_names = ', '.join(types.properties.keys())

str_query = "CREATE TABLE IF NOT EXISTS {} ({});".format(str(types.name), col_names)
key_names = types.properties.keys()
if "_units" in types.name:
key_names = [item + " UNIQUE" for item in types.properties.keys()]

col_names = ', '.join(key_names)

str_query = "CREATE TABLE IF NOT EXISTS {} ({}".format(str(types.name), col_names)

if foreign_query != None:
str_query += foreign_query
str_query += ");"

if isVerbose:
print(str_query)
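
For orientation, here is a minimal sketch of the SQL that the updated put_artifact_type builds; it only reproduces the query construction shown above, and the table name, column names, and foreign_query string are invented for illustration.

# Hedged sketch (not part of the diff): rebuilds the CREATE TABLE string with made-up names.
from collections import OrderedDict

class _Types:  # stand-in for DataType
    name = "wildfire__burns"
    properties = OrderedDict([("burn_id PRIMARY KEY", [1, 2]), ("wind_speed", [3.1, 4.7])])

types = _Types()
foreign_query = ", FOREIGN KEY (burn_id) REFERENCES wildfire__sites (site_id)"

key_names = types.properties.keys()
if "_units" in types.name:  # unit tables get UNIQUE columns
    key_names = [item + " UNIQUE" for item in types.properties.keys()]
col_names = ', '.join(key_names)

str_query = "CREATE TABLE IF NOT EXISTS {} ({}".format(str(types.name), col_names)
if foreign_query is not None:  # foreign-key clause is appended before the closing paren
    str_query += foreign_query
str_query += ");"
print(str_query)
# -> CREATE TABLE IF NOT EXISTS wildfire__burns (burn_id PRIMARY KEY, wind_speed, FOREIGN KEY (burn_id) REFERENCES wildfire__sites (site_id));
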
@@ -124,42 +131,62 @@ def put_artifacts(self, collection, isVerbose=False):
"""
# Core compatibility name assignment
artifacts = collection

types = DataType()
types.properties = {}

# Check if this has been defined from helper function
if self.types != None:
types.name = self.types.name

for key in artifacts:
types.properties[key.replace('-','_minus_')] = artifacts[key]

self.put_artifact_type(types)

col_names = ', '.join(types.properties.keys())
placeholders = ', '.join('?' * len(types.properties))

str_query = "INSERT INTO {} ({}) VALUES ({});".format(str(types.name), col_names, placeholders)
for tableName, tableData in artifacts.items():
if "dsi_relations" in tableName:
continue

types = DataType()
types.properties = {}

# Check if this has been defined from helper function
'''if self.types != None:
types.name = self.types.name'''
types.name = tableName

foreign_query = ""
for key in tableData:
comboTuple = (tableName, key)
dsi_name = tableName[:tableName.find("__")] + "__dsi_relations"
if dsi_name in artifacts.keys() and comboTuple in artifacts[dsi_name]["primary_key"]:
key += " PRIMARY KEY"
if dsi_name in artifacts.keys() and comboTuple in artifacts[dsi_name]["foreign_key"]:
foreignIndex = artifacts[dsi_name]["foreign_key"].index(comboTuple)
foreign_query += f", FOREIGN KEY ({key}) REFERENCES {artifacts[dsi_name]['primary_key'][foreignIndex][0]} ({artifacts[dsi_name]['primary_key'][foreignIndex][1]})"

types.properties[key.replace('-','_minus_')] = tableData[key]

if foreign_query != "":
self.put_artifact_type(types, foreign_query)
else:
self.put_artifact_type(types)

# col_list helps access the specific keys of the dictionary in the for loop
col_list = col_names.split(', ')
col_names = ', '.join(types.properties.keys())
placeholders = ', '.join('?' * len(types.properties))

# loop through the contents of each column and insert into table as a row
for ind1 in range(len(types.properties[col_list[0]])):
vals = []
for ind2 in range(len(types.properties.keys())):
vals.append(str(types.properties[col_list[ind2]][ind1]))
# Make sure this works if types.properties[][] is already a string
tup_vals = tuple(vals)
self.cur.execute(str_query,tup_vals)
if "_units" in tableName:
str_query = "INSERT OR IGNORE INTO {} ({}) VALUES ({});".format(str(types.name), col_names, placeholders)
else:
str_query = "INSERT INTO {} ({}) VALUES ({});".format(str(types.name), col_names, placeholders)

if isVerbose:
print(str_query)
# col_list helps access the specific keys of the dictionary in the for loop
col_list = col_names.split(', ')

self.con.commit()

self.types = types
# loop through the contents of each column and insert into table as a row
for ind1 in range(len(types.properties[col_list[0]])):
vals = []
for ind2 in range(len(types.properties.keys())):
vals.append(str(types.properties[col_list[ind2]][ind1]))
# Make sure this works if types.properties[][] is already a string
tup_vals = tuple(vals)
self.cur.execute(str_query,tup_vals)

if isVerbose:
print(str_query)

self.con.commit()

self.types = types #This will only copy the last collection from artifacts (collections input)

def put_artifacts_only(self, artifacts, isVerbose=False):
"""
@@ -337,13 +364,13 @@ def put_artifacts_csv(self, fname, tname, isVerbose=False):
#[END NOTE 2]

# Returns text list from query
def get_artifact_list(self, isVerbose=False):
def get_artifact_list(self, query, isVerbose=False):
"""
Function that returns a list of all of the Artifact names (represented as sql tables)

`return`: list of Artifact names
"""
str_query = "SELECT name FROM sqlite_master WHERE type='table';"
str_query = query
if isVerbose:
print(str_query)

@@ -357,8 +384,8 @@ def get_artifact_list(self, isVerbose=False):
return resout

# Returns reference from query
def get_artifacts(self, query):
self.get_artifacts_list()
def get_artifacts(self, query, isVerbose=False):
self.get_artifact_list(query, isVerbose)

# Closes connection to server
def close(self):
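
Note that get_artifact_list no longer hard-codes the listing query; callers now pass it in. A minimal usage sketch reusing the previous default query (the database path is illustrative):

store = Sqlite("wildfire.db")  # hypothetical path
tables = store.get_artifact_list("SELECT name FROM sqlite_master WHERE type='table';", isVerbose=True)
store.close()
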
@@ -577,6 +604,7 @@ def yamlToSqlite(self, filenames, db_name, deleteSql=True):
`deleteSql`: flag to delete temp SQL file that creates the database. Default is True, but change to False for testing or comparing outputs
"""

sql_statements = []
if isinstance(filenames, str):
filenames = [filenames]

@@ -600,9 +628,15 @@
createStmt += f"{key} {data_types[type(val)]}, "
insertUnitStmt+= "NULL, "

sql_file.write(createStmt[:-2] + ");\n\n")
sql_file.write(createUnitStmt[:-2] + ");\n\n")
sql_file.write(insertUnitStmt[:-2] + ");\n\n")
if createStmt not in sql_statements:
sql_statements.append(createStmt)
sql_file.write(createStmt[:-2] + ");\n\n")
if createUnitStmt not in sql_statements:
sql_statements.append(createUnitStmt)
sql_file.write(createUnitStmt[:-2] + ");\n\n")
if insertUnitStmt not in sql_statements:
sql_statements.append(insertUnitStmt)
sql_file.write(insertUnitStmt[:-2] + ");\n\n")

insertStmt = f"INSERT INTO {tableName} VALUES( "
for val in table['columns'].values():
@@ -613,12 +647,14 @@
else:
insertStmt+= f"{val}, "

sql_file.write(insertStmt[:-2] + ");\n\n")
if insertStmt not in sql_statements:
sql_statements.append(insertStmt)
sql_file.write(insertStmt[:-2] + ");\n\n")

subprocess.run(["sqlite3", db_name+".db"], stdin= open(db_name+".sql", "r"))
subprocess.run(["sqlite3", db_name+".db"], stdin= open(db_name+".sql", "r"))

if deleteSql == True:
os.remove(db_name+".sql")
if deleteSql == True:
os.remove(db_name+".sql")

def tomlDataToList(self, filenames):
"""
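
The sql_statements list introduced above is meant to deduplicate schema statements when several YAML files declare the same table. A rough usage sketch based on the existing test, under the assumption that the two schema files describe overlapping tables:

reader = Sqlite("yaml-test.db")
reader.yamlToSqlite(["examples/data/schema.yml", "examples/data/schema2.yml"], "yaml-test", deleteSql=False)
# yaml-test.sql should now contain each CREATE TABLE / units statement only once,
# while row INSERTs that differ between the two files are still all written.
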
45 changes: 14 additions & 31 deletions dsi/backends/tests/test_sqlite.py
@@ -32,7 +32,7 @@ def test_wildfire_data_csv_artifact():
assert True

def test_wildfiredata_artifact_put():
valid_middleware_datastructure = OrderedDict({'foo':[1,2,3],'bar':[3,2,1]})
valid_middleware_datastructure = OrderedDict({"wildfire": OrderedDict({'foo':[1,2,3],'bar':[3,2,1]})})
dbpath = 'test_wildfiredata_artifact.sqlite_data'
store = Sqlite(dbpath)
store.put_artifacts(valid_middleware_datastructure)
@@ -44,7 +44,7 @@ def test_wildfiredata_artifact_put_t():
valid_middleware_datastructure = OrderedDict({'foo':[1,2,3],'bar':[3,2,1]})
dbpath = 'test_wildfiredata_artifact.sqlite_data'
store = Sqlite(dbpath)
store.put_artifacts_t(valid_middleware_datastructure, tableName="Wildfire")
store.put_artifacts_t(OrderedDict([("wildfire", valid_middleware_datastructure)]), tableName="Wildfire")
store.close()
# No error implies success
assert True
@@ -69,32 +69,15 @@ def test_yosemite_data_csv_artifact():
assert True


def test_artifact_query():
dbpath = "wildfire.db"
store = Sqlite(dbpath)
_ = store.get_artifact_list(isVerbose=isVerbose)
data_type = DataType()
data_type.name = "simulation"
result = store.sqlquery("SELECT *, MAX(wind_speed) AS max_windspeed FROM " +
str(data_type.name) + " GROUP BY safe_unsafe_fire_behavior")
store.export_csv(result, "TABLENAME", "query.csv")
store.close()
# No error implies success
assert True


def test_yaml_reader():
reader = Sqlite("yaml-test.db")
reader.yamlToSqlite(["examples/data/schema.yml", "examples/data/schema2.yml"], "yaml-test", deleteSql=False)
subprocess.run(["diff", "examples/data/compare-schema.sql", "yaml-test.sql"], stdout=open("compare_sql.txt", "w"))
file_size = os.path.getsize("compare_sql.txt")

assert file_size == 0 #difference between sql files should be 0 characters

def test_toml_reader():
reader = Sqlite("toml-test.db")
reader.tomlToSqlite(["examples/data/schema.toml", "examples/data/schema2.toml"], "toml-test", deleteSql=False)
subprocess.run(["diff", "examples/data/compare-schema.sql", "toml-test.sql"], stdout=open("compare_sql.txt", "w"))
file_size = os.path.getsize("compare_sql.txt")

assert file_size == 0 #difference between sql files should be 0 characters
# def test_artifact_query():
# dbpath = "wildfire.db"
# store = Sqlite(dbpath)
# _ = store.get_artifact_list(isVerbose=isVerbose)
# data_type = DataType()
# data_type.name = "simulation"
# result = store.sqlquery("SELECT *, MAX(wind_speed) AS max_windspeed FROM " +
# str(data_type.name) + " GROUP BY safe_unsafe_fire_behavior")
# store.export_csv(result, "TABLENAME", "query.csv")
# store.close()
# # No error implies success
# assert True
18 changes: 12 additions & 6 deletions dsi/core.py
@@ -21,8 +21,8 @@ class Terminal():
BACKEND_PREFIX = ['dsi.backends']
BACKEND_IMPLEMENTATIONS = ['gufi', 'sqlite', 'parquet']
PLUGIN_PREFIX = ['dsi.plugins']
PLUGIN_IMPLEMENTATIONS = ['env', 'file_reader']
VALID_PLUGINS = ['Hostname', 'SystemKernel', 'GitInfo', 'Bueno', 'Csv']
PLUGIN_IMPLEMENTATIONS = ['env', 'file_reader', 'file_writer']
VALID_PLUGINS = ['Hostname', 'SystemKernel', 'GitInfo', 'Bueno', 'Csv', 'ER_Diagram', 'YAML', 'TOML', "Table_Plot"]
VALID_BACKENDS = ['Gufi', 'Sqlite', 'Parquet']
VALID_MODULES = VALID_PLUGINS + VALID_BACKENDS
VALID_MODULE_FUNCTIONS = {'plugin': [
@@ -166,20 +166,25 @@ def transload(self, **kwargs):
data sources to a single DSI Core Middleware data structure.
"""
selected_function_modules = dict(
(k, self.active_modules[k]) for k in ('writer', 'reader'))
(k, self.active_modules[k]) for k in ('reader', 'writer'))
# Note this transload supports plugin.env Environment types now.
for module_type, objs in selected_function_modules.items():
for obj in objs:
obj.add_rows(**kwargs)
for col_name, col_metadata in obj.output_collector.items():
self.active_metadata[col_name] = col_metadata
if module_type == "reader":
obj.add_rows(**kwargs)
for table_name, table_metadata in obj.output_collector.items():
self.active_metadata[table_name] = table_metadata
elif module_type == "writer":
obj.get_rows(self.active_metadata, **kwargs)

# Plugins may add one or more rows (vector vs matrix data).
# You may have two or more plugins with different numbers of rows.
# Consequently, transload operations may have unstructured shape for
# some plugin configurations. We must force structure to create a valid
# middleware data structure.
# To resolve, we pad the shorter columns to match the max length column.
#COMMENTED OUT TILL LATER
'''
max_len = max([len(col) for col in self.active_metadata.values()])
for colname, coldata in self.active_metadata.items():
if len(coldata) != max_len:
@@ -188,6 +193,7 @@

assert all([len(col) == max_len for col in self.active_metadata.values(
)]), "All columns must have the same number of rows"
'''

self.transload_lock = True
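
To make the new reader/writer split concrete, here is a small self-contained sketch of the contract transload now assumes: readers expose add_rows() plus an output_collector keyed by table name, and writers expose get_rows(collected). The classes below are toy stand-ins, not DSI plugins.

from collections import OrderedDict

class ToyReader:
    def __init__(self):
        self.output_collector = OrderedDict()
    def add_rows(self, **kwargs):
        # a reader fills its collector with table_name -> {column -> list of values}
        self.output_collector["toy_table"] = OrderedDict({"a": [1, 2], "b": [3, 4]})

class ToyWriter:
    def get_rows(self, collected, **kwargs):
        # a writer consumes the merged middleware structure
        for table_name, columns in collected.items():
            print(table_name, dict(columns))

active_metadata = OrderedDict()
modules = {"reader": [ToyReader()], "writer": [ToyWriter()]}
for module_type, objs in modules.items():
    for obj in objs:
        if module_type == "reader":
            obj.add_rows()
            for table_name, table_metadata in obj.output_collector.items():
                active_metadata[table_name] = table_metadata
        elif module_type == "writer":
            obj.get_rows(active_metadata)
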
