Core terminal update #121

Merged 14 commits on Oct 29, 2024

2 changes: 0 additions & 2 deletions .github/workflows/test_file_reader.yml
@@ -27,8 +27,6 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install pyyaml
pip install toml
pip install .
- name: Test reader
run: |
2 changes: 0 additions & 2 deletions .github/workflows/test_file_writer.yml
@@ -26,8 +26,6 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install pyyaml
pip install toml
pip install .
- name: Test reader
run: |
2 changes: 0 additions & 2 deletions .github/workflows/test_plugin.yml
@@ -27,8 +27,6 @@ jobs:
python -m pip install --upgrade pip
pip install -r requirements.txt
python -m pip install opencv-python
pip install pyyaml
pip install toml
pip install .
pip install graphviz
sudo apt-get install graphviz
2 changes: 0 additions & 2 deletions .github/workflows/test_sqlite.yml
@@ -26,8 +26,6 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install pyyaml
pip install toml
pip install .
- name: Test reader
run: |
134 changes: 85 additions & 49 deletions dsi/backends/sqlite.py
@@ -20,7 +20,7 @@
# Holds table name and data properties

class DataType:
name = "TABLENAME" # Note: using the word DEFAULT outputs a syntax error
name = "" # Note: using the word DEFAULT outputs a syntax error
properties = {}
units = {}

@@ -33,7 +33,7 @@ class Artifact:
An Artifact is a generic construct that defines the schema for metadata that
defines the tables inside of SQL
"""
name = "TABLENAME"
name = ""
properties = {}


@@ -73,7 +73,7 @@ def check_type(self, text):
# Note 1: 'add column types' to be implemented.
# Note 2: TABLENAME is the default name for all tables created which might cause issues when creating multiple Sqlite files.

def put_artifact_type(self, types, isVerbose=False):
def put_artifact_type(self, types, foreign_query = None, isVerbose=False):
"""
Primary class for defining metadata Artifact schema.

@@ -82,10 +82,17 @@ def put_artifact_type(self, types, isVerbose=False):

`return`: none
"""

col_names = ', '.join(types.properties.keys())

str_query = "CREATE TABLE IF NOT EXISTS {} ({});".format(str(types.name), col_names)
key_names = types.properties.keys()
if "_units" in types.name:
key_names = [item + " UNIQUE" for item in types.properties.keys()]

col_names = ', '.join(key_names)

str_query = "CREATE TABLE IF NOT EXISTS {} ({}".format(str(types.name), col_names)

if foreign_query != None:
str_query += foreign_query
str_query += ");"

if isVerbose:
print(str_query)
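
For orientation, here is a minimal sketch of the SQL that the updated put_artifact_type builds; it only reproduces the query construction shown above, and the table name, column names, and foreign_query string are invented for illustration.

# Hedged sketch (not part of the diff): rebuilds the CREATE TABLE string with made-up names.
from collections import OrderedDict

class _Types:  # stand-in for DataType
    name = "wildfire__burns"
    properties = OrderedDict([("burn_id PRIMARY KEY", [1, 2]), ("wind_speed", [3.1, 4.7])])

types = _Types()
foreign_query = ", FOREIGN KEY (burn_id) REFERENCES wildfire__sites (site_id)"

key_names = types.properties.keys()
if "_units" in types.name:  # unit tables get UNIQUE columns
    key_names = [item + " UNIQUE" for item in types.properties.keys()]
col_names = ', '.join(key_names)

str_query = "CREATE TABLE IF NOT EXISTS {} ({}".format(str(types.name), col_names)
if foreign_query is not None:  # foreign-key clause is appended before the closing paren
    str_query += foreign_query
str_query += ");"
print(str_query)
# -> CREATE TABLE IF NOT EXISTS wildfire__burns (burn_id PRIMARY KEY, wind_speed, FOREIGN KEY (burn_id) REFERENCES wildfire__sites (site_id));
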
@@ -124,42 +131,62 @@ def put_artifacts(self, collection, isVerbose=False):
"""
# Core compatibility name assignment
artifacts = collection

types = DataType()
types.properties = {}

# Check if this has been defined from helper function
if self.types != None:
types.name = self.types.name

for key in artifacts:
types.properties[key.replace('-','_minus_')] = artifacts[key]

self.put_artifact_type(types)

col_names = ', '.join(types.properties.keys())
placeholders = ', '.join('?' * len(types.properties))

str_query = "INSERT INTO {} ({}) VALUES ({});".format(str(types.name), col_names, placeholders)
for tableName, tableData in artifacts.items():
if "dsi_relations" in tableName:
continue

types = DataType()
types.properties = {}

# Check if this has been defined from helper function
'''if self.types != None:
types.name = self.types.name'''
types.name = tableName

foreign_query = ""
for key in tableData:
comboTuple = (tableName, key)
dsi_name = tableName[:tableName.find("__")] + "__dsi_relations"
if dsi_name in artifacts.keys() and comboTuple in artifacts[dsi_name]["primary_key"]:
key += " PRIMARY KEY"
if dsi_name in artifacts.keys() and comboTuple in artifacts[dsi_name]["foreign_key"]:
foreignIndex = artifacts[dsi_name]["foreign_key"].index(comboTuple)
foreign_query += f", FOREIGN KEY ({key}) REFERENCES {artifacts[dsi_name]['primary_key'][foreignIndex][0]} ({artifacts[dsi_name]['primary_key'][foreignIndex][1]})"

types.properties[key.replace('-','_minus_')] = tableData[key]

if foreign_query != "":
self.put_artifact_type(types, foreign_query)
else:
self.put_artifact_type(types)

# col_list helps access the specific keys of the dictionary in the for loop
col_list = col_names.split(', ')
col_names = ', '.join(types.properties.keys())
placeholders = ', '.join('?' * len(types.properties))

# loop through the contents of each column and insert into table as a row
for ind1 in range(len(types.properties[col_list[0]])):
vals = []
for ind2 in range(len(types.properties.keys())):
vals.append(str(types.properties[col_list[ind2]][ind1]))
# Make sure this works if types.properties[][] is already a string
tup_vals = tuple(vals)
self.cur.execute(str_query,tup_vals)
if "_units" in tableName:
str_query = "INSERT OR IGNORE INTO {} ({}) VALUES ({});".format(str(types.name), col_names, placeholders)
else:
str_query = "INSERT INTO {} ({}) VALUES ({});".format(str(types.name), col_names, placeholders)

if isVerbose:
print(str_query)
# col_list helps access the specific keys of the dictionary in the for loop
col_list = col_names.split(', ')

self.con.commit()

self.types = types
# loop through the contents of each column and insert into table as a row
for ind1 in range(len(types.properties[col_list[0]])):
vals = []
for ind2 in range(len(types.properties.keys())):
vals.append(str(types.properties[col_list[ind2]][ind1]))
# Make sure this works if types.properties[][] is already a string
tup_vals = tuple(vals)
self.cur.execute(str_query,tup_vals)

if isVerbose:
print(str_query)

self.con.commit()

self.types = types #This will only copy the last collection from artifacts (collections input)

def put_artifacts_only(self, artifacts, isVerbose=False):
"""
@@ -337,13 +364,13 @@ def put_artifacts_csv(self, fname, tname, isVerbose=False):
#[END NOTE 2]

# Returns text list from query
def get_artifact_list(self, isVerbose=False):
def get_artifact_list(self, query, isVerbose=False):
"""
Function that returns a list of all of the Artifact names (represented as sql tables)

`return`: list of Artifact names
"""
str_query = "SELECT name FROM sqlite_master WHERE type='table';"
str_query = query
if isVerbose:
print(str_query)

@@ -357,8 +384,8 @@ def get_artifact_list(self, isVerbose=False):
return resout

# Returns reference from query
def get_artifacts(self, query):
self.get_artifacts_list()
def get_artifacts(self, query, isVerbose=False):
self.get_artifact_list(query, isVerbose)

# Closes connection to server
def close(self):
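
Note that get_artifact_list no longer hard-codes the listing query; callers now pass it in. A minimal usage sketch reusing the previous default query (the database path is illustrative):

store = Sqlite("wildfire.db")  # hypothetical path
tables = store.get_artifact_list("SELECT name FROM sqlite_master WHERE type='table';", isVerbose=True)
store.close()
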
@@ -577,6 +604,7 @@ def yamlToSqlite(self, filenames, db_name, deleteSql=True):
`deleteSql`: flag to delete temp SQL file that creates the database. Default is True, but change to False for testing or comparing outputs
"""

sql_statements = []
if isinstance(filenames, str):
filenames = [filenames]

@@ -600,9 +628,15 @@
createStmt += f"{key} {data_types[type(val)]}, "
insertUnitStmt+= "NULL, "

sql_file.write(createStmt[:-2] + ");\n\n")
sql_file.write(createUnitStmt[:-2] + ");\n\n")
sql_file.write(insertUnitStmt[:-2] + ");\n\n")
if createStmt not in sql_statements:
sql_statements.append(createStmt)
sql_file.write(createStmt[:-2] + ");\n\n")
if createUnitStmt not in sql_statements:
sql_statements.append(createUnitStmt)
sql_file.write(createUnitStmt[:-2] + ");\n\n")
if insertUnitStmt not in sql_statements:
sql_statements.append(insertUnitStmt)
sql_file.write(insertUnitStmt[:-2] + ");\n\n")

insertStmt = f"INSERT INTO {tableName} VALUES( "
for val in table['columns'].values():
@@ -613,12 +647,14 @@
else:
insertStmt+= f"{val}, "

sql_file.write(insertStmt[:-2] + ");\n\n")
if insertStmt not in sql_statements:
sql_statements.append(insertStmt)
sql_file.write(insertStmt[:-2] + ");\n\n")

subprocess.run(["sqlite3", db_name+".db"], stdin= open(db_name+".sql", "r"))
subprocess.run(["sqlite3", db_name+".db"], stdin= open(db_name+".sql", "r"))

if deleteSql == True:
os.remove(db_name+".sql")
if deleteSql == True:
os.remove(db_name+".sql")

def tomlDataToList(self, filenames):
"""
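
The sql_statements list introduced above is meant to deduplicate schema statements when several YAML files declare the same table. A rough usage sketch based on the existing test, under the assumption that the two schema files describe overlapping tables:

reader = Sqlite("yaml-test.db")
reader.yamlToSqlite(["examples/data/schema.yml", "examples/data/schema2.yml"], "yaml-test", deleteSql=False)
# yaml-test.sql should now contain each CREATE TABLE / units statement only once,
# while row INSERTs that differ between the two files are still all written.
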
45 changes: 14 additions & 31 deletions dsi/backends/tests/test_sqlite.py
@@ -32,7 +32,7 @@ def test_wildfire_data_csv_artifact():
assert True

def test_wildfiredata_artifact_put():
valid_middleware_datastructure = OrderedDict({'foo':[1,2,3],'bar':[3,2,1]})
valid_middleware_datastructure = OrderedDict({"wildfire": OrderedDict({'foo':[1,2,3],'bar':[3,2,1]})})
dbpath = 'test_wildfiredata_artifact.sqlite_data'
store = Sqlite(dbpath)
store.put_artifacts(valid_middleware_datastructure)
@@ -44,7 +44,7 @@ def test_wildfiredata_artifact_put_t():
valid_middleware_datastructure = OrderedDict({'foo':[1,2,3],'bar':[3,2,1]})
dbpath = 'test_wildfiredata_artifact.sqlite_data'
store = Sqlite(dbpath)
store.put_artifacts_t(valid_middleware_datastructure, tableName="Wildfire")
store.put_artifacts_t(OrderedDict([("wildfire", valid_middleware_datastructure)]), tableName="Wildfire")
store.close()
# No error implies success
assert True
@@ -69,32 +69,15 @@ def test_yosemite_data_csv_artifact():
assert True


def test_artifact_query():
dbpath = "wildfire.db"
store = Sqlite(dbpath)
_ = store.get_artifact_list(isVerbose=isVerbose)
data_type = DataType()
data_type.name = "simulation"
result = store.sqlquery("SELECT *, MAX(wind_speed) AS max_windspeed FROM " +
str(data_type.name) + " GROUP BY safe_unsafe_fire_behavior")
store.export_csv(result, "TABLENAME", "query.csv")
store.close()
# No error implies success
assert True


def test_yaml_reader():
reader = Sqlite("yaml-test.db")
reader.yamlToSqlite(["examples/data/schema.yml", "examples/data/schema2.yml"], "yaml-test", deleteSql=False)
subprocess.run(["diff", "examples/data/compare-schema.sql", "yaml-test.sql"], stdout=open("compare_sql.txt", "w"))
file_size = os.path.getsize("compare_sql.txt")

assert file_size == 0 #difference between sql files should be 0 characters

def test_toml_reader():
reader = Sqlite("toml-test.db")
reader.tomlToSqlite(["examples/data/schema.toml", "examples/data/schema2.toml"], "toml-test", deleteSql=False)
subprocess.run(["diff", "examples/data/compare-schema.sql", "toml-test.sql"], stdout=open("compare_sql.txt", "w"))
file_size = os.path.getsize("compare_sql.txt")

assert file_size == 0 #difference between sql files should be 0 characters
# def test_artifact_query():
# dbpath = "wildfire.db"
# store = Sqlite(dbpath)
# _ = store.get_artifact_list(isVerbose=isVerbose)
# data_type = DataType()
# data_type.name = "simulation"
# result = store.sqlquery("SELECT *, MAX(wind_speed) AS max_windspeed FROM " +
# str(data_type.name) + " GROUP BY safe_unsafe_fire_behavior")
# store.export_csv(result, "TABLENAME", "query.csv")
# store.close()
# # No error implies success
# assert True
18 changes: 12 additions & 6 deletions dsi/core.py
@@ -21,8 +21,8 @@ class Terminal():
BACKEND_PREFIX = ['dsi.backends']
BACKEND_IMPLEMENTATIONS = ['gufi', 'sqlite', 'parquet']
PLUGIN_PREFIX = ['dsi.plugins']
PLUGIN_IMPLEMENTATIONS = ['env', 'file_reader']
VALID_PLUGINS = ['Hostname', 'SystemKernel', 'GitInfo', 'Bueno', 'Csv']
PLUGIN_IMPLEMENTATIONS = ['env', 'file_reader', 'file_writer']
VALID_PLUGINS = ['Hostname', 'SystemKernel', 'GitInfo', 'Bueno', 'Csv', 'ER_Diagram', 'YAML', 'TOML', "Table_Plot"]
VALID_BACKENDS = ['Gufi', 'Sqlite', 'Parquet']
VALID_MODULES = VALID_PLUGINS + VALID_BACKENDS
VALID_MODULE_FUNCTIONS = {'plugin': [
@@ -166,20 +166,25 @@ def transload(self, **kwargs):
data sources to a single DSI Core Middleware data structure.
"""
selected_function_modules = dict(
(k, self.active_modules[k]) for k in ('writer', 'reader'))
(k, self.active_modules[k]) for k in ('reader', 'writer'))
# Note this transload supports plugin.env Environment types now.
for module_type, objs in selected_function_modules.items():
for obj in objs:
obj.add_rows(**kwargs)
for col_name, col_metadata in obj.output_collector.items():
self.active_metadata[col_name] = col_metadata
if module_type == "reader":
obj.add_rows(**kwargs)
for table_name, table_metadata in obj.output_collector.items():
self.active_metadata[table_name] = table_metadata
elif module_type == "writer":
obj.get_rows(self.active_metadata, **kwargs)

# Plugins may add one or more rows (vector vs matrix data).
# You may have two or more plugins with different numbers of rows.
# Consequently, transload operations may have unstructured shape for
# some plugin configurations. We must force structure to create a valid
# middleware data structure.
# To resolve, we pad the shorter columns to match the max length column.
#COMMENTED OUT TILL LATER
'''
max_len = max([len(col) for col in self.active_metadata.values()])
for colname, coldata in self.active_metadata.items():
if len(coldata) != max_len:
@@ -188,6 +193,7 @@

assert all([len(col) == max_len for col in self.active_metadata.values(
)]), "All columns must have the same number of rows"
'''

self.transload_lock = True
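
To make the new reader/writer split concrete, here is a small self-contained sketch of the contract transload now assumes: readers expose add_rows() plus an output_collector keyed by table name, and writers expose get_rows(collected). The classes below are toy stand-ins, not DSI plugins.

from collections import OrderedDict

class ToyReader:
    def __init__(self):
        self.output_collector = OrderedDict()
    def add_rows(self, **kwargs):
        # a reader fills its collector with table_name -> {column -> list of values}
        self.output_collector["toy_table"] = OrderedDict({"a": [1, 2], "b": [3, 4]})

class ToyWriter:
    def get_rows(self, collected, **kwargs):
        # a writer consumes the merged middleware structure
        for table_name, columns in collected.items():
            print(table_name, dict(columns))

active_metadata = OrderedDict()
modules = {"reader": [ToyReader()], "writer": [ToyWriter()]}
for module_type, objs in modules.items():
    for obj in objs:
        if module_type == "reader":
            obj.add_rows()
            for table_name, table_metadata in obj.output_collector.items():
                active_metadata[table_name] = table_metadata
        elif module_type == "writer":
            obj.get_rows(active_metadata)
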
