Commit 3a9857a

Merge pull request #122 from lanl/core_logger

Core logger

jpulidojr authored Oct 31, 2024
2 parents 436a49d + 1a48365

Showing 14 changed files with 271 additions and 227 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test_file_reader.yml
@@ -15,7 +15,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ['3.10']
+        python-version: ['3.11']
 
     steps:
       - uses: actions/checkout@v4
2 changes: 1 addition & 1 deletion .github/workflows/test_file_writer.yml
@@ -14,7 +14,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ['3.10']
+        python-version: ['3.11']
 
     steps:
       - uses: actions/checkout@v4
2 changes: 1 addition & 1 deletion .github/workflows/test_plugin.yml
@@ -14,7 +14,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ['3.10']
+        python-version: ['3.11']
 
     steps:
       - uses: actions/checkout@v4
2 changes: 1 addition & 1 deletion .github/workflows/test_sqlite.yml
@@ -14,7 +14,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
      matrix:
-        python-version: ['3.10']
+        python-version: ['3.11']
 
     steps:
       - uses: actions/checkout@v4
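Note: the bump from Python 3.10 to 3.11 across these four workflows lines up with the switch to the standard-library tomllib parser in dsi/plugins/file_reader.py below, since tomllib first shipped with CPython 3.11 (PEP 680). A minimal sketch of the version guard this implies; the standalone tomli package in the fallback branch is an illustrative assumption (the commit itself falls back to pip's vendored copy):

import sys

# tomllib joined the standard library in Python 3.11 (PEP 680).
if sys.version_info >= (3, 11):
    import tomllib
else:
    # Assumed fallback for older interpreters; any parser exposing a
    # tomllib-compatible load()/loads() (e.g. tomli) would do.
    import tomli as tomllib

with open("example.toml", "rb") as f:  # tomllib requires binary mode
    config = tomllib.load(f)
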
299 changes: 149 additions & 150 deletions dsi/backends/sqlite.py

Large diffs are not rendered by default.

68 changes: 58 additions & 10 deletions dsi/core.py
@@ -5,6 +5,8 @@
 import os
 import shutil
 from pathlib import Path
+import logging
+from datetime import datetime
 
 from dsi.backends.filesystem import Filesystem
 from dsi.backends.sqlite import Sqlite, DataType, Artifact
@@ -15,7 +17,7 @@ class Terminal():
     An instantiated Terminal is the DSI human/machine interface.
     Terminals are a home for Plugins and an interface for Backends. Backends may be
-    front-ends or back-ends. Plugins may be Writers or readers. See documentation
+    back-reads or back-writes. Plugins may be Writers or readers. See documentation
     for more information.
     """
     BACKEND_PREFIX = ['dsi.backends']
@@ -26,10 +28,10 @@ class Terminal():
     VALID_BACKENDS = ['Gufi', 'Sqlite', 'Parquet']
     VALID_MODULES = VALID_PLUGINS + VALID_BACKENDS
     VALID_MODULE_FUNCTIONS = {'plugin': [
-        'writer', 'reader'], 'backend': ['front-end', 'back-end']}
+        'writer', 'reader'], 'backend': ['back-read', 'back-write']}
     VALID_ARTIFACT_INTERACTION_TYPES = ['get', 'set', 'put', 'inspect']
 
-    def __init__(self):
+    def __init__(self, debug_flag = False):
         # Helper function to get parent module names.
         def static_munge(prefix, implementations):
             return (['.'.join(i) for i in product(prefix, implementations)])
@@ -55,6 +57,16 @@ def static_munge(prefix, implementations):
         self.active_metadata = OrderedDict()
         self.transload_lock = False
 
+        self.logger = logging.getLogger(self.__class__.__name__)
+
+        if debug_flag:
+            logging.basicConfig(
+                filename='logger.txt',  # Name of the log file
+                filemode='a',  # Append mode ('w' for overwrite)
+                format='%(asctime)s - %(levelname)s - %(message)s',  # Log message format
+                level=logging.INFO  # Minimum log level to capture
+            )
+
     def list_available_modules(self, mod_type):
         """
         List available DSI modules of an arbitrary module type.
@@ -82,16 +94,25 @@ def load_module(self, mod_type, mod_name, mod_function, **kwargs):
         We expect most users will work with module implementations rather than templates,
         but all high level class abstractions are accessible with this method.
         """
+        self.logger.info(f"-------------------------------------")
+        self.logger.info(f"Loading {mod_name} {mod_function} {mod_type}")
+        start = datetime.now()
         if self.transload_lock and mod_type == 'plugin':
             print('Plugin module loading is prohibited after transload. No action taken.')
+            end = datetime.now()
+            self.logger.info(f"Runtime: {end-start}")
             return
         if mod_function not in self.VALID_MODULE_FUNCTIONS[mod_type]:
             print(
                 'Hint: Did you declare your Module Function in the Terminal Global vars?')
+            end = datetime.now()
+            self.logger.info(f"Runtime: {end-start}")
             raise NotImplementedError
         if mod_name in [obj.__class__.__name__ for obj in self.active_modules[mod_function]]:
             print('{} {} already loaded as {}. Nothing to do.'.format(
                 mod_name, mod_type, mod_function))
+            end = datetime.now()
+            self.logger.info(f"Runtime: {end-start}")
             return
         # DSI Modules are Python classes.
         class_name = mod_name
@@ -109,7 +130,11 @@ def load_module(self, mod_type, mod_name, mod_function, **kwargs):
                 mod_name, mod_type, mod_function))
         else:
             print('Hint: Did you declare your Plugin/Backend in the Terminal Global vars?')
+            end = datetime.now()
+            self.logger.info(f"Runtime: {end-start}")
             raise NotImplementedError
+        end = datetime.now()
+        self.logger.info(f"Runtime: {end-start}")
 
     def unload_module(self, mod_type, mod_name, mod_function):
         """
@@ -170,12 +195,20 @@ def transload(self, **kwargs):
         # Note this transload supports plugin.env Environment types now.
         for module_type, objs in selected_function_modules.items():
             for obj in objs:
+                self.logger.info(f"-------------------------------------")
+                self.logger.info(obj.__class__.__name__ + f" {module_type}")
                 if module_type == "reader":
+                    start = datetime.now()
                     obj.add_rows(**kwargs)
                     for table_name, table_metadata in obj.output_collector.items():
                         self.active_metadata[table_name] = table_metadata
+                    end = datetime.now()
+                    self.logger.info(f"Runtime: {end-start}")
                 elif module_type == "writer":
+                    start = datetime.now()
                     obj.get_rows(self.active_metadata, **kwargs)
+                    end = datetime.now()
+                    self.logger.info(f"Runtime: {end-start}")
 
         # Plugins may add one or more rows (vector vs matrix data).
         # You may have two or more plugins with different numbers of rows.
@@ -197,12 +230,12 @@ def transload(self, **kwargs):
 
         self.transload_lock = True
 
-    def artifact_handler(self, interaction_type, **kwargs):
+    def artifact_handler(self, interaction_type, query = None, **kwargs):
         """
         Store or retrieve using all loaded DSI Backends with storage functionality.
 
         A DSI Core Terminal may load zero or more Backends with storage functionality.
-        Calling artifact_handler will execute all back-end functionality currently loaded, given
+        Calling artifact_handler will execute all back-write functionality currently loaded, given
         the provided ``interaction_type``.
         """
         if interaction_type not in self.VALID_ARTIFACT_INTERACTION_TYPES:
@@ -213,25 +246,40 @@ def artifact_handler(self, interaction_type, **kwargs):
         # Perform artifact movement first, because inspect implementation may rely on
         # self.active_metadata or some stored artifact.
         selected_function_modules = dict(
-            (k, self.active_modules[k]) for k in (['back-end']))
+            (k, self.active_modules[k]) for k in (['back-write']))
         for module_type, objs in selected_function_modules.items():
             for obj in objs:
+                self.logger.info(f"-------------------------------------")
+                self.logger.info(obj.__class__.__name__ + f" {module_type} - {interaction_type} the data")
                 if interaction_type == 'put' or interaction_type == 'set':
+                    start = datetime.now()
                     obj.put_artifacts(
                         collection=self.active_metadata, **kwargs)
                     operation_success = True
+                    end = datetime.now()
+                    self.logger.info(f"Runtime: {end-start}")
                 elif interaction_type == 'get':
-                    self.active_metadata = obj.get_artifacts(**kwargs)
+                    self.logger.info(f"Query to get data: {query}")
+                    start = datetime.now()
+                    if query != None:
+                        self.active_metadata = obj.get_artifacts(query, **kwargs)
+                    else:
+                        raise ValueError("Need to specify a query of the database to return data")
                     operation_success = True
-        if interaction_type == 'inspect':
-            for module_type, objs in selected_function_modules.items():
-                for obj in objs:
+                    end = datetime.now()
+                    self.logger.info(f"Runtime: {end-start}")
+                elif interaction_type == 'inspect':
+                    start = datetime.now()
                     obj.put_artifacts(
                         collection=self.active_metadata, **kwargs)
                     self.active_metadata = obj.inspect_artifacts(
                         collection=self.active_metadata, **kwargs)
                     operation_success = True
+                    end = datetime.now()
+                    self.logger.info(f"Runtime: {end-start}")
         if operation_success:
+            if self.active_metadata:
+                return self.active_metadata
+            return
         else:
             print(
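Taken together, the dsi/core.py changes above add opt-in runtime logging (Terminal(debug_flag=True) appends timing info to logger.txt) and make artifact_handler's 'get' path require an explicit query, raising ValueError otherwise. A minimal usage sketch against the new signatures; the file paths and query string are illustrative, not taken from the commit:

from dsi.core import Terminal

term = Terminal(debug_flag=True)  # runtimes now logged to logger.txt

# Illustrative module choices; any reader plugin plus a Sqlite
# back-write would exercise the same code paths.
term.load_module('plugin', 'TOML', 'reader',
                 filenames=['data/results.toml'], target_table_prefix='results')
term.load_module('backend', 'Sqlite', 'back-write', filename='data/data.db')

term.transload()
term.artifact_handler(interaction_type='put')

# 'get' now requires an explicit query; omitting it raises ValueError.
data = term.artifact_handler(
    interaction_type='get',
    query="SELECT * FROM sqlite_master WHERE type='table';")
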
93 changes: 42 additions & 51 deletions dsi/plugins/file_reader.py
@@ -6,8 +6,10 @@
 from pandas import DataFrame, read_csv, concat
 import re
 import yaml
-import toml
-import ast
+try: import tomllib
+except ModuleNotFoundError: import pip._vendor.tomli as tomllib
+
+# import ast
 
 from dsi.plugins.metadata import StructuredMetadata

@@ -284,59 +286,48 @@ def pack_header(self) -> None:
             table_info.append((self.target_table_prefix + "__" + table_name, list(self.toml_data[table_name].keys())))
         self.set_schema(table_info)
 
-    def check_type(self, text):
-        """
-        Tests input text and returns a predicted compatible SQL Type
-        `text`: text string
-        `return`: string returned as int, float or still a string
-        """
-        try:
-            _ = int(text)
-            return int(text)
-        except ValueError:
-            try:
-                _ = float(text)
-                return float(text)
-            except ValueError:
-                return text
-
     def add_rows(self) -> None:
         """
         Parses TOML data and creates an ordered dict whose keys are table names and values are an ordered dict for each table.
         """
         for filename in self.toml_files:
-            with open(filename, 'r+') as temp_file:
-                editedString = temp_file.read()
-                if '"{' not in editedString:
-                    editedString = re.sub('{', '"{', editedString)
-                    editedString = re.sub('}', '}"', editedString)
-                    temp_file.seek(0)
-                    temp_file.write(editedString)
-
-            with open(filename, 'r') as toml_file:
-                toml_load_data = toml.load(toml_file)
+            # with open(filename, 'r+') as temp_file:
+            #     editedString = temp_file.read()
+            #     if '"{' not in editedString:
+            #         editedString = re.sub('{', '"{', editedString)
+            #         editedString = re.sub('}', '}"', editedString)
+            #         temp_file.seek(0)
+            #         temp_file.write(editedString)
+
+            toml_load_data = None
+            with open(filename, 'rb') as toml_file:
+                toml_load_data = tomllib.load(toml_file)
 
             if not self.schema_is_set():
                 for tableName, tableData in toml_load_data.items():
                     self.toml_data[tableName] = OrderedDict((key, []) for key in tableData.keys())
                     self.toml_data[tableName + "_units"] = OrderedDict((key, []) for key in tableData.keys())
                 self.toml_data["dsi_relations"] = OrderedDict([('primary_key', []), ('foreign_key', [])])
                 self.pack_header()
 
             for tableName, tableData in toml_load_data.items():
                 row = []
                 unit_row = []
                 for col_name, data in tableData.items():
                     unit_data = "NULL"
-                    if isinstance(data, str) and data[0] == "{" and data[-1] == "}":
-                        data = ast.literal_eval(data)
+                    if isinstance(data, dict):
                         unit_data = data["units"]
                         data = data["value"]
+                    # IF statement for manual data parsing for python 3.10 and below
+                    # if isinstance(data, str) and data[0] == "{" and data[-1] == "}":
+                    #     data = ast.literal_eval(data)
+                    #     unit_data = data["units"]
+                    #     data = data["value"]
                     self.toml_data[tableName][col_name].append(data)
                     if len(self.toml_data[tableName + "_units"][col_name]) < 1:
                         unit_row.append(unit_data)
                     self.toml_data[tableName + "_units"][col_name].append(unit_data)
                     row.append(data)
                 self.add_to_output(row, self.target_table_prefix + "__" + tableName)
                 if len(next(iter(self.output_collector[self.target_table_prefix + "__" + tableName + "_units"].values()))) < 1:
                     self.add_to_output(unit_row, self.target_table_prefix + "__" + tableName + "_units")
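The practical effect of this rewrite: the old path pre-munged the raw file so inline tables survived toml.load as quoted strings, then rebuilt them with ast.literal_eval and guessed column types with check_type(), whereas tomllib returns fully typed Python values, so inline tables arrive as plain dicts and both helpers could be dropped. A small sketch, assuming Python 3.11+ for the stdlib tomllib; the snippet mirrors the syntax of examples/data/results.toml below:

import tomllib  # stdlib on Python 3.11+

doc = tomllib.loads("""
[people]
avg_height = {'value'= 5.5, 'units'= 'm'}
""")

data = doc["people"]["avg_height"]
assert isinstance(data, dict)        # takes the new isinstance(data, dict) branch
print(data["value"], data["units"])  # 5.5 m
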
22 changes: 12 additions & 10 deletions examples/coreterminal.py
@@ -3,7 +3,7 @@
 
 '''This is an example workflow using core.py'''
 
-a=Terminal()
+a=Terminal(debug_flag=True)
 
 # a.list_available_modules('plugin')
 # ['GitInfo', 'Hostname', 'SystemKernel', 'Bueno', 'Csv']
@@ -17,30 +17,32 @@
 # a.list_available_modules('backend')
 # ['Gufi', 'Sqlite', 'Parquet']
 
 #a.load_module('plugin', 'YAML', 'reader', filenames=["data/schema.yml", "data/schema2.yml"], target_table_prefix = "schema")
 a.load_module('plugin', 'YAML', 'reader', filenames=["data/student_test1.yml", "data/student_test2.yml"], target_table_prefix = "student")
 #a.load_module('plugin', 'YAML', 'reader', filenames=["data/cmf.yml", "data/cmf.yml"], target_table_name = "cmf")
 
 # print(a.active_metadata)
+a.load_module('plugin', 'TOML', 'reader', filenames=["data/schema.toml", "data/schema2.toml"], target_table_prefix = "schema")
+a.load_module('plugin', 'TOML', 'reader', filenames=["data/results.toml"], target_table_prefix = "results")
 # print(a.active_metadata)
-a.load_module('backend','Sqlite','back-end', filename='data/data.db')
-#a.load_module('backend','Sqlite','back-end', filename='data/data2.db')
-# a.load_module('backend','Parquet','back-end',filename='./data/bueno.pq')
+a.load_module('backend','Sqlite','back-write', filename='data/data.db')
+#a.load_module('backend','Sqlite','back-write', filename='data/data2.db')
+# a.load_module('backend','Parquet','back-write',filename='./data/bueno.pq')
 
-a.load_module('plugin', "Table_Plot", "writer", table_name = "schema_physics", filename = "schema_physics")
+#a.load_module('plugin', "Table_Plot", "writer", table_name = "schema_physics", filename = "schema_physics")
 
 a.transload()
 a.artifact_handler(interaction_type='put')
 # a.list_loaded_modules()
 # {'writer': [<dsi.plugins.env.Hostname object at 0x7f21232474d0>],
 #  'reader': [<dsi.plugins.env.Bueno object at 0x7f2123247410>],
 #  'front-end': [],
-#  'back-end': []}
+#  'back-write': []}
 
 
 # Example use
 # a.load_module('plugin','Bueno','reader',filenames='data/bueno1.data')
-# a.load_module('backend','Sqlite','back-end',filename='data/bueno.db')
+# a.load_module('backend','Sqlite','back-write',filename='data/bueno.db')
 # a.transload()
 # a.artifact_handler(interaction_type='put')
-# a.artifact_handler(interaction_type='get', query = "SELECT * FROM sqlite_master WHERE type='table';", isVerbose = True)
+data = a.artifact_handler(interaction_type='get', query = "SELECT * FROM sqlite_master WHERE type='table';")#, isVerbose = True)
+#CAN PRINT THE DATA OUTPUT
+# print(data)
5 changes: 5 additions & 0 deletions examples/data/results.toml
@@ -0,0 +1,5 @@
+
+[people]
+avg_height = {'value'= 5.5, 'units'= 'm'} # overall average height
+median_speed = {'value'= 6.95, 'units'= 's'} # overall median speed
+std_gravity = {'value'= 9.83, 'units'= 'm/s/s'} # overall std dev gravity
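For orientation, here is a hypothetical sketch, inferred from the add_rows() logic above rather than output shown in the commit, of the metadata the TOML reader would collect from this file when loaded with target_table_prefix = "results" as in examples/coreterminal.py:

# Hypothetical collected shape; "results__" comes from target_table_prefix.
expected = {
    "results__people": {
        "avg_height": [5.5],
        "median_speed": [6.95],
        "std_gravity": [9.83],
    },
    "results__people_units": {
        "avg_height": ["m"],
        "median_speed": ["s"],
        "std_gravity": ["m/s/s"],
    },
}
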
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
3 changes: 1 addition & 2 deletions requirements.txt
@@ -4,5 +4,4 @@ pydantic>=2.1.1
 nbconvert>=7.13.0
 gitpython>=3.0.0
 matplotlib>=3.6.0
-pyyaml>=6.0
-toml>=0.10.2
+pyyaml>=6.0
