Commit 3a9857a

Merge pull request #122 from lanl/core_logger

Core logger

jpulidojr authored Oct 31, 2024
2 parents 436a49d + 1a48365

Showing 14 changed files with 271 additions and 227 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test_file_reader.yml
@@ -15,7 +15,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ['3.10']
+        python-version: ['3.11']
 
     steps:
       - uses: actions/checkout@v4
2 changes: 1 addition & 1 deletion .github/workflows/test_file_writer.yml
@@ -14,7 +14,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ['3.10']
+        python-version: ['3.11']
 
     steps:
       - uses: actions/checkout@v4
2 changes: 1 addition & 1 deletion .github/workflows/test_plugin.yml
@@ -14,7 +14,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ['3.10']
+        python-version: ['3.11']
 
     steps:
       - uses: actions/checkout@v4
2 changes: 1 addition & 1 deletion .github/workflows/test_sqlite.yml
@@ -14,7 +14,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
      matrix:
-        python-version: ['3.10']
+        python-version: ['3.11']
 
     steps:
       - uses: actions/checkout@v4
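Note: the bump from Python 3.10 to 3.11 across these four workflows lines up with the switch to the standard-library tomllib parser in dsi/plugins/file_reader.py below, since tomllib first shipped with CPython 3.11 (PEP 680). A minimal sketch of the version guard this implies; the standalone tomli package in the fallback branch is an illustrative assumption (the commit itself falls back to pip's vendored copy):

import sys

# tomllib joined the standard library in Python 3.11 (PEP 680).
if sys.version_info >= (3, 11):
    import tomllib
else:
    # Assumed fallback for older interpreters; any parser exposing a
    # tomllib-compatible load()/loads() (e.g. tomli) would do.
    import tomli as tomllib

with open("example.toml", "rb") as f:  # tomllib requires binary mode
    config = tomllib.load(f)
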
299 changes: 149 additions & 150 deletions dsi/backends/sqlite.py

Large diffs are not rendered by default.

68 changes: 58 additions & 10 deletions dsi/core.py
@@ -5,6 +5,8 @@
 import os
 import shutil
 from pathlib import Path
+import logging
+from datetime import datetime
 
 from dsi.backends.filesystem import Filesystem
 from dsi.backends.sqlite import Sqlite, DataType, Artifact
@@ -15,7 +17,7 @@ class Terminal():
     An instantiated Terminal is the DSI human/machine interface.
     Terminals are a home for Plugins and an interface for Backends. Backends may be
-    front-ends or back-ends. Plugins may be Writers or readers. See documentation
+    back-reads or back-writes. Plugins may be Writers or readers. See documentation
     for more information.
     """
     BACKEND_PREFIX = ['dsi.backends']
@@ -26,10 +28,10 @@ class Terminal():
     VALID_BACKENDS = ['Gufi', 'Sqlite', 'Parquet']
     VALID_MODULES = VALID_PLUGINS + VALID_BACKENDS
     VALID_MODULE_FUNCTIONS = {'plugin': [
-        'writer', 'reader'], 'backend': ['front-end', 'back-end']}
+        'writer', 'reader'], 'backend': ['back-read', 'back-write']}
     VALID_ARTIFACT_INTERACTION_TYPES = ['get', 'set', 'put', 'inspect']
 
-    def __init__(self):
+    def __init__(self, debug_flag = False):
         # Helper function to get parent module names.
         def static_munge(prefix, implementations):
             return (['.'.join(i) for i in product(prefix, implementations)])
@@ -55,6 +57,16 @@ def static_munge(prefix, implementations):
         self.active_metadata = OrderedDict()
         self.transload_lock = False
 
+        self.logger = logging.getLogger(self.__class__.__name__)
+
+        if debug_flag:
+            logging.basicConfig(
+                filename='logger.txt',  # Name of the log file
+                filemode='a',  # Append mode ('w' for overwrite)
+                format='%(asctime)s - %(levelname)s - %(message)s',  # Log message format
+                level=logging.INFO  # Minimum log level to capture
+            )
+
     def list_available_modules(self, mod_type):
         """
         List available DSI modules of an arbitrary module type.
@@ -82,16 +94,25 @@ def load_module(self, mod_type, mod_name, mod_function, **kwargs):
         We expect most users will work with module implementations rather than templates,
         but all high level class abstractions are accessible with this method.
         """
+        self.logger.info(f"-------------------------------------")
+        self.logger.info(f"Loading {mod_name} {mod_function} {mod_type}")
+        start = datetime.now()
         if self.transload_lock and mod_type == 'plugin':
             print('Plugin module loading is prohibited after transload. No action taken.')
+            end = datetime.now()
+            self.logger.info(f"Runtime: {end-start}")
             return
         if mod_function not in self.VALID_MODULE_FUNCTIONS[mod_type]:
             print(
                 'Hint: Did you declare your Module Function in the Terminal Global vars?')
+            end = datetime.now()
+            self.logger.info(f"Runtime: {end-start}")
             raise NotImplementedError
         if mod_name in [obj.__class__.__name__ for obj in self.active_modules[mod_function]]:
             print('{} {} already loaded as {}. Nothing to do.'.format(
                 mod_name, mod_type, mod_function))
+            end = datetime.now()
+            self.logger.info(f"Runtime: {end-start}")
             return
         # DSI Modules are Python classes.
         class_name = mod_name
@@ -109,7 +130,11 @@ def load_module(self, mod_type, mod_name, mod_function, **kwargs):
                 mod_name, mod_type, mod_function))
         else:
             print('Hint: Did you declare your Plugin/Backend in the Terminal Global vars?')
+            end = datetime.now()
+            self.logger.info(f"Runtime: {end-start}")
             raise NotImplementedError
+        end = datetime.now()
+        self.logger.info(f"Runtime: {end-start}")
 
     def unload_module(self, mod_type, mod_name, mod_function):
         """
@@ -170,12 +195,20 @@ def transload(self, **kwargs):
         # Note this transload supports plugin.env Environment types now.
         for module_type, objs in selected_function_modules.items():
             for obj in objs:
+                self.logger.info(f"-------------------------------------")
+                self.logger.info(obj.__class__.__name__ + f" {module_type}")
                 if module_type == "reader":
+                    start = datetime.now()
                     obj.add_rows(**kwargs)
                     for table_name, table_metadata in obj.output_collector.items():
                         self.active_metadata[table_name] = table_metadata
+                    end = datetime.now()
+                    self.logger.info(f"Runtime: {end-start}")
                 elif module_type == "writer":
+                    start = datetime.now()
                     obj.get_rows(self.active_metadata, **kwargs)
+                    end = datetime.now()
+                    self.logger.info(f"Runtime: {end-start}")
 
         # Plugins may add one or more rows (vector vs matrix data).
         # You may have two or more plugins with different numbers of rows.
@@ -197,12 +230,12 @@ def transload(self, **kwargs):
 
         self.transload_lock = True
 
-    def artifact_handler(self, interaction_type, **kwargs):
+    def artifact_handler(self, interaction_type, query = None, **kwargs):
         """
         Store or retrieve using all loaded DSI Backends with storage functionality.
 
         A DSI Core Terminal may load zero or more Backends with storage functionality.
-        Calling artifact_handler will execute all back-end functionality currently loaded, given
+        Calling artifact_handler will execute all back-write functionality currently loaded, given
         the provided ``interaction_type``.
         """
         if interaction_type not in self.VALID_ARTIFACT_INTERACTION_TYPES:
@@ -213,25 +246,40 @@ def artifact_handler(self, interaction_type, **kwargs):
         # Perform artifact movement first, because inspect implementation may rely on
         # self.active_metadata or some stored artifact.
         selected_function_modules = dict(
-            (k, self.active_modules[k]) for k in (['back-end']))
+            (k, self.active_modules[k]) for k in (['back-write']))
         for module_type, objs in selected_function_modules.items():
             for obj in objs:
+                self.logger.info(f"-------------------------------------")
+                self.logger.info(obj.__class__.__name__ + f" {module_type} - {interaction_type} the data")
                 if interaction_type == 'put' or interaction_type == 'set':
+                    start = datetime.now()
                     obj.put_artifacts(
                         collection=self.active_metadata, **kwargs)
                     operation_success = True
+                    end = datetime.now()
+                    self.logger.info(f"Runtime: {end-start}")
                 elif interaction_type == 'get':
-                    self.active_metadata = obj.get_artifacts(**kwargs)
+                    self.logger.info(f"Query to get data: {query}")
+                    start = datetime.now()
+                    if query != None:
+                        self.active_metadata = obj.get_artifacts(query, **kwargs)
+                    else:
+                        raise ValueError("Need to specify a query of the database to return data")
                     operation_success = True
-        if interaction_type == 'inspect':
-            for module_type, objs in selected_function_modules.items():
-                for obj in objs:
+                    end = datetime.now()
+                    self.logger.info(f"Runtime: {end-start}")
+                elif interaction_type == 'inspect':
+                    start = datetime.now()
                     obj.put_artifacts(
                         collection=self.active_metadata, **kwargs)
                     self.active_metadata = obj.inspect_artifacts(
                         collection=self.active_metadata, **kwargs)
                     operation_success = True
+                    end = datetime.now()
+                    self.logger.info(f"Runtime: {end-start}")
         if operation_success:
+            if self.active_metadata:
+                return self.active_metadata
+            return
         else:
             print(
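Taken together, the dsi/core.py changes above add opt-in runtime logging (Terminal(debug_flag=True) appends timing info to logger.txt) and make artifact_handler's 'get' path require an explicit query, raising ValueError otherwise. A minimal usage sketch against the new signatures; the file paths and query string are illustrative, not taken from the commit:

from dsi.core import Terminal

term = Terminal(debug_flag=True)  # runtimes now logged to logger.txt

# Illustrative module choices; any reader plugin plus a Sqlite
# back-write would exercise the same code paths.
term.load_module('plugin', 'TOML', 'reader',
                 filenames=['data/results.toml'], target_table_prefix='results')
term.load_module('backend', 'Sqlite', 'back-write', filename='data/data.db')

term.transload()
term.artifact_handler(interaction_type='put')

# 'get' now requires an explicit query; omitting it raises ValueError.
data = term.artifact_handler(
    interaction_type='get',
    query="SELECT * FROM sqlite_master WHERE type='table';")
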
93 changes: 42 additions & 51 deletions dsi/plugins/file_reader.py
@@ -6,8 +6,10 @@
 from pandas import DataFrame, read_csv, concat
 import re
 import yaml
-import toml
-import ast
+try: import tomllib
+except ModuleNotFoundError: import pip._vendor.tomli as tomllib
+
+# import ast
 
 from dsi.plugins.metadata import StructuredMetadata

@@ -284,59 +286,48 @@ def pack_header(self) -> None:
             table_info.append((self.target_table_prefix + "__" + table_name, list(self.toml_data[table_name].keys())))
         self.set_schema(table_info)
 
-    def check_type(self, text):
-        """
-        Tests input text and returns a predicted compatible SQL Type
-        `text`: text string
-        `return`: string returned as int, float or still a string
-        """
-        try:
-            _ = int(text)
-            return int(text)
-        except ValueError:
-            try:
-                _ = float(text)
-                return float(text)
-            except ValueError:
-                return text
-
     def add_rows(self) -> None:
         """
         Parses TOML data and creates an ordered dict whose keys are table names and values are an ordered dict for each table.
         """
         for filename in self.toml_files:
-            with open(filename, 'r+') as temp_file:
-                editedString = temp_file.read()
-                if '"{' not in editedString:
-                    editedString = re.sub('{', '"{', editedString)
-                    editedString = re.sub('}', '}"', editedString)
-                    temp_file.seek(0)
-                    temp_file.write(editedString)
-
-            with open(filename, 'r') as toml_file:
-                toml_load_data = toml.load(toml_file)
+            # with open(filename, 'r+') as temp_file:
+            #     editedString = temp_file.read()
+            #     if '"{' not in editedString:
+            #         editedString = re.sub('{', '"{', editedString)
+            #         editedString = re.sub('}', '}"', editedString)
+            #         temp_file.seek(0)
+            #         temp_file.write(editedString)
+
+            toml_load_data = None
+            with open(filename, 'rb') as toml_file:
+                toml_load_data = tomllib.load(toml_file)
 
             if not self.schema_is_set():
                 for tableName, tableData in toml_load_data.items():
                     self.toml_data[tableName] = OrderedDict((key, []) for key in tableData.keys())
                     self.toml_data[tableName + "_units"] = OrderedDict((key, []) for key in tableData.keys())
                 self.toml_data["dsi_relations"] = OrderedDict([('primary_key', []), ('foreign_key', [])])
                 self.pack_header()
 
             for tableName, tableData in toml_load_data.items():
                 row = []
                 unit_row = []
                 for col_name, data in tableData.items():
                     unit_data = "NULL"
-                    if isinstance(data, str) and data[0] == "{" and data[-1] == "}":
-                        data = ast.literal_eval(data)
+                    if isinstance(data, dict):
                         unit_data = data["units"]
                         data = data["value"]
+                    # IF statement for manual data parsing for python 3.10 and below
+                    # if isinstance(data, str) and data[0] == "{" and data[-1] == "}":
+                    #     data = ast.literal_eval(data)
+                    #     unit_data = data["units"]
+                    #     data = data["value"]
                     self.toml_data[tableName][col_name].append(data)
                     if len(self.toml_data[tableName + "_units"][col_name]) < 1:
                         unit_row.append(unit_data)
                     self.toml_data[tableName + "_units"][col_name].append(unit_data)
                     row.append(data)
                 self.add_to_output(row, self.target_table_prefix + "__" + tableName)
                 if len(next(iter(self.output_collector[self.target_table_prefix + "__" + tableName + "_units"].values()))) < 1:
                     self.add_to_output(unit_row, self.target_table_prefix + "__" + tableName + "_units")
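The practical effect of this rewrite: the old path pre-munged the raw file so inline tables survived toml.load as quoted strings, then rebuilt them with ast.literal_eval and guessed column types with check_type(), whereas tomllib returns fully typed Python values, so inline tables arrive as plain dicts and both helpers could be dropped. A small sketch, assuming Python 3.11+ for the stdlib tomllib; the snippet mirrors the syntax of examples/data/results.toml below:

import tomllib  # stdlib on Python 3.11+

doc = tomllib.loads("""
[people]
avg_height = {'value'= 5.5, 'units'= 'm'}
""")

data = doc["people"]["avg_height"]
assert isinstance(data, dict)        # takes the new isinstance(data, dict) branch
print(data["value"], data["units"])  # 5.5 m
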
22 changes: 12 additions & 10 deletions examples/coreterminal.py
@@ -3,7 +3,7 @@
 
 '''This is an example workflow using core.py'''
 
-a=Terminal()
+a=Terminal(debug_flag=True)
 
 # a.list_available_modules('plugin')
 # ['GitInfo', 'Hostname', 'SystemKernel', 'Bueno', 'Csv']
@@ -17,30 +17,32 @@
 # a.list_available_modules('backend')
 # ['Gufi', 'Sqlite', 'Parquet']
 
 #a.load_module('plugin', 'YAML', 'reader', filenames=["data/schema.yml", "data/schema2.yml"], target_table_prefix = "schema")
 a.load_module('plugin', 'YAML', 'reader', filenames=["data/student_test1.yml", "data/student_test2.yml"], target_table_prefix = "student")
 #a.load_module('plugin', 'YAML', 'reader', filenames=["data/cmf.yml", "data/cmf.yml"], target_table_name = "cmf")
 
 # print(a.active_metadata)
+a.load_module('plugin', 'TOML', 'reader', filenames=["data/schema.toml", "data/schema2.toml"], target_table_prefix = "schema")
+a.load_module('plugin', 'TOML', 'reader', filenames=["data/results.toml"], target_table_prefix = "results")
 # print(a.active_metadata)
-a.load_module('backend','Sqlite','back-end', filename='data/data.db')
-#a.load_module('backend','Sqlite','back-end', filename='data/data2.db')
-# a.load_module('backend','Parquet','back-end',filename='./data/bueno.pq')
+a.load_module('backend','Sqlite','back-write', filename='data/data.db')
+#a.load_module('backend','Sqlite','back-write', filename='data/data2.db')
+# a.load_module('backend','Parquet','back-write',filename='./data/bueno.pq')
 
-a.load_module('plugin', "Table_Plot", "writer", table_name = "schema_physics", filename = "schema_physics")
+#a.load_module('plugin', "Table_Plot", "writer", table_name = "schema_physics", filename = "schema_physics")
 
 a.transload()
 a.artifact_handler(interaction_type='put')
 # a.list_loaded_modules()
 # {'writer': [<dsi.plugins.env.Hostname object at 0x7f21232474d0>],
 #  'reader': [<dsi.plugins.env.Bueno object at 0x7f2123247410>],
 #  'front-end': [],
-#  'back-end': []}
+#  'back-write': []}
 
 
 # Example use
 # a.load_module('plugin','Bueno','reader',filenames='data/bueno1.data')
-# a.load_module('backend','Sqlite','back-end',filename='data/bueno.db')
+# a.load_module('backend','Sqlite','back-write',filename='data/bueno.db')
 # a.transload()
 # a.artifact_handler(interaction_type='put')
-# a.artifact_handler(interaction_type='get', query = "SELECT * FROM sqlite_master WHERE type='table';", isVerbose = True)
+data = a.artifact_handler(interaction_type='get', query = "SELECT * FROM sqlite_master WHERE type='table';")#, isVerbose = True)
+#CAN PRINT THE DATA OUTPUT
+# print(data)
5 changes: 5 additions & 0 deletions examples/data/results.toml
@@ -0,0 +1,5 @@
+
+[people]
+avg_height = {'value'= 5.5, 'units'= 'm'} # overall average height
+median_speed = {'value'= 6.95, 'units'= 's'} # overall median speed
+std_gravity = {'value'= 9.83, 'units'= 'm/s/s'} # overall std dev gravity
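For orientation, here is a hypothetical sketch, inferred from the add_rows() logic above rather than output shown in the commit, of the metadata the TOML reader would collect from this file when loaded with target_table_prefix = "results" as in examples/coreterminal.py:

# Hypothetical collected shape; "results__" comes from target_table_prefix.
expected = {
    "results__people": {
        "avg_height": [5.5],
        "median_speed": [6.95],
        "std_gravity": [9.83],
    },
    "results__people_units": {
        "avg_height": ["m"],
        "median_speed": ["s"],
        "std_gravity": ["m/s/s"],
    },
}
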
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
3 changes: 1 addition & 2 deletions requirements.txt
@@ -4,5 +4,4 @@ pydantic>=2.1.1
 nbconvert>=7.13.0
 gitpython>=3.0.0
 matplotlib>=3.6.0
-pyyaml>=6.0
-toml>=0.10.2
+pyyaml>=6.0
