From bb8d4e5833506d06ffe3c8d388cfc9edcb000bf3 Mon Sep 17 00:00:00 2001 From: "Staiger, Christine" Date: Fri, 22 Nov 2024 12:26:59 +0100 Subject: [PATCH 1/6] fixing notebook --- src/validation_pipeline.ipynb | 254 ++++++++++++---------------------- src/validation_utils.py | 4 +- 2 files changed, 95 insertions(+), 163 deletions(-) diff --git a/src/validation_pipeline.ipynb b/src/validation_pipeline.ipynb index f05bf90..15f0746 100644 --- a/src/validation_pipeline.ipynb +++ b/src/validation_pipeline.ipynb @@ -7,7 +7,7 @@ "metadata": {}, "outputs": [], "source": [ - "import toml\n", + "import tomllib\n", "import pandas as pd\n", "\n", "from pathlib import Path" @@ -62,7 +62,8 @@ "metadata": {}, "outputs": [], "source": [ - "validation = toml.load(validation_path)\n", + "from validate import read_toml\n", + "validation = read_toml(validation_path)\n", "validation" ] }, @@ -107,7 +108,7 @@ "metadata": {}, "outputs": [], "source": [ - "proj_data_path = Path.cwd().parent / \"data\" / \"synthetic_data\"\n", + "proj_data_path = Path.cwd().parent / \"data\" / \"synthetic_data_2\"\n", "sep = \",\"" ] }, @@ -126,37 +127,8 @@ "metadata": {}, "outputs": [], "source": [ - "files = [f.name for f in proj_data_path.glob('**/*.csv') if f.is_file()]\n", - "files = [f for f in files if f in ['environment_events.csv', 'environment.csv', 'host_events.csv', 'hosts.csv']]\n", - "assert \"environment.csv\" in files\n", - "assert \"environment_events.csv\" in files\n", - "assert \"hosts.csv\" in files\n", - "assert \"host_events.csv\" in files\n", - "print(f\"Found files: {files}\")" - ] - }, - { - "cell_type": "markdown", - "id": "9331bd62-22e7-4b4a-ad45-d12ed587d87b", - "metadata": {}, - "source": [ - "If all files are found, read them in as pandas dataframes:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9fe43c78-6ff0-4d6d-8a99-11de63258e33", - "metadata": {}, - "outputs": [], - "source": [ - "data = {}\n", - "for f in files:\n", - " try:\n", - " data[f] = pd.read_csv(proj_data_path.joinpath(f), sep=sep)\n", - " except pd.errors.EmptyDataError:\n", - " print(f\"File is empty: {f}\")\n", - "assert list(data.keys()) == files # NOTE: environment_events\n", + "from validate import read_csv_files\n", + "data = read_csv_files(proj_data_path, sep)\n", "print(data.keys())" ] }, @@ -165,7 +137,7 @@ "id": "283342cc-cdec-42ad-8b8f-b33652c5d05c", "metadata": {}, "source": [ - "## Check column names and each file" + "## Check column names in each file" ] }, { @@ -175,178 +147,134 @@ "metadata": {}, "outputs": [], "source": [ - "from validation_utils import *" - ] - }, - { - "cell_type": "markdown", - "id": "b0163cd4-4b64-4fec-89c1-658220c616df", - "metadata": {}, - "source": [ - "### If identifier columns are not of type string, cast to string." + "from validation_utils import check_column_exists\n", + "result = check_column_exists(data, validation)\n", + "print(f\"Missing columns (file, column name):\\n {result}\")" ] }, { "cell_type": "markdown", - "id": "804f95ff-7544-4318-8bb2-38efdff55b32", + "id": "3a8d2352-7b40-46ff-aa4a-560fc92f2e9b", "metadata": {}, "source": [ - "*Parameters*" + "## Find columns which are empty" ] }, { "cell_type": "code", "execution_count": null, - "id": "a459263c-9d5b-4dc6-9ec8-f50ef7438f3e", + "id": "1cb39cc1-7148-45af-8f9a-b65b1e8078d6", "metadata": {}, "outputs": [], "source": [ - "id_col_names = [\"host_id\", \"environment_id\"]" + "from validation_utils import find_empty_columns\n", + "result = find_empty_columns(data)\n", + "print(f\"Empty columns (file, column name):\\n {result}\")" ] }, { "cell_type": "markdown", - "id": "1c5a2f7e-cedc-490b-b963-a0c4ab6a4c28", - "metadata": {}, - "source": [ - "*Code*" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4d7557a3-55b0-4b3e-b7f5-6ee7a0c984fb", + "id": "95ead3f0-8ea6-475f-970e-ac394caacfee", "metadata": {}, - "outputs": [], "source": [ - "for data_name in data:\n", - " for col_name in id_col_names:\n", - " cast_col_to_string(data[data_name], col_name)" + "## Check column types" ] }, { "cell_type": "markdown", - "id": "d848836d-4628-4796-ab85-d2e6be8b1dcf", - "metadata": {}, - "source": [ - "### Check individual files according to section in validation file" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ef9e2645-ad0e-4ff3-a851-6c792c4b5c4b", + "id": "f8cb0392-78ea-45a9-9581-1a8f1d5f2e81", "metadata": {}, - "outputs": [], "source": [ - "check_column_exists(data, validation)" + "Check whether a column has the expected type(s) defined in the configuration file. " ] }, { "cell_type": "code", "execution_count": null, - "id": "e3ae9335-d544-4f36-96a2-c4f9f6b8bbde", + "id": "88fc6991-a18f-4569-94a1-16a81fa4360b", "metadata": {}, "outputs": [], "source": [ - "check_column_types(data, validation)" + "from validation_utils import check_column_types\n", + "result = check_column_types(data, validation)\n", + "print(f\"Column type check (file, column name, type, expected type):\\n {result}\")" ] }, { "cell_type": "markdown", - "id": "f05c0842-52ba-4568-83e9-6c543abb3e8a", + "id": "12b92b62-ce98-4be6-b535-eaac5d6bb17c", "metadata": {}, "source": [ - "### Check that identifers `host_id` and `environment_id` are defined" + "## Columns with categorical values" ] }, { "cell_type": "markdown", - "id": "ed69e30b-f848-4d0c-8cae-7d22ca0d301f", - "metadata": {}, - "source": [ - "### Are identifiers unique?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "42943b13-4b2f-4dc3-9065-fad287532098", + "id": "56de8bd4-bf45-4c94-8fe4-d93c36191271", "metadata": {}, - "outputs": [], "source": [ - "ids = {}\n", - "\n", - "ids[\"host_id\"] = data[\"hosts.csv\"][\"host_id\"].unique()\n", - "if not len(ids[\"host_id\"]) == len(data[\"hosts.csv\"][\"host_id\"]):\n", - " print(\"hosts.csv: Column host_id contains duplicates.\")\n", - " assert False" + "The validation file also contains information on categorical values. Check if columns for which categorical values are defined really only contain those values." ] }, { "cell_type": "code", "execution_count": null, - "id": "9d32e6a3-d588-4809-9330-bd811fc6cc44", + "id": "29d79c9b-0a84-4f86-8ce7-f6a4a52ed7c0", "metadata": {}, "outputs": [], "source": [ - "ids[\"environment_id\"] = data[\"environment.csv\"][\"environment_id\"].unique()\n", - "if not len(ids[\"environment_id\"]) == len(data[\"environment.csv\"][\"environment_id\"]):\n", - " print(\"environment.csv: Column environment_id contains duplicates.\")\n", - " assert False" + "from validation_utils import check_column_values\n", + "result = check_column_values(data, validation)\n", + "print(f\"Undefined categorical values (file, column name, undefined values):\\n {result}\")" ] }, { "cell_type": "markdown", - "id": "4dd1fa05-d260-425a-b311-9c9afa439be6", + "id": "b0d04452-460d-4f82-a16f-0963f71ff399", "metadata": {}, "source": [ - "### Are there None values in the identifier columns?" + "## Identifier columns" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "b56b73a4-63e4-43dc-a744-c4c9cbc31b4e", + "cell_type": "markdown", + "id": "3b664d5f-33b4-4658-ab9b-3c6b32256f52", "metadata": {}, - "outputs": [], "source": [ - "if sum(pd.isna(ids[\"host_id\"])):\n", - " print(f\"hosts.csv: There are {sum(pd.isna(ids['host_id']))} empty cells in host_id.\")" + "In the current set up we have two identifier columns which can occurr in all of the files:" ] }, { "cell_type": "code", "execution_count": null, - "id": "f6df6a71-138c-461a-aead-8c2fd5a0c6b2", + "id": "672802ff-95d9-4f43-a12d-1d6c57ae3bf3", "metadata": {}, "outputs": [], "source": [ - "if sum(pd.isna(ids[\"environment_id\"])):\n", - " print(f\"environment.csv: There are sum(pd.isna(ids['environment_id'])) empty cells in environment_id.\")" + "ids = [\"host_id\", \"environment_id\"]" ] }, { "cell_type": "markdown", - "id": "392f8357-6065-4deb-b060-c419f9b2dd55", + "id": "965ad2e4-5d56-40f8-b07a-b61c6af53d1c", "metadata": {}, "source": [ - "### Are all identifiers in the other csv files defined?" + "We expect those columns to **not contain any blank cells**.\n", + "\n", + "The values for the dientifiers are defined in the respcetive columns in *host.csv* and *environment.csv*. In those files the columns must conatin unique values, i.e. the length of the column is equal to the number of unique values found in the column.\n", + "\n", + "If the output of the cell below is `True`, then all checks have passed successfully." ] }, { "cell_type": "code", "execution_count": null, - "id": "feff4836-6a0f-4b17-93bb-fc7fbbca9c4f", + "id": "76fe8d8d-40ea-4f3f-9382-1082b52b8f18", "metadata": {}, "outputs": [], "source": [ - "for id_type in ids:\n", - " for data_name in data:\n", - " if id_type in data[data_name].columns:\n", - " if not set(data[data_name][id_type]).issubset(ids[id_type]):\n", - " print(f\"File {data_name} contains undefined ids in column {id_type}:\")\n", - " print(set(data[data_name][id_type]).difference(ids[id_type]))" + "from validation_utils import identifier_checks\n", + "identifier_checks(data, ids)" ] }, { @@ -362,9 +290,7 @@ "id": "c9e43026-c1e6-4ab4-b515-dfbcc19303cf", "metadata": {}, "source": [ - "*Parameters:*\n", - "\n", - "Columns that belong together, if one is set, the others also need to be set." + "Some columns have dependencies between each other. E.g. if in a row a value is set in one columns, we also expect values in the other columns. Those *column dependencies* are defined in the respective section of the configuration file." ] }, { @@ -374,84 +300,88 @@ "metadata": {}, "outputs": [], "source": [ - "cols = {}\n", - "cols[\"event\"] = [\"event_day\", \"event_time\", \"event_type\"]\n", - "cols[\"measurement\"] = [\"measurement_type\", \"measurement_quantity\", \"measurement_unit\"]\n", - "cols[\"inoculation\"] = [\"inoculation_type\", \"inoculation_pathogen\", \"inoculation_dose\", \"inoculation_unit\"]\n", - "cols[\"treatment\"] = [\"treatment_type\", \"treatment_dose\", \"treatment_unit\"]" + "validation[\"column_dependencies\"]" ] }, { "cell_type": "markdown", - "id": "5628c709-31dd-407a-bd68-9c5b6fa83d7d", + "id": "05ce1b6b-179d-4048-8496-8f76e5afbe9a", "metadata": {}, "source": [ - "### Check for None/NA values in cluster or columns" + "### Check if columns of a cluster are present and contain values" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "e273f72c-d23d-4d05-b87b-ad29e5f7ea98", + "cell_type": "markdown", + "id": "39135812-a108-486b-b767-8fb3eb1fd87e", "metadata": {}, - "outputs": [], "source": [ - "for data_name in data:\n", - " result = check_column_clusters(cols, data[data_name])\n", - " if len(result) > 0:\n", - " print(f\"{data_name}: Invalid entries found:\")\n", - " for res in result:\n", - " df = data[data_name][res[1]]\n", - " print(f\"\\t Line {res[0]}: {df.iloc[[res[0]]]}\")" + "The code below checks:\n", + "When the columns defined in *event* are all present in the dataframe,\n", + "then we also expect all columns of *measurement*, *inoculation* and *treatment*\n", + "to be present in the same dataframe" ] }, { - "cell_type": "markdown", - "id": "3fd0eab6-d107-468e-ba39-96ab4a1599a7", + "cell_type": "code", + "execution_count": null, + "id": "61b6a5d4-4565-4aa5-84ee-6fef14a99dd4", "metadata": {}, + "outputs": [], "source": [ - "## Dependencies between clusters of columns" + "# Only for formatting out put\n", + "dependency_names = list(validation[\"column_dependencies\"].keys())[1:]\n", + "print(dependency_names)" ] }, { "cell_type": "code", "execution_count": null, - "id": "68c23436-1d76-4494-9951-824c05bb4c12", + "id": "103f8305-9120-40cc-91ce-1f778039aa97", "metadata": {}, "outputs": [], "source": [ - "for data_name in data:\n", - " if set(cols[\"event\"]).issubset(data[data_name].columns):\n", - " measure = set(cols[\"measurement\"]).issubset(data[data_name].columns)\n", - " inoc = set(cols[\"inoculation\"]).issubset(data[data_name].columns)\n", - " treat = set(cols[\"treatment\"]).issubset(data[data_name].columns)\n", - " if not (measure or inoc or treat):\n", - " print(f\"{data_name}: Need also information on either of measurement, incoulation or treatment\")" + "for data_name, df in data.items():\n", + " if set(validation[\"column_dependencies\"][\"event\"]).issubset(df.columns):\n", + " measure = set(validation[\"column_dependencies\"][\"measurement\"]).issubset(df.columns)\n", + " inoc = set(validation[\"column_dependencies\"][\"inoculation\"]).issubset(df.columns)\n", + " treat = set(validation[\"column_dependencies\"][\"treatment\"]).issubset(df.columns)\n", + " res = [i for i, val in enumerate([measure, inoc, treat]) if not val]\n", + " if len(res) > 0:\n", + " missing_cols = [dependency_names[idx] for idx in res]\n", + " print(f\"{data_name}: Need also information on {missing_cols}\")" ] }, { "cell_type": "markdown", - "id": "d92517e6-0a8e-4198-a6cb-62e2dc3da230", + "id": "69bd4c00-6a4c-4ff1-b01e-ea65839dc324", "metadata": {}, "source": [ - "## Converting columns" + "### Check values in a cluster of columns" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "4ac11422-45c0-44cb-8713-2bc96ae98966", + "cell_type": "markdown", + "id": "88803085-b09f-4af5-aeaa-863a3390c096", "metadata": {}, - "outputs": [], - "source": [] + "source": [ + "For each of the set of columns defined in the *dependencies* we assume that they are all empty per row or they all carry a value." + ] }, { "cell_type": "code", "execution_count": null, - "id": "1c8e383c-2a88-424b-9e15-725b361e51d9", + "id": "56365325-3441-4204-b0e5-2d2dd0ffc499", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "from validation_utils import check_column_clusters\n", + "for data_name, df in data.items():\n", + " result = check_column_clusters(validation[\"column_dependencies\"], df)\n", + " if len(result) > 0:\n", + " print(f\"Missing values in {data_name}:\")\n", + " print(f\"Row index, column name:\\n {result}\")" + ] } ], "metadata": { @@ -470,7 +400,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.6" + "version": "3.12.4" } }, "nbformat": 4, diff --git a/src/validation_utils.py b/src/validation_utils.py index 428118b..5e82ba9 100644 --- a/src/validation_utils.py +++ b/src/validation_utils.py @@ -172,7 +172,9 @@ def check_column_values(data: dict, validation: dict) -> list: unexpected_values.append( (data_name, var_name, check_categorical_values(col, cat_values)) ) - return unexpected_values + # remove empty sets + res = [vals for vals in unexpected_values if len(vals[2]) > 0] + return res def check_column_clusters(cols: dict, data_frame: pd.DataFrame) -> list: From b22de5a67a23c5f738504ac575cc7346a95683ce Mon Sep 17 00:00:00 2001 From: chStaiger Date: Mon, 25 Nov 2024 11:38:18 +0100 Subject: [PATCH 2/6] Update src/validation_pipeline.ipynb Co-authored-by: qubixes <44498096+qubixes@users.noreply.github.com> --- src/validation_pipeline.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/validation_pipeline.ipynb b/src/validation_pipeline.ipynb index 15f0746..b9a26a8 100644 --- a/src/validation_pipeline.ipynb +++ b/src/validation_pipeline.ipynb @@ -241,7 +241,7 @@ "id": "3b664d5f-33b4-4658-ab9b-3c6b32256f52", "metadata": {}, "source": [ - "In the current set up we have two identifier columns which can occurr in all of the files:" + "In the current set up we have two identifier columns which can occur in all of the files:" ] }, { From 8370853badb86952d038aebbc3a956a3fed937f2 Mon Sep 17 00:00:00 2001 From: chStaiger Date: Mon, 25 Nov 2024 11:38:41 +0100 Subject: [PATCH 3/6] Update src/validation_pipeline.ipynb Co-authored-by: qubixes <44498096+qubixes@users.noreply.github.com> --- src/validation_pipeline.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/validation_pipeline.ipynb b/src/validation_pipeline.ipynb index b9a26a8..0456a5d 100644 --- a/src/validation_pipeline.ipynb +++ b/src/validation_pipeline.ipynb @@ -261,7 +261,7 @@ "source": [ "We expect those columns to **not contain any blank cells**.\n", "\n", - "The values for the dientifiers are defined in the respcetive columns in *host.csv* and *environment.csv*. In those files the columns must conatin unique values, i.e. the length of the column is equal to the number of unique values found in the column.\n", + "The values for the identifiers are defined in the respective columns in *host.csv* and *environment.csv*. In those files the columns must contain unique values, i.e. the length of the column is equal to the number of unique values found in the column.\n", "\n", "If the output of the cell below is `True`, then all checks have passed successfully." ] From 4d04cac852f29e3a2d97cca7867749ea38905598 Mon Sep 17 00:00:00 2001 From: chStaiger Date: Mon, 25 Nov 2024 11:38:50 +0100 Subject: [PATCH 4/6] Update src/validation_pipeline.ipynb Co-authored-by: qubixes <44498096+qubixes@users.noreply.github.com> --- src/validation_pipeline.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/validation_pipeline.ipynb b/src/validation_pipeline.ipynb index 0456a5d..f464dfb 100644 --- a/src/validation_pipeline.ipynb +++ b/src/validation_pipeline.ipynb @@ -329,7 +329,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Only for formatting out put\n", + "# Only for formatting output\n", "dependency_names = list(validation[\"column_dependencies\"].keys())[1:]\n", "print(dependency_names)" ] From e2a9dc2997c92575cfb8e3b1ec21e71a239f5162 Mon Sep 17 00:00:00 2001 From: chstaiger Date: Mon, 25 Nov 2024 11:39:12 +0100 Subject: [PATCH 5/6] clean up test data --- data/synthetic_data/environment.csv | 101 ------------------ data/synthetic_data/environment_events.csv | 101 ------------------ data/synthetic_data/host_events.csv | 101 ------------------ data/synthetic_data/hosts.csv | 101 ------------------ data/synthetic_data/test.csv | 101 ------------------ .../environment.csv | 0 .../environment_events.csv | 0 .../host_events.csv | 0 .../{synthetic_data_2 => test_data}/hosts.csv | 0 9 files changed, 505 deletions(-) delete mode 100644 data/synthetic_data/environment.csv delete mode 100644 data/synthetic_data/environment_events.csv delete mode 100644 data/synthetic_data/host_events.csv delete mode 100644 data/synthetic_data/hosts.csv delete mode 100644 data/synthetic_data/test.csv rename data/{synthetic_data_2 => test_data}/environment.csv (100%) rename data/{synthetic_data_2 => test_data}/environment_events.csv (100%) rename data/{synthetic_data_2 => test_data}/host_events.csv (100%) rename data/{synthetic_data_2 => test_data}/hosts.csv (100%) diff --git a/data/synthetic_data/environment.csv b/data/synthetic_data/environment.csv deleted file mode 100644 index 5505c9a..0000000 --- a/data/synthetic_data/environment.csv +++ /dev/null @@ -1,101 +0,0 @@ -environment_id,environment_level -oT1,8 -fN3,2 -yZ3,14 -eW4,3 -bM5,2 -pY9,14 -fK4,11 -dU1,2 -rZ4,4 -zO9,12 -fS1,1 -qY8,2 -dR6,12 -cJ8,6 -dY6,2 -yT5,6 -zR2,7 -cP2,8 -tB6,5 -gW8,2 -gO7,2 -aD9,7 -aC0,2 -bM1,13 -hY8,12 -mO5,2 -bB0,1 -yE9,0 -zE6,2 -oF1,7 -lX6,1 -sP6,6 -eM8,3 -uO4,14 -nO2,7 -eF6,12 -nI0,3 -xZ4,2 -lU8,8 -aR5,12 -zK6,0 -hI2,10 -hX8,11 -eC2,3 -oS1,11 -vV1,9 -qG0,4 -dJ7,7 -kB8,8 -eF2,6 -zL7,11 -aY3,8 -gJ7,1 -gH6,10 -hH6,5 -pL6,7 -aY0,3 -aF9,12 -rE3,3 -lX7,5 -iX5,12 -dK7,8 -yI1,12 -gF5,11 -eP8,6 -wA1,9 -iF4,7 -iM0,5 -zQ2,13 -oN1,10 -qP5,6 -cJ5,5 -cU6,0 -fU8,10 -gC6,7 -uU7,4 -xX6,8 -aJ3,2 -bD2,10 -hT1,12 -cG0,7 -bO5,14 -rF7,9 -jC8,6 -yA6,2 -bF8,14 -sV0,2 -uR8,0 -uY6,11 -aY8,9 -rD3,10 -oE7,7 -xB7,3 -kZ0,10 -rD5,2 -kE0,14 -mR7,8 -oH1,6 -eM1,1 -oM0,3 diff --git a/data/synthetic_data/environment_events.csv b/data/synthetic_data/environment_events.csv deleted file mode 100644 index f3d999b..0000000 --- a/data/synthetic_data/environment_events.csv +++ /dev/null @@ -1,101 +0,0 @@ -environment_id,environment_level,host_id,event_day,event_time,event_type,measurement_type,measurement_unit,measurement_quantity -kG2,11,vH4,10,29:49,measurement,Rhonda Thornton,ml,8.304571542074228 -bK4,5,hG4,10,15:50,allocation,Marissa Thomas,ml,6.780852482072694 -jY3,6,eA2,9,25:30,allocation,Clinton Flores,mg,8.71084047511158 -pG3,1,hC2,9,24:00,measurement,Jennifer Velasquez,g,4.548974757897116 -iN6,12,oJ0,8,24:50,measurement,Rhonda Nguyen,g,12.178328283332647 -fV7,7,rD6,5,10:09,measurement,Cassandra Hill,g,9.810527690086465 -lF5,0,xH7,10,12:19,measurement,Jennifer Vincent,g,8.433518385410357 -rN7,8,qM7,10,27:00,measurement,Lori Martinez,mg,11.26414865428836 -uI4,6,eB9,6,11:50,measurement,Larry Boone,g,5.516724593676114 -aM1,12,fA7,9,14:00,allocation,Linda Hernandez,g,13.823376993153994 -jM7,7,oN2,14,24:29,measurement,Leslie Lewis,g,0.6200350044655811 -eE7,2,kB0,14,15:59,allocation,Robert Simon,mg,2.806662293452969 -sG7,9,dB4,11,12:39,measurement,Lisa Lawson,g,4.855606652270578 -hA7,0,tN6,1,11:09,measurement,Caroline Miller,g,6.983960909674317 -wM2,3,mN5,12,22:30,allocation,Richard Parker,g,7.263042305953155 -nK7,12,aI5,0,19:50,measurement,Eric Wilson,g,11.487007076506481 -qI0,8,dJ8,11,11:29,measurement,Sarah Stewart,mg,6.913904091151436 -aX8,7,hH9,7,28:39,measurement,Jerry Delgado,g,7.095553615255107 -cY5,8,gC7,8,29:20,measurement,Monica Gonzalez,g,11.58793015033719 -jL8,0,gI4,12,19:59,measurement,Mallory Peterson DDS,mg,5.476525097528649 -hM9,6,sG1,2,11:40,measurement,Alex Jones,g,3.9497346531933557 -fM1,9,uP0,9,19:09,allocation,Arthur Martinez,g,10.701007857702889 -qI2,6,fB3,4,14:20,measurement,Mr. Casey Neal Jr.,g,1.2172411039388797 -yJ0,0,gA8,10,19:10,measurement,Dustin Swanson,mg,10.14740740474803 -iP7,0,eB2,12,28:19,measurement,Sally Barton,ml,2.0886111931573765 -oX9,3,yZ5,3,19:29,allocation,Jason Roman,ml,3.8648446683317013 -oB6,1,bE5,10,19:10,measurement,Yolanda Rollins,mg,10.397207572793882 -iT6,5,sA5,14,26:00,allocation,Ashley Miller,mg,8.496730793310116 -oI2,14,dZ8,8,21:09,measurement,Kevin Martinez,g,12.570212973462114 -pV1,4,iW3,8,27:29,allocation,Alisha Brown,ml,3.7409983955737824 -sT7,13,qX3,1,15:50,measurement,Carla Mccoy,ml,0.5193679830429698 -gS7,8,pX3,7,25:59,measurement,Shannon Gregory,ml,8.483527392198958 -vH1,9,lD2,5,16:59,measurement,John Rodriguez,ml,9.65036888268722 -qX8,0,bH2,10,18:40,allocation,Jeffrey Richardson Jr.,mg,12.856057637905984 -vF6,4,uA1,1,15:39,allocation,Kristine Lewis,ml,1.6594972029495891 -cM2,1,eQ0,0,12:29,allocation,Stacey Wright,ml,4.559818901916473 -jJ4,10,pR0,5,27:10,measurement,Anthony Medina,mg,10.39054094924359 -yO5,9,iG2,10,22:19,measurement,Stacey Reynolds,mg,2.7233015981958193 -dQ9,1,lI2,11,20:50,measurement,Diane Thomas,mg,11.698191098348175 -bU2,10,mU8,7,19:49,measurement,Mark Rodriguez,ml,12.191574851737649 -bY4,5,vF9,5,12:39,measurement,Carmen Stephens,g,9.868353207625132 -eZ5,9,mF9,0,22:20,allocation,Daniel Chung,g,2.9957785871007996 -sW4,7,jF8,12,16:19,measurement,Charles Doyle,g,4.7649403557525725 -jN1,3,xL4,8,10:50,measurement,Andre Phillips,g,4.890957289781344 -dW2,10,tB2,13,25:00,measurement,Erin Faulkner,mg,5.451336087169272 -kW2,8,uP4,3,11:09,allocation,Katherine Barry,ml,6.562413030373551 -zB1,13,aF1,14,26:10,allocation,James Alvarado,mg,0.6874736776849666 -nQ2,0,iZ9,7,17:19,allocation,Jamie Hull,g,10.804764026737258 -rD7,3,gM8,8,10:59,measurement,Yvonne Davis,mg,9.810787520423393 -sP7,5,sQ9,9,12:00,allocation,Helen Schultz,ml,2.1146195542349973 -cW1,0,cZ3,3,18:29,allocation,Sharon Walker,ml,14.402252611744617 -xP3,12,mH1,9,16:00,allocation,Patrick Chandler,ml,11.452187880078025 -aE0,10,zT4,9,27:09,measurement,Michael Lee,mg,1.5428751987501772 -vD2,8,yF4,8,29:40,allocation,Kaitlyn Cordova,ml,2.6829539639121074 -fT5,12,cP1,0,17:00,allocation,Erin Davis,mg,3.213793442662523 -yL9,0,oL5,0,16:10,measurement,Tanya Dawson,ml,13.874119105834993 -zG7,10,hW5,14,19:00,allocation,Erin Jackson,mg,13.929792046369007 -wD4,3,vK3,8,13:10,measurement,Bryan Summers,g,0.8121452945996155 -cW0,0,iR5,2,22:40,measurement,Matthew Taylor,g,10.623787685906853 -vU6,14,vF8,13,12:40,measurement,Laura Brown,g,13.87458266848192 -cA8,13,uF5,14,10:20,measurement,Carmen Fischer,ml,13.156643052498731 -lS9,1,xG1,9,21:30,allocation,Melanie Harris,mg,8.95463716227507 -lL8,2,gT8,11,20:30,allocation,Justin Allen,g,8.060110411363222 -rO1,0,cF6,5,19:49,allocation,Tiffany Stevenson,ml,3.4441914352689156 -zF3,1,xD2,11,16:39,allocation,Steven Morrison,mg,9.843401311761843 -qK7,1,eQ5,1,13:59,measurement,Jacob Harris,ml,14.679216356833894 -pR8,8,wX3,4,22:39,allocation,Tracy Vaughn,ml,9.658564555672477 -kJ6,0,nQ8,12,10:20,measurement,Melissa Martin MD,ml,1.8671931525672796 -gW8,6,wF0,4,22:30,allocation,Terry Brown,g,0.30757357377162786 -lG6,7,dX7,4,25:59,measurement,Edward Green,mg,9.20344071414781 -lR8,12,oI4,1,14:19,measurement,Curtis Cummings,g,7.729946967334432 -qY8,12,lJ8,12,26:59,allocation,Lori Lewis,mg,13.386029898656602 -yH0,12,bI0,6,27:40,measurement,Billy Hayes,g,12.490038897557099 -xV9,2,hD8,12,27:30,measurement,Pamela Foster,ml,4.334343298332742 -kH6,2,yL4,13,12:30,allocation,Sandy Boyd,ml,6.493269812072037 -pH3,14,eK8,14,24:59,measurement,Jennifer Taylor,g,5.201755204973621 -rA0,9,uA2,4,11:49,allocation,Brent Weber,mg,3.5944130487330255 -lI3,1,oO2,10,12:39,measurement,Thomas King,mg,12.87044863719824 -rM5,2,uQ5,1,17:29,allocation,Melissa Gonzalez,ml,8.117072447123228 -yH4,2,wL3,9,23:29,allocation,Kimberly Pena,ml,13.504325745880406 -cJ4,13,gZ4,3,23:50,measurement,Latoya Fisher,mg,1.7301039202384938 -pD6,0,cM9,10,29:00,measurement,Mrs. Denise Garner,mg,8.488089284009487 -mH5,8,bU5,3,28:40,measurement,Shannon Ward,mg,13.017470889809365 -gI4,11,hE4,5,14:59,allocation,Jose Carroll,mg,8.60612626778796 -lZ3,0,cV6,5,18:29,allocation,Brandon Taylor,mg,0.39164346543405615 -eA1,8,xP9,10,23:19,measurement,James Lee,mg,8.050944329174282 -hV2,14,yN7,6,15:39,allocation,Kathleen Harper,g,9.459419345933947 -dJ1,5,lP5,12,21:40,allocation,Dennis Kane,mg,7.163897228398941 -sB2,1,xW8,4,16:29,measurement,Carl Hall,mg,12.414209133983892 -xS2,5,xK5,10,29:10,measurement,Emily Bates,mg,7.9427580961396345 -jZ6,12,gD1,7,23:19,allocation,Carly Young,g,7.299787631069027 -rI6,10,iW1,1,20:50,measurement,Kelsey Moody,ml,11.406501583901408 -lW2,3,qE1,9,24:40,measurement,David Flores,mg,13.842900045410923 -xL7,4,bE4,6,11:20,allocation,Melanie Decker,mg,0.5857470910688178 -uG1,6,tB5,12,14:49,measurement,Mr. Thomas Walters,ml,2.47153852189514 -iF4,12,rA3,8,16:49,allocation,Melissa Lawrence,g,6.21905468189901 -lK1,1,zV9,10,10:59,allocation,Wendy Avery,ml,13.828550529520259 -pW1,8,iM3,7,22:40,measurement,Joseph Taylor,g,14.29165917566535 -cB4,2,bX7,0,21:00,measurement,Ricky Lee,mg,14.318839316214884 -qJ1,12,oU1,13,13:59,measurement,Michelle Keller,mg,11.706003165832982 diff --git a/data/synthetic_data/host_events.csv b/data/synthetic_data/host_events.csv deleted file mode 100644 index 4ed8fd7..0000000 --- a/data/synthetic_data/host_events.csv +++ /dev/null @@ -1,101 +0,0 @@ -host_id,event_day,event_time,event_type,measurement_type,measurement_quantity,measurement_outcome,measurement_unit,inoculation_type,inoculation_pathogen,inoculation_dose,inoculation_unit,treatment_type,treatment_dose,treatment_dose,treatment_unit -kK6,37,24:39,treatment,measurement,8.830815564119238,false,g,Abigail Shaffer,Suzanne Rodriguez,6.638871477090059,mg,Steven Carter,13.373606858559965,ml -xI4,14,11:50,inoculation,measurement,3.484788329472179,true,ml,Donald Martin,Adrian Allen,6.459181411705036,ml,David Zamora,6.317515223856118,mg -oV2,95,10:20,treatment,measurement,10.57270344843454,false,ml,Deanna Bennett,Cynthia Shaw,8.588796769015952,ml,Maria Gomez,1.4843097932997262,ml -tX6,86,17:40,inoculation,treatment,6.216985614052212,false,mg,Nancy Vang,Kyle Scott,13.955557216370194,mg,David James,8.969872651288977,mg -qU7,56,14:29,measurement,inoculation,5.254380804031272,true,mg,Ryan Mcdonald,Kristen Evans,2.223438057902304,ml,Jennifer Allen,3.4418521743466157,mg -rN2,30,26:49,treatment,inoculation,1.7261207524615245,false,ml,Joshua Gordon,Jessica Greer,4.974452057163621,ml,Amanda Bailey,5.075894317671845,ml -sP4,45,20:39,treatment,treatment,4.521032420654531,false,ml,Holly Khan,Jeffrey Miles,1.8701870059532433,ml,Joshua Rocha,8.286863812513316,mg -gU0,26,29:29,inoculation,treatment,2.8157722633815396,true,mg,Christopher Myers,Kaylee Montgomery,12.17848022094492,ml,Timothy Roach,10.536488447536897,mg -sU0,84,21:20,measurement,measurement,8.85697788687943,false,ml,Robert Miller,Jeffery Allen,5.277368567578277,g,Rachel Cohen,9.077596569232824,mg -wR7,50,25:00,measurement,treatment,1.775768278015775,true,mg,Courtney Rodriguez,Amanda Carter,4.67936579270109,mg,Robert Marshall,12.198685400938318,g -rS8,5,18:19,treatment,treatment,13.759941600471567,false,g,Mary Harrison,Andrea Hubbard,14.237447797085508,mg,Kristin Lopez,5.171391253480917,ml -pO9,91,11:00,treatment,inoculation,12.007324462913823,true,mg,Carolyn Buchanan,Tyrone Smith,9.679205539772012,mg,James Moody,13.40977015835301,g -bK0,95,10:29,treatment,treatment,3.346615341619435,true,g,Andre Lopez,Cristina Houston,11.112038348658011,mg,Alyssa White,2.599106539503275,g -yO8,70,12:19,measurement,inoculation,14.715783435209069,false,g,Craig Merritt,Rhonda Graham,11.248960431754968,mg,Amber Hansen,13.244047549128327,g -sI0,33,24:50,treatment,treatment,9.693694346629165,false,mg,Stephen King,Sarah Nguyen,9.80358162932795,mg,Brandy Ball,2.534622721523454,ml -jQ9,55,19:40,measurement,inoculation,0.23790408557687326,false,g,Teresa Mendoza DVM,Michael Walsh,0.13321645895850776,ml,Rick Valencia,2.883883295953402,mg -yN5,43,13:39,measurement,inoculation,3.7077819428977117,true,ml,Nicholas Murphy,Nathaniel Savage,0.5513369500406368,mg,Elizabeth Hebert,1.3111240205856016,ml -wF9,68,27:20,inoculation,treatment,2.7292380569842196,true,mg,Daniel Hunter,Jeffrey Obrien,11.623738810194233,mg,Stephen Medina,12.026559673202453,g -kC8,50,16:50,measurement,treatment,9.229812772049538,true,mg,Ashley Wilson,Derek Carroll,10.161892814871731,ml,Mark Jordan,3.316626574440125,ml -lD9,11,23:19,treatment,measurement,0.02549037547024069,false,g,David Ross,Jennifer Davis,5.374155633713193,g,Michael Massey,9.390760188440305,g -oP4,72,23:29,treatment,measurement,3.7272235357678536,true,ml,Bradley Walker,Maureen Robinson,0.9591407682905051,ml,Annette Stevens,6.23724382994744,mg -lB6,19,27:30,inoculation,measurement,6.202211078771201,true,ml,William Glover,Jeffery Rose,14.104575078760131,mg,Tiffany Bass,9.397211699083465,mg -nL2,51,25:20,treatment,inoculation,14.868837582008599,false,g,Suzanne Russell,James Cruz,8.563278290683147,ml,Gina Phillips,5.0703926430436,ml -aT9,76,15:29,inoculation,measurement,2.0771385370310798,false,mg,Pamela Campbell,Gloria Johnson,1.8060879977632038,ml,Laura Watson,6.275457030396771,ml -kO5,7,12:50,inoculation,inoculation,9.548883178850756,true,mg,Joan Curtis,Marc King,6.7261471187172726,g,Robert Odom,5.013657138488522,mg -eZ7,94,16:40,measurement,measurement,3.0393752837571384,false,ml,Jason Greene,Brandon Williams,6.776837227766223,mg,Andrew Daniels,5.005654927721493,mg -pN2,15,25:50,treatment,treatment,11.316469963743621,true,mg,David Little,Sonia Ramsey,10.940651363154986,g,Tina Knapp,10.71911146646549,mg -xX8,,23:29,treatment,measurement,6.671109782156147,true,g,Lauren Gardner,Mary Harmon,13.137854882172281,mg,Carlos Johnson,11.340544765295512,ml -oN5,92,10:40,measurement,inoculation,3.651105470236252,true,mg,April Watson,Douglas King,14.832940657582816,ml,James Davis,6.888990995799535,ml -iF0,86,22:59,inoculation,treatment,2.5754591909638256,false,mg,Ralph Warren,Heather Garcia,12.452150241822817,ml,Sarah Bennett,4.681253602307184,ml -nH2,74,21:40,measurement,measurement,8.06930781752856,false,ml,Pamela Weeks,Lisa Horton,6.654230733415872,mg,Jasmine Kline,2.9997374261565577,ml -lI1,97,16:30,measurement,inoculation,11.733767760133414,false,ml,Alejandro Burton,Jennifer Sullivan,8.108193994113929,mg,Ricky Baxter,0.11756955355407162,mg -aF9,17,,measurement,inoculation,1.7181229979609436,false,mg,Patricia Nguyen,Daniel Mason,5.1389157209510055,g,Megan Fisher,10.593327215399649,mg -mM3,89,26:30,measurement,inoculation,0.2014698524988423,false,mg,Janice Boone,Teresa Mitchell,7.929334595457344,ml,Adam Olson,14.080072903574692,ml -qG5,52,22:40,measurement,inoculation,3.9498677354987266,false,mg,Catherine Patel,Tina Sanchez,2.646707361586743,g,Joshua Smith,2.398040871917791,g -aY6,0,16:29,measurement,inoculation,1.636509118106055,false,ml,Robert Scott,Jerry Farrell,12.580522252766011,g,Anthony Mercado,11.014960694670275,g -zF3,42,24:40,measurement,measurement,8.116303501950508,false,mg,Karen Davis,Leah Lowe,2.379642387589555,ml,Micheal Kim,2.273033455751124,ml -yO2,16,27:49,measurement,measurement,12.650620830297525,false,ml,Monique Kelley,Michael Willis,13.619074055254131,g,Kristine Fernandez,2.083915085723069,ml -qD9,82,16:39,measurement,measurement,3.5005104390246604,true,ml,Katrina Carroll,Jason Webb,9.17780004511478,ml,George Whitney,7.90016060803093,ml -lI7,24,28:59,treatment,treatment,8.340866683159978,false,mg,Mark Allen,Darlene Vega,13.4198187069566,ml,Michael Moreno,6.225479788426566,g -gF1,76,24:30,inoculation,inoculation,14.346883157780002,false,mg,Bailey Turner,Anthony Thompson,2.3639986210434905,mg,Gavin Jackson,13.69831295851483,ml -gV9,24,12:30,inoculation,measurement,13.005580146614918,false,mg,Rebecca Schneider,Johnny Jones,10.846881724372563,g,William Zimmerman,7.030451183895423,mg -oW5,53,10:09,treatment,treatment,8.76962724131333,false,g,Morgan Rubio,Dr. Jessica Nixon,12.789461682452448,g,Anthony Jenkins,14.005838226879376,mg -aD7,98,25:50,inoculation,treatment,12.280929012141875,false,mg,Raymond Campbell,Peter Vincent,9.076902824303186,mg,Paul Fletcher,11.735216592303363,g -oX3,30,10:40,treatment,treatment,11.303758775645077,true,mg,Kayla Terry,Shawn Gray,14.686533722112978,g,Madison Miller,5.46121022389279,g -uL0,96,12:50,measurement,treatment,11.896947956668878,false,ml,Karen Davis,James Palmer,3.272377114205807,ml,Terri Nash,12.108724943050504,ml -aY5,2,27:29,measurement,treatment,13.74331271078986,false,ml,Joseph Turner,Ricky Sanchez,1.0637460221575408,g,Nicholas Franklin,0.5981404890474445,mg -lC2,6,12:39,inoculation,treatment,5.867356722615165,true,g,Jennifer Wolfe,Kimberly Soto,3.765308890220087,g,Theresa Butler,7.701765312957921,ml -yF0,49,15:39,inoculation,measurement,14.911807309267122,false,mg,Madison Lawrence,Teresa Johns,1.7093091443912278,ml,Megan Edwards,4.2599714848911425,mg -xZ7,4,27:30,inoculation,inoculation,0.4795750761264944,true,g,John Ferguson,Tammy Novak,10.519446838947635,ml,Crystal Schwartz,1.0313765748104657,mg -cP4,55,19:29,inoculation,inoculation,10.996814597181979,true,ml,Sheryl Thomas,Shelia Gomez,6.574307042122701,mg,William Woods,14.725078104875777,g -yJ0,7,11:00,treatment,inoculation,13.038550292206894,true,ml,Paul Myers,Stephen Schultz,10.958534040264846,mg,Amanda Miller,11.922995378849533,mg -zD5,99,18:09,inoculation,treatment,4.249287525135021,true,mg,Lisa Carroll,Mary Jones,14.31117271735691,ml,Amber Molina,5.08057655985669,ml -wM5,56,15:40,inoculation,measurement,14.43764251823506,false,mg,Frank Richardson,Amy Phelps,13.943688682248723,g,Carly Johnson,8.7837940624653,g -vF4,47,16:30,measurement,treatment,14.771065607506317,false,mg,Priscilla Ramos,Gregory Pratt,3.29818012352493,mg,Jennifer Mason,8.710203192313683,mg -dT0,84,16:00,inoculation,inoculation,1.3319548286072718,true,mg,Heather Nichols,Matthew Collins,1.5679673313973708,g,Jeremy Taylor,7.821723244970321,mg -gM3,55,29:20,inoculation,inoculation,8.916801223989479,false,ml,Dr. Harold Blake MD,Rodney Burns,1.5323898157367162,ml,Brandon Woods,11.15273861772867,mg -wP3,36,20:49,treatment,treatment,10.046885689469853,true,g,Eduardo Thomas,Kevin Frank,3.759676539577109,mg,Steven Martinez,5.4717342960786635,mg -rQ2,0,25:50,measurement,treatment,4.386851409367995,false,g,Christopher Adams,Yolanda James,12.084700336839797,ml,Eduardo Lam,2.6572927485805926,g -xK2,22,11:30,inoculation,measurement,1.261709584412879,true,g,Courtney Hutchinson,Thomas Mckee,13.468294641581465,mg,Nicholas Edwards,2.8044478488401454,g -hQ7,41,27:50,treatment,treatment,12.882860293096241,true,mg,Teresa Williams,Michelle Cortez,3.189357867436237,mg,Carlos Fowler,1.2012722832951401,ml -zM4,84,12:49,measurement,measurement,11.748189867356706,false,mg,Jennifer Reynolds,Ryan Brown,12.187875069111554,g,Joshua Mcfarland,2.6951941039037948,g -fX2,68,24:19,treatment,inoculation,14.729057574802743,true,mg,Courtney Clark,Sherry Wilkinson,8.167383268185135,g,Sara Manning,6.973575152965897,ml -wM3,16,20:19,inoculation,treatment,2.6924484404438784,false,g,Michael Bell,William Scott,10.423166484488045,mg,Thomas Hardy,12.249470662959325,mg -sG0,8,15:30,treatment,inoculation,8.96316665087906,false,mg,Diane Johnson,Thomas Rice,3.732322235895154,g,Kimberly Becker,11.419891377060672,g -pS7,58,29:30,measurement,measurement,7.405572035500703,true,g,Ryan Thornton,Joann Bridges,10.229299150148657,g,Dennis Phillips,7.656123079670134,g -hP7,20,21:39,measurement,inoculation,4.3317567894069375,false,g,David Allen,Sophia Jones,7.902694942839533,ml,Joshua Dennis,2.6018952012511893,mg -zS6,78,14:30,measurement,inoculation,0.5489686532638827,true,mg,Angel Davis,Nicholas West,2.68617393667373,g,Nicholas Colon,1.2145270337556868,mg -hF1,43,24:00,inoculation,treatment,5.534223846472205,false,ml,Ryan Moss,Holly Franklin,10.150186190536516,ml,Bradley Smith,11.017453793183607,g -iU8,60,16:39,inoculation,inoculation,1.8429061730146472,true,g,Sharon Anderson,Mr. Peter Gordon,7.119185193131973,mg,Carolyn Daniels,7.754124070498095,mg -cN7,55,16:19,inoculation,inoculation,10.365486534096862,true,g,Tiffany Deleon,Emily Fitzgerald,6.517431609625961,mg,Jessica Donovan,10.993444758917775,g -iJ1,10,15:20,inoculation,treatment,8.25034942029592,true,mg,Renee Wilson,Jacqueline Wells,12.553180786699762,mg,John Reyes,14.26426378604214,mg -fO0,72,15:59,inoculation,measurement,4.0138466250380915,true,ml,Brandon Lozano,Samantha Chase,1.7025891539390337,mg,Carrie Rodriguez,9.628813149217388,ml -dH2,8,23:30,measurement,treatment,2.1254685244615983,false,g,Jeremy Powell,Sharon Mcpherson,3.7747261621483394,ml,Ricardo Nixon,12.259661650242226,mg -vJ8,42,17:40,measurement,inoculation,12.58719936860308,true,ml,Jennifer Sharp,Stephanie Johnston,12.434339814258749,mg,Tiffany Rivera,11.100557973249837,g -hY5,4,24:00,measurement,treatment,3.7142499375412257,true,g,Brianna Frazier,Matthew Hendrix,10.012763386515925,mg,Kimberly Charles,7.726941250993389,mg -cU5,98,27:40,measurement,treatment,11.114804881351702,true,mg,David Howard,Olivia Miller,9.905058404628095,g,Jeremy Davenport,6.031295567431217,g -bX6,18,18:30,treatment,treatment,10.414138370295467,false,ml,Carrie Black,Jose Thompson,4.0733905008402616,g,Paul Harmon,14.888141201339034,g -vP8,56,18:19,measurement,treatment,10.611808761794626,false,ml,David Green,Chad Price,3.5017811065678623,g,David Adkins,13.667220259513995,ml -lF5,64,15:40,treatment,measurement,10.066877200661853,false,g,Jeffrey Fox,Jason Thompson,8.18213632290201,g,Alyssa Fields,0.7895869929210481,g -jX6,32,10:00,measurement,measurement,6.971240823793766,false,g,Jimmy Barnes,Sarah Francis,3.6304594430494985,g,Alexandra Ruiz,10.198883567702032,ml -oH0,60,19:59,treatment,measurement,13.90632028709793,true,ml,Crystal Johnson,Christopher Smith,13.595529407458056,mg,Janet Henry,0.2992699333492682,g -bH3,11,11:59,measurement,inoculation,12.360552739968899,false,mg,Brandon Young,Danielle Oliver,12.89263386944844,mg,Mrs. Shelly Turner,6.038256238576022,ml -rJ0,50,17:20,treatment,inoculation,0.3700349003762332,false,mg,Beverly Riggs,John Hunter,9.163003029796792,g,Brooke Booker,0.1421408975314381,g -rS9,42,11:00,measurement,measurement,14.67573472747406,true,g,Jason Becker,Joseph Randall,0.9911613272264835,g,Frank Guzman,6.226332179391142,mg -hV6,61,13:29,treatment,measurement,13.297152342340725,true,g,Paul Paul,Caitlyn Brown,14.492229956752137,ml,John Ellis,0.7494853513758637,mg -sK2,13,19:10,inoculation,inoculation,8.949891052575508,false,ml,Kristie Brown,Dale Warren,7.334189366898367,ml,Mariah Case,12.852708465280273,mg -hH8,44,12:49,inoculation,measurement,4.361538924114362,true,mg,Daniel Green,Erica Medina,9.246501813727832,mg,Sharon Hopkins,9.726465287950038,mg -wZ5,40,21:39,measurement,treatment,4.581991590895726,false,g,Brittany Roberts,Courtney Hinton,0.5027932818725173,mg,Deborah Parker,12.265504683925531,mg -uG3,22,15:00,measurement,measurement,7.446115365333339,true,g,Haley Brown,Kristin Hart,0.3006799560503831,g,Shannon Phillips,7.511709681020814,ml -jW6,38,26:40,measurement,treatment,4.5407855932793115,false,g,Mark Barrett,Nicole Bond,5.001987422680799,mg,Lindsey Crawford,11.592302455246772,mg -vT4,9,13:30,treatment,measurement,12.630784914998852,false,mg,Alexander Martinez,Stephen Jackson,12.710336104255797,mg,Barbara Hartman,4.528990232613134,mg -sW8,7,25:59,measurement,inoculation,9.263568268116867,false,ml,Ray Burke,Nathan Contreras,0.2610731734268307,ml,Michael Sanders,6.547225285178296,ml -tN4,88,23:50,inoculation,inoculation,11.638997422257912,true,ml,Michael Reese,Veronica White,5.136110825213272,ml,Brittany Lewis,10.491528527169196,ml -bL5,34,25:29,measurement,measurement,2.7537286578901505,false,ml,Albert Lewis,Mr. Gary Martinez,11.586836875086265,g,James Snow,5.87412961835712,g -mP5,22,28:20,inoculation,treatment,7.609902969154323,true,ml,Lauren Jackson,Stacy Bowen,2.9402217549842065,ml,Sandy Thomas,2.1840970897015626,mg -vU8,19,11:19,treatment,inoculation,0.2572011458780038,false,mg,Matthew Hogan,Tony Gomez,2.9527228551079205,mg,Carolyn Moore,7.466179169073936,mg -iF3,48,28:29,inoculation,inoculation,10.342400237316097,false,g,Mark Holland,Samuel Anderson,8.442807919749091,ml,Sarah Ponce,12.924666833989873,mg -nD1,83,21:09,measurement,treatment,7.236259090570106,true,g,Donna Simmons,Roberto Hampton,7.184893798543401,mg,Christopher Larsen MD,10.817312703632217,g -aH4,78,16:10,treatment,treatment,4.378669621769644,true,mg,Susan Hernandez,John Smith,1.6660926397708993,g,Laura Baker,6.040088287706282,ml diff --git a/data/synthetic_data/hosts.csv b/data/synthetic_data/hosts.csv deleted file mode 100644 index e6f4b49..0000000 --- a/data/synthetic_data/hosts.csv +++ /dev/null @@ -1,101 +0,0 @@ -host_id,host_groupNumber,host_sex,host_age,host_death,host_species,host_breed -0,10,M,88,84,Jordan Brady,Jose Hughes -1,11,M,48,78,Patricia Ortega,Kyle Michael -2,8,M,89,54,Melissa Little,Lori Matthews -3,3,M,5,61,Robert Lane,James Lee -4,0,M,29,67,Logan Morrow,Matthew Walton -5,10,M,47,34,Daniel Castillo,Susan Ward -6,8,M,19,32,April Ross,William Bishop -7,11,F,22,60,Ashley Brown,Lindsay Sims -8,2,F,45,37,Hannah Webb,Melinda Glover -9,7,F,20,14,Matthew Mcguire,Ann Smith -10,6,M,84,67,Daniel Ellis,James Ortiz -11,8,M,42,27,Laura Jennings,Beth Hill -12,14,F,13,15,Heather Bowers,Amy Perez -iW3,12,F,35,78,Kevin Lang,Gregory Wright -14,2,M,55,75,Marc Gibson,Sandra Rivera -15,13,M,35,1,Heather Crawford,Kayla Jordan -16,6,F,20,9,Brooke Kelly,Lisa Sheppard -17,4,M,80,8,Kevin Smith,Thomas Hodges -18,6,F,0,69,Aaron Riggs,Nicholas House -19,13,M,84,18,Anne Sharp,Matthew Moore -20,5,F,45,85,Phillip Smith,Alejandro Hall -21,7,M,98,26,Diane Byrd,Chase Garcia -22,7,F,26,75,Jeffrey Serrano,Lisa Jefferson -23,8,M,92,92,William Fischer,Russell Walters -24,5,M,31,97,Kevin Collins,Michele Bell -25,0,F,21,31,Lauren Watson,Mercedes Peters -26,3,M,87,6,Heather Merritt,Mr. Jesus Rowe IV -27,1,F,45,16,Manuel Hill,Mark Bell -28,0,F,45,31,Jessica Robinson,Andrew Sawyer -29,11,F,84,0,George Chambers,Melissa Washington -30,8,M,91,11,Dawn Arnold,Samuel Faulkner -31,0,F,63,84,Nicholas Morrison,Adrian Bryant DVM -32,10,F,49,73,Megan Nguyen,Keith Holloway -33,9,M,43,35,Danielle Blake,Wendy Moore -34,3,F,28,17,Dr. Jessica Huffman,Cindy Schwartz -35,14,F,33,95,Jacob Taylor,Pamela Houston -36,5,F,87,21,Wesley Collier,Kyle Nelson -37,3,M,99,32,Timothy Barnett,Brian Wilkerson -38,12,M,49,15,Jesse Decker,Dustin Jones -39,3,F,28,10,Monica Marquez,Michelle Powell -40,12,M,2,61,Reginald Wallace,Lindsey Olsen -41,6,M,85,38,Gloria Peterson,Erica Bates -42,4,M,37,56,Katherine Bryan,David Hunter -43,13,M,59,82,Jennifer Payne,Joshua Obrien -44,2,F,94,14,Mark Fisher,Alexander Mendoza -45,0,M,76,73,Jessica Carter,Sarah Brown -46,3,M,42,22,John Mays,Steven Meyer -47,11,M,43,27,Jessica Hill,Aaron Flores -48,11,M,87,82,Dennis Jenkins,Samantha Richardson -49,13,M,92,31,Jeremy Jones,Kelly Taylor -50,9,F,93,64,John Watson,Kimberly Estrada -51,9,M,62,24,Eric Robinson,Nancy Williamson -52,1,M,63,49,April Li DVM,David Rich -53,5,M,82,88,Jessica Brown,Kimberly Contreras -54,3,M,92,95,John Brown,Lisa Pierce -55,3,F,70,0,Laura Nelson,Mackenzie Welch -56,12,M,56,45,Joseph Holmes,Anna Padilla -57,5,M,76,97,Joseph Vazquez,Maria Wu -58,10,F,30,58,Tony Turner,Brandon Bullock -59,7,M,74,16,Adam Klein,Ms. Sara Arnold -60,4,M,5,15,Sean Campos,Krista Garcia -61,13,M,77,89,Brenda Smith,John Smith -62,10,M,75,71,Sally Brandt,Diana Chase -63,2,F,26,81,Alexandra Patterson,Kristen Odom DDS -64,11,M,1,78,Veronica Petersen,Kimberly Gross -65,13,F,99,17,Lisa Walker,Christina Schmidt -66,14,M,16,80,Monica Griffin,Isaac Alvarado -67,0,F,47,65,Joshua Santos,Samantha Bender -68,0,M,61,8,William Wade,Kristina Rice -69,2,F,80,22,Taylor Norton,Elizabeth Jenkins -70,3,M,43,86,Jorge Davis,Lori Morgan -71,1,F,65,19,Ashley Klein,Mr. Dale Peterson DVM -72,3,F,0,33,Karen Carroll,Jacob Perry -73,0,M,37,72,Jessica Wise,Jeremy Douglas -74,1,M,53,18,Lisa Brown,Matthew Strickland -75,0,M,3,53,Sandra Thompson,Jose Jones -76,12,F,97,29,Michael Moore,Toni Henderson -77,1,F,14,37,Bobby Ryan,Frank Vaughn -78,4,F,43,48,Miguel Ellis,Taylor Willis DVM -79,2,F,17,85,Calvin Maxwell,Douglas Hill -80,7,F,75,8,Sabrina Rowland,Thomas Fox -81,9,F,51,74,Dr. Melissa Marks,Noah Andersen -82,5,F,31,25,Cassandra Cabrera,Andrew Jones -83,11,F,21,90,Frederick Francis,Mr. David Patrick -84,8,M,83,26,Max Brock,Martha Thomas -85,7,M,81,86,Paula Clark,Luis Reed -86,6,M,62,89,Eric Rich,Tanya Lee -87,8,F,64,66,Carol Pineda,Marcus Daniels -88,6,M,13,60,Miss Katherine Warner,Jennifer Matthews -89,1,M,5,86,Dr. Cynthia Gonzalez,Robert Romero -90,6,F,11,84,Tyler Aguilar,Brian Ross -91,4,M,82,83,Miguel Ochoa,James Sherman -92,4,F,91,89,Francis Duran,Tonya Thornton -93,11,F,34,5,Troy Thompson,Kelly Wright -94,0,M,52,22,Matthew Mcdonald,Timothy Martinez -95,2,F,78,45,Andrea Ayers,Kimberly Thompson -96,11,M,79,77,Jenny Benton,Tanya Gibson -97,1,F,58,95,Mitchell Douglas,Leslie Huang -98,9,F,8,29,Raymond Wong,Andrew Mcdonald -99,7,M,52,83,Matthew Jones,Taylor Martinez \ No newline at end of file diff --git a/data/synthetic_data/test.csv b/data/synthetic_data/test.csv deleted file mode 100644 index e8f66d1..0000000 --- a/data/synthetic_data/test.csv +++ /dev/null @@ -1,101 +0,0 @@ -host_id,event_day,event_time,event_type,measurement_type,measurement_quantity,measurement_outcome,measurement_unit,inoculation_type,noculation_pathogen,inoculation_dose,inoculation_unit,treatment_type,treatment_dose,treatment_dose_unit -yM7,80,15:00,inoculation,inoculation,1.6759891207463555,true,mg,Lynn Ramirez,Kelly Franklin,2.3087254455124757,g,Krista Pena,7.007855312021169,mg -sD0,33,28:19,inoculation,treatment,8.725608223318273,true,mg,Thomas Martinez,James Deleon,8.900815274470958,ml,Troy Wang,12.322307670041532,g -eB1,6,15:59,inoculation,measurement,10.05228082311512,false,mg,Robert Forbes MD,Becky Hall,8.547248591965776,ml,Dr. Nancy Bryant,13.531149067334235,mg -tW0,78,24:00,treatment,inoculation,11.956506263671681,false,mg,Miguel Benton,Victoria Fuller,11.49544991338025,ml,Adam Bryant,0.6841937862800807,mg -gR0,23,20:09,treatment,treatment,4.373015028086345,true,mg,Carl Strickland,Sara Baker,12.05424328614304,ml,Tonya Morrison,8.667419094298207,mg -eL4,89,28:10,treatment,treatment,10.214818916516645,false,mg,Brianna Carr,David Burton,0.29731543473322075,mg,Justin Hudson,3.641968842946844,g -kK8,99,21:20,inoculation,treatment,8.78334708377446,true,g,Amy Boyle,Ivan Brewer,8.314788453578604,mg,Maxwell Mack,7.845800147069532,g -rT4,2,13:49,treatment,inoculation,1.2200794096664964,true,g,Scott Bird,Nichole King,2.3029964498457707,g,Lucas Gibson,11.624396213511819,mg -nU1,99,21:19,inoculation,treatment,3.276036593083101,true,g,Debbie Ramirez,Jessica Camacho,1.131891284559467,ml,Harold Johnson,0.3686565096566291,mg -jF2,90,15:19,inoculation,inoculation,5.384577484272684,false,g,Charles Benson,Jonathan Griffin,9.79541372874227,g,Timothy Garza,3.215385802321134,ml -rD8,44,26:20,treatment,measurement,6.330344829949023,false,g,Brandon Mccullough,Pamela Porter,11.086610410849758,g,Ms. Linda Giles,8.76084031952459,mg -oO7,92,17:29,treatment,measurement,3.6153553443182136,true,g,Courtney Sullivan,Sarah Fernandez,7.05770650637476,mg,Cody Campbell,0.37724483355354943,mg -lO0,61,10:40,treatment,treatment,6.201648745129848,false,ml,Annette Patterson,Christine Baird,6.323049564755213,mg,Amber Branch,0.5865710869538238,mg -aE9,83,19:20,treatment,inoculation,1.186441147441406,true,g,Robert Best,Austin Hill PhD,8.069321848527014,g,Keith Jensen,1.8955963091613437,ml -aP3,5,13:09,measurement,measurement,9.688488573983914,false,ml,Tyler Lopez,Joseph Rosales,2.6148399663695674,g,Taylor Patel,4.851690946226266,mg -yM3,31,21:39,inoculation,inoculation,1.8955386439519346,false,mg,Tina Sparks,Jordan Riggs,3.63630982618329,ml,Eric Allen,7.341339064662091,g -qV0,63,18:39,inoculation,treatment,5.533086882856628,false,ml,Rachel Page,Deborah Mason,4.827761844148744,g,Kathleen Smith,7.48168285577694,g -qN5,48,22:39,inoculation,treatment,0.7733929719821231,true,ml,Scott Dunn,Adam Hamilton,2.035283620273959,ml,Dr. Jamie Olson,14.987533132886467,g -eK1,38,28:29,measurement,inoculation,5.831090877641928,true,ml,Robert Sparks,Cameron Thompson,8.159793123825354,g,Caroline Martin,0.28233704633969847,ml -bI3,10,19:40,inoculation,inoculation,11.244187794380053,true,ml,Amanda Fowler,Paul Mercer,2.374819719396048,ml,Kevin Gallagher,9.46761288710587,mg -hW7,11,26:20,inoculation,treatment,10.373359463363775,false,mg,Nicole Mahoney,Kevin Wright,0.15752519852740732,mg,Tara Smith,11.648261114886523,mg -uL5,88,20:40,measurement,inoculation,0.6527839818250469,false,mg,Michael Ramos,Terry Cook,5.158002992360696,ml,Charles Reilly,6.048964367612999,ml -qA2,53,16:29,treatment,inoculation,14.985940977683022,true,mg,Tara Weber,Willie Johnson,4.608651366752896,ml,Anthony Martinez,11.140602943093358,ml -qJ3,12,15:30,inoculation,measurement,6.5645046596409165,false,g,Kimberly Page,Steven Norris,6.321029538090624,mg,Brenda Valentine,10.176166712713451,g -lH9,73,19:40,treatment,inoculation,6.142757490081908,true,ml,Cindy Graham,Richard Potter,7.825123390732208,g,Scott Wood,5.17619337896531,g -qI6,66,29:50,inoculation,inoculation,4.910709647256995,true,g,Melissa Woods,Ronald Ingram,8.536039716659124,mg,James Gray,4.000024707552826,mg -cE2,64,17:19,measurement,inoculation,10.398115252766146,false,ml,Kristen Liu,Wesley Cox,13.030776757919428,g,Joan Mendez,12.054811543255463,ml -wC9,57,19:09,inoculation,measurement,11.411394737029747,true,g,Tiffany Johnson,Julie Parker,9.723403474597111,g,John Delacruz,6.35683555578343,g -gC3,43,27:29,treatment,inoculation,2.538413081850246,false,g,Denise Morgan,Kathryn Davis,13.343265974594988,mg,Jonathan George,6.7863212985701935,mg -vB1,75,28:00,treatment,inoculation,13.12939920705536,false,mg,Ashley Russell,Carly Wolfe,0.9677091932016896,g,Brittney Willis,3.979069497654291,g -aP0,51,27:10,inoculation,measurement,5.859894822512535,false,mg,Darlene Thomas,Justin Johnson MD,12.8157869383758,mg,Patricia Romero PhD,1.5231989350940882,mg -kP6,23,21:19,treatment,measurement,12.975353503202625,true,mg,Melinda Curtis,Debra Weber,10.516983628646695,g,Natalie Adams DDS,3.6568420793600485,mg -eG9,17,26:00,measurement,treatment,11.641856509692028,false,mg,Alexander Tran,Chelsey French,2.5481020687602527,mg,Christopher Adams,4.9934361617251115,g -kA0,61,13:19,treatment,measurement,10.965445883937445,true,mg,John Mills MD,Brandy Jenkins,1.348269637101438,ml,Jessica Adkins,9.660906089730375,ml -sW7,65,10:09,treatment,inoculation,1.5999012622154836,false,ml,Michael Atkins,Cathy Ryan,13.863089866471967,ml,Melissa Wood,4.727059652756015,mg -yT6,26,15:00,treatment,measurement,0.04945106491675233,false,g,Kimberly Davis,Alan Stevens,10.150727248586389,g,Ernest Bailey,9.291720528393663,mg -pP7,16,19:10,treatment,inoculation,6.9458294597651955,true,ml,Kelly Blackwell,Suzanne Mills,0.18684801668785056,mg,Crystal Wall,7.999144087165612,mg -hO7,28,23:39,treatment,measurement,5.052935743590976,false,mg,Samantha White,Chelsea Watts,12.391394576490017,g,Steven Watts,2.224356443993213,g -xP7,74,21:09,measurement,inoculation,6.399330729503434,false,mg,Denise Turner,Crystal Frank,4.932897813837924,ml,Raymond Richardson,10.605079841395762,mg -mF5,62,16:49,measurement,measurement,13.163372292328539,false,g,Dorothy Wade,Brian Marquez,10.898437609749038,ml,Katrina Wilson,9.015301863812283,mg -zU6,7,20:59,inoculation,measurement,8.779572978068462,false,mg,Lauren Reed,Michael Love,1.7119822665543878,mg,Jeremy Harris,4.047604766406892,g -qK9,17,29:29,inoculation,measurement,6.660356562178204,true,mg,Roy Black,Daniel Chen,13.087400167547958,g,Jennifer Carson,11.723185752338015,mg -cP1,39,26:20,measurement,inoculation,4.087283517682177,false,mg,Kelsey Lucero,Garrett Chavez,2.457146795181395,mg,Lori Wagner,13.603843514760124,mg -jP2,9,10:40,treatment,treatment,12.721978899556667,true,mg,Deborah Johnson,Kayla Johnson,10.965148332243261,mg,Adam White,9.320631732354261,mg -lU3,97,17:49,inoculation,treatment,14.000079968362899,true,g,Tammy Taylor,Richard Mooney,9.21503887977481,ml,Mark Bush,7.197417845555084,g -eK9,0,22:19,treatment,inoculation,7.01473475350666,true,ml,Jason Patterson,Jason Chen,11.70275838837474,g,Cesar Yang,7.127770492095488,mg -sR7,24,18:40,measurement,inoculation,14.935018549969406,true,mg,Jonathan Thornton,Andrew Washington,3.4308287613333737,ml,Samuel Howell,12.379212754234361,g -rB6,74,13:19,measurement,treatment,10.29901821505296,true,g,Ethan Jordan,Lori Heath,7.747323082508601,mg,Michaela Williams,5.460602647957629,g -nH3,35,24:39,measurement,inoculation,0.2661200414654469,true,mg,Calvin Hicks,Michelle Patel,10.942885022873414,mg,Bethany Alexander,13.32318394836498,g -wZ6,82,22:49,treatment,measurement,9.673144302783928,false,mg,Frank Hicks,Daniel Leon,7.479462718586431,g,Wayne Rogers,5.587874964565684,g -uT8,1,18:10,measurement,treatment,9.737326819847477,true,g,Robert Smith,Jesse Adams,8.756923298162942,mg,Julia Gilbert,11.087475142920452,ml -oE9,82,24:00,treatment,inoculation,3.917211947041321,true,mg,Kevin Moss,Christine Jenkins,1.1901328996050864,ml,Theresa Estrada,14.787064102620601,mg -fT9,48,10:20,inoculation,inoculation,6.169602289755571,true,ml,Megan Boyd,Melissa Luna,0.37661373834515566,g,Carol Williams,1.144982751586578,mg -rT0,57,11:50,treatment,inoculation,4.814472344702807,true,g,Gregory Olson,Jennifer Adams,0.9515186315638552,mg,Christine Wright,5.138657357971258,g -xG7,60,27:50,treatment,treatment,1.4037628976919896,false,mg,Ryan Morton,Aaron Gonzalez,5.764931574564288,ml,Andrea Dougherty,7.07216532747624,mg -tB4,20,27:09,measurement,treatment,3.085074447097225,false,mg,Jeremy Lopez,Jesse Lowe,3.5078110349609553,ml,Ethan Wilcox,2.214447742509398,ml -nI8,27,13:20,inoculation,inoculation,4.713561855433578,true,ml,Laura Chapman,Morgan Khan,2.1044310879488877,mg,Emily Lindsey,13.629462223594858,ml -qK1,93,27:40,measurement,treatment,5.318668905942286,true,ml,Spencer Munoz,Sarah Williams,1.3831961338436338,g,Rebecca Harmon,9.722477663023867,ml -zY7,44,11:20,inoculation,measurement,6.222367385592106,true,g,Jessica Garcia,Tracy Calhoun,7.587560699181322,ml,Thomas Henry,10.415151134193072,ml -lM1,82,18:09,inoculation,measurement,10.743779578331889,true,ml,William Flores,David Lozano,12.432416733309546,mg,Dwayne Calderon,5.3981465123555195,mg -dH8,78,15:19,inoculation,treatment,7.6249124931926575,false,g,Susan Norton,Sarah Martinez,8.572481863011381,ml,Ann Hines,13.271963129482153,mg -hT8,66,21:29,treatment,measurement,6.66214898734545,false,g,Beth Ward DVM,Brian Johnson,1.7085369490518632,mg,Andrea Hodge,1.9308427159261505,g -vW4,28,17:40,treatment,treatment,12.038154070572682,false,g,Steven Boyle,Jacob Hill,0.2627222282756492,g,Mary Valencia PhD,3.3361054794008176,ml -mE7,60,18:59,treatment,measurement,10.075958954963449,false,ml,Kathleen Preston,Anna Mayo,2.4933867790933135,ml,Nathan Marquez,12.298070651880163,g -gP0,82,11:00,treatment,measurement,0.09973058077080432,false,mg,Cassandra Gilbert,Erin Douglas DDS,4.8496842769807404,ml,Austin Fisher,7.944961521954903,ml -xM2,64,19:50,inoculation,treatment,11.809301187407147,false,mg,Adam Chen,Alan Sanders,11.618100744069533,mg,Erica Walker,12.275615989840102,g -tY5,2,15:00,measurement,treatment,2.271160627112912,false,ml,Nancy Price,Thomas Holden,5.525092259242655,ml,Christine Boone,0.808970413965292,mg -uX0,28,13:00,inoculation,treatment,7.179513112552234,true,ml,Jennifer Reed,Veronica Andrews,6.275785709053466,ml,Michele Williams,11.412761853431874,ml -aB0,27,17:19,treatment,treatment,12.548674887100862,true,ml,Roger Clay,Christina Mendoza,6.260944295967511,ml,Jonathan Bullock,6.351280854141813,mg -aC4,23,21:09,inoculation,inoculation,6.510277782817226,true,ml,Nicole Blanchard,Elizabeth Patterson,7.496767812829162,mg,Madison Erickson,2.7132596071783857,g -sZ2,65,12:29,measurement,inoculation,7.882328394304386,true,mg,Shelly Simmons,Joseph Gilbert,0.6582219424341451,mg,Cynthia Kim,4.817433534785515,g -gA3,21,20:29,measurement,treatment,8.933169234125478,false,ml,Mark Carroll,Kevin Mayer,8.946057290350176,g,Jessica Schmidt,11.822315836298195,mg -rB9,99,17:30,inoculation,treatment,8.889061185469119,true,mg,Sarah Daniel,Richard Fletcher,0.7895161732365763,g,Tina Brady,5.866660939504006,ml -bD6,31,15:20,inoculation,treatment,9.533576600681922,true,g,Andrew Wright,Alexis Mitchell DVM,13.419656559679403,g,Kimberly Fernandez,13.90270712105909,ml -gO3,67,25:20,measurement,measurement,0.8703782372998847,false,ml,Karen Ross,Victor Perry,9.493814673737623,g,Christine Lawson,3.8854636521816497,mg -pP3,93,11:09,inoculation,measurement,6.586191092667379,false,g,Robert Brown,Raymond Mendez,3.737435961022407,ml,Anne Hughes,1.4916919251747403,mg -yB3,39,11:49,measurement,treatment,7.7516424924103084,true,g,Chad Mcclain,Phillip Bell,14.212880251657495,g,Danielle Jones,0.42195912925302836,g -pP6,73,24:59,inoculation,measurement,10.674783758781444,false,g,Christina Miller,Tina Stevens,5.009010978846403,mg,Brett Roberts,0.8183142050802206,g -vO9,72,21:10,treatment,inoculation,2.4968493921209998,false,ml,Martin Burns,Jessica Waters,2.3233419136941698,ml,Allen Walsh,0.15414933872545555,g -hI5,73,16:19,measurement,inoculation,7.192989505983701,true,mg,Justin Patterson Jr.,Aaron Lopez,12.977902964189981,mg,Christina Castro,14.434464557236053,mg -oE4,25,10:19,inoculation,measurement,2.2929926547800483,true,mg,Victor Cisneros,Christine Murray,12.593665711014044,ml,Justin Davis,1.429183618438703,g -gU5,1,22:19,measurement,inoculation,10.489774151926696,true,ml,George Kramer,Heidi Rivera,12.784556156334805,mg,Gregory White,5.076182628263347,mg -cP4,7,19:39,treatment,inoculation,1.0128941200876274,false,g,Andrea Thomas,Jesus Liu,0.38922101685689325,mg,Sarah Garrett,6.241553270649635,mg -lB6,45,15:59,treatment,treatment,3.695892296256549,false,ml,Norma Wells,Brenda Walter,14.975495406758347,mg,Victor Gonzalez,0.01340363773652653,mg -zI8,18,22:30,inoculation,treatment,0.5499203825492988,true,mg,Leah Perez,Laura Olson,9.152038457589784,ml,Diamond Brown,5.408092204687694,mg -yY6,84,20:20,inoculation,treatment,2.7934210209740193,true,mg,William Lopez,Maurice Stevenson,1.3260059013064374,mg,Derek Moore,5.495291987665244,g -iV7,66,12:00,measurement,inoculation,4.957196603179029,false,mg,Larry Nelson,Allison Cole,11.707213557931938,ml,Katherine Murphy,5.329685219991022,g -rZ7,92,20:20,inoculation,inoculation,0.6096324138867998,true,g,Dustin Brooks,Katherine Perry,7.103692042057635,mg,Tamara Lopez,1.8587942329010687,mg -xK2,34,18:10,treatment,inoculation,1.2804226967623262,true,g,Nicole Arroyo,Cole Phillips DDS,6.0318262830536025,mg,Edwin Strong,6.106478046813894,mg -wX0,69,27:39,measurement,measurement,10.63141540300464,false,ml,Dr. John Love PhD,Elizabeth Black,7.4188408531461505,g,Stephen Jones,0.7130997927431321,g -hN9,98,15:59,inoculation,inoculation,10.756536693544215,false,g,Cheryl Adams,Rebecca Bishop,2.1093230830052168,mg,Larry Moore,2.7536173708325693,ml -oQ0,98,28:29,measurement,measurement,5.481176472456799,true,g,Calvin Murphy,Luke Nunez,9.087431763016028,g,Dawn Bass,8.024466614387867,ml -wE0,49,23:00,inoculation,treatment,6.485272410248847,false,mg,Sarah Bryant,Victoria Gibson,1.745561759523338,mg,Teresa King,11.639968125601063,mg -bC4,81,17:40,measurement,measurement,9.149718739767911,true,mg,Sarah Clark,Joseph Wheeler,7.135054605082013,ml,Juan Lopez,11.102570644103368,g -oJ2,60,19:50,treatment,measurement,0.5931604043181143,false,ml,Ashley Townsend,Ashley Jones,9.132748113454365,mg,Michael Krause,6.319348301008125,ml -rJ3,39,23:20,inoculation,inoculation,11.873362741197957,false,ml,Diana Willis,Sheila Palmer,2.568532482532326,ml,Robert Costa,1.6917852263269773,ml -vS2,18,13:40,measurement,inoculation,9.77772108215598,false,mg,Claire Garcia,Carrie Walsh,2.655076506066336,g,Jeffrey Shaffer,5.4966913881118575,ml -lB3,62,22:59,inoculation,measurement,8.993578271099906,false,mg,Matthew Phillips,Deborah Hernandez,1.501654107194037,ml,Benjamin Hughes,9.118313589008647,mg -oI5,29,15:00,inoculation,treatment,2.1183587744132613,false,ml,Elizabeth Williams,Michael Ortega,14.30386841554045,ml,John Le,12.016579443179452,mg -tF1,60,22:10,measurement,measurement,3.704238800639361,false,mg,Andre Shaw,Madison Vincent,8.746218122503842,g,Jesus Park,14.210740842166777,g diff --git a/data/synthetic_data_2/environment.csv b/data/test_data/environment.csv similarity index 100% rename from data/synthetic_data_2/environment.csv rename to data/test_data/environment.csv diff --git a/data/synthetic_data_2/environment_events.csv b/data/test_data/environment_events.csv similarity index 100% rename from data/synthetic_data_2/environment_events.csv rename to data/test_data/environment_events.csv diff --git a/data/synthetic_data_2/host_events.csv b/data/test_data/host_events.csv similarity index 100% rename from data/synthetic_data_2/host_events.csv rename to data/test_data/host_events.csv diff --git a/data/synthetic_data_2/hosts.csv b/data/test_data/hosts.csv similarity index 100% rename from data/synthetic_data_2/hosts.csv rename to data/test_data/hosts.csv From 2eb28b66d4add666664651e45ade3a0454a1ec22 Mon Sep 17 00:00:00 2001 From: chstaiger Date: Mon, 25 Nov 2024 11:40:33 +0100 Subject: [PATCH 6/6] redirect notebook to test_data --- src/validation_pipeline.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/validation_pipeline.ipynb b/src/validation_pipeline.ipynb index f464dfb..a1965da 100644 --- a/src/validation_pipeline.ipynb +++ b/src/validation_pipeline.ipynb @@ -108,7 +108,7 @@ "metadata": {}, "outputs": [], "source": [ - "proj_data_path = Path.cwd().parent / \"data\" / \"synthetic_data_2\"\n", + "proj_data_path = Path.cwd().parent / \"data\" / \"test_data\"\n", "sep = \",\"" ] }, @@ -400,7 +400,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.4" + "version": "3.12.7" } }, "nbformat": 4,