diff --git a/.github/workflows/testing.yaml b/.github/workflows/testing.yaml new file mode 100644 index 0000000..c73ace5 --- /dev/null +++ b/.github/workflows/testing.yaml @@ -0,0 +1,34 @@ +name: Python + +on: [push, pull_request] + +jobs: + build: + + runs-on: ${{ matrix.os }} + strategy: + matrix: + python-version: ["3.9", "3.10", "3.11"] + # os: [ubuntu-latest, windows-latest] + os: [ubuntu-latest] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: pip + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install hatch + - name: Create Environment + run: hatch env create + - name: Test OSeMOSYS_step + run: hatch run test + # - name: Format OSeMOSYS_step + # run: hatch fmt + # uncomment for auto-pushing to PyPI + # - name: Build OSeMOSYS_step + # run: hatch build \ No newline at end of file diff --git a/.gitignore b/.gitignore index 3a5623e..0489050 100644 --- a/.gitignore +++ b/.gitignore @@ -171,4 +171,4 @@ cython_debug/ #.idea/ # VS Code -.vscode/ \ No newline at end of file +.vscode/ diff --git a/README.md b/README.md index e7013fb..a65b56a 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,11 @@ # OSeMOSYS step -This repository contains a set of scripts to conduct model runs with limited + +This repository contains a set of scripts to conduct model runs with limited foresight with OSeMOSYS models. -# Directory Structure +# Directory Structure -Before the workflow has been ran, the directory will look like what is shown below: +Before the workflow has been run, the directory will look like what is shown below: ```bash OSeMOSYS_STEP @@ -34,19 +35,19 @@ OSeMOSYS_STEP ``` ## `data` -The `data/` directory holds information on the reference model, and each -scenario you want to run. All scenario information must be nested under a +The `data/` directory holds information on the reference model, and each +scenario you want to run. All scenario information must be nested under a `scenarios/` subdirectory. ### `data/scenarios` -The `data/scenarios` subdirectory holds information on the options (or decisions) -that the model can make. Within this directory, numerically ordered subdirectories -are created to hold information on each step. For example, if there are 5 steps -in the model run, there will be 5 subdirectories, each labeled 0 through 4. +The `data/scenarios` subdirectory holds information on the options (or decisions) +that the model can make. Within this directory, numerically ordered subdirectories +are created to hold information on each step. For example, if there are 5 steps +in the model run, there will be 5 subdirectories, each labeled 0 through 4. -Within each `data/scenarios/#` subdirectory, CSV files hold information -on the options that can be made. Each CSV file must follow the formatting -shown below. +Within each `data/scenarios/#` subdirectory, CSV files hold information +on the options that can be made. Each CSV file must follow the formatting +shown below. | PARAMETER | REGION | TECHNOLOGY | OPTION | YEAR | VALUE | |:--------------------------------:|:------:|------------|--------|:----:|-----------| @@ -61,9 +62,9 @@ shown below. | TotalAnnualMaxCapacityInvestment | UTOPIA | COAL | 1 | 2049 | 0 | | TotalAnnualMaxCapacityInvestment | UTOPIA | COAL | 1 | 2050 | 0 | -Note, that the `OPTION` column will dictate whether the option is made or not. 
For each
-model run, either all data identifed as the `0` option or the `1` option will be used,
-not both. There can be as many options as the modeller desires. 
+Note that the `OPTION` column will dictate whether the option is made or not. For each
+model run, either all data identified as the `0` option or the `1` option will be used,
+not both. There can be as many options as the modeller desires.
 
 ## `model`
 
@@ -71,35 +72,35 @@ This directory houses the reference GNU MathProg OSeMOSYS model you are using
 
 ## `results`
 
-Contains CSV result files for each model run 
+Contains CSV result files for each model run
 
 ## `src`
 
-Contains all scripts 
+Contains all scripts
 
 ## `steps`
 
 Solver outputs for each CSV model run. If using GLPK, these will be result CSV
-files, if using a different solver, this will be a text file. 
+files, if using a different solver, this will be a text file.
 
-# Running Instructions 
+# Running Instructions
 
 ## Objective
 
-Run a 5 step model, over the horizon of 1990 to 2010 that makes investment decesions
+Run a 5-step model, over the horizon of 1990 to 2010, that makes investment decisions
 about allowing investment in coal.
 
-## 1. Add the model file 
+## 1. Add the model file
 
 Drop in an osemsosys file (called `osemosys.txt`) into the `model/` directory
 
 ## 2. Add the base data file
 **NB:** Neither the model data nor the scenario data should make use of the parameter _TotalAnnualMaxCapacity_. It can cause problems when passing _NewCapacity_ from one step to the next step.
 
-Drop in a MathProg formatted data file in the `data/` folder. The data file 
+Drop in a MathProg formatted data file in the `data/` folder. The data file
 can be long formatted (otoole) or wide formatted (momani)
 
-## 3. Add in scenario data 
+## 3. Add in scenario data
 
 For the first step, add the file `data/scenarios/1/A.csv`, where one
-option allows investment in IMPHCO, and one options does not allow it. 
+option allows investment in IMPHCO, and one option does not allow it.
 
 | PARAMETER | REGION | TECHNOLOGY | OPTION | YEAR | VALUE |
 |:--------------------------------:|:------:|------------|--------|:----:|-----------|
@@ -114,8 +115,8 @@ option allows investment in IMPHCO, and one options does not allow it.
 | TotalAnnualMaxCapacityInvestment | UTOPIA | IMPHCO1 | 1 | 2009 | 0 |
 | TotalAnnualMaxCapacityInvestment | UTOPIA | IMPHCO1 | 1 | 2010 | 0 |
 
-In this same step, but independent from the decision to invest in IMPHCO, we 
-want to also add the option to invest in importing uranium. Add the file 
+In this same step, but independent from the decision to invest in IMPHCO, we
+want to also add the option to invest in importing uranium. Add the file
 `data/scenarios/1/B.csv`
 
 | PARAMETER | REGION | TECHNOLOGY | OPTION | YEAR | VALUE |
 |:--------------------------------:|:------:|------------|--------|:----:|-----------|
@@ -131,8 +132,8 @@ want to also add the option to invest in importing uranium. Add the file
 | TotalAnnualMaxCapacityInvestment | UTOPIA | IMPURN1 | 1 | 2009 | 0 |
 | TotalAnnualMaxCapacityInvestment | UTOPIA | IMPURN1 | 1 | 2010 | 0 |
 
-In the second step, through one decesion, we want to decide the allowable investment 
-in importing coal, importing RL1, and if RLu is allowed to run or not. Add the file 
+In the second step, through one decision, we want to decide the allowable investment
+in importing coal, importing RL1, and whether RLu is allowed to run or not. Add the file
 `data/scenarios/2/C.csv`
 
 | PARAMETER | REGION | TECHNOLOGY | OPTION | YEAR | VALUE |
 |:--------------------------------:|:------:|------------|--------|:----:|-----------|
@@ -168,20 +169,20 @@ in importing coal, importing RL1, and if RLu is allowed to run or not. Add the f
 | TotalTechnologyAnnualActivityUpperLimit | UTOPIA | RLu | 1 | 2009 | 0 |
 | TotalTechnologyAnnualActivityUpperLimit | UTOPIA | RLu | 1 | 2010 | 0 |
 
-## 4. Run the workflow 
-```bash 
+## 4. Run the workflow
+```bash
 cd src
-python main_ms.py --step_length 5 --input_data ../data/.txt 
+python main_ms.py --step_length 5 --input_data ../data/.txt
 ```
 
 ## 6. View Results
 
-Under the results folder, there should now be results for all the 
-permutations of options. 
+Under the results folder, there should now be results for all the
+permutations of options.
 
-For example, the results of implementing option 0 in scenario A, option 1 in 
+For example, the results of implementing option 0 in scenario A, option 1 in
 scenario B, and option 1 in scenario C are nested under the folder
 `results/1A0-1B1/2C1`.
 
-```bash 
+```bash
 OSeMOSYS_STEP
 ├── data
 ├── model
@@ -217,3 +218,53 @@ OSeMOSYS_STEP
 ├── src
 └── steps
 ```
+
+# Installation
+
+You can use pip to install the package directly from Github:
+
+    pip install git+https://github.com/KTH-dESA/OSeMOSYS_step.git@main#egg=osemosys_step
+
+While the package is in the development phase, you can test the install like so:
+
+    pip install --dry-run git+https://github.com/KTH-dESA/OSeMOSYS_step.git@packaging#egg=osemosys_step
+
+Or you can install a development version like so:
+
+    git clone https://github.com/KTH-dESA/OSeMOSYS_step.git osemosys_step
+    cd osemosys_step
+    pip install -e .
+
+# Development
+
+OSeMOSYS_step is packaged using [hatchling](https://hatch.pypa.io/latest/).
+
+Create the development environment:
+
+    hatch env create
+
+Run the tests:
+
+    hatch run test
+
+Run linting for style, typing and formatting:
+
+    hatch fmt
+
+The version number is taken from the git tag. Before building and publishing the package, you should create a new annotated tag.
+
+First, check the previous tags:
+
+    git tag
+
+Then create a new annotated tag:
+
+    git tag -a v1.0 -m "First full release of OSeMOSYS Step"
+
+Build the package:
+
+    hatch build
+
+Publish the package to PyPI:
+
+    hatch publish
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..19459c4
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,170 @@
+[build-system]
+requires = ["hatchling", "hatch-vcs"]
+build-backend = "hatchling.build"
+
+[project]
+name = "osemosys_step"
+dynamic = ["version"]
+description = 'The OSeMOSYS_step package expands the capabilities of OSeMOSYS by allowing model runs under myopia with limited foresight, including scenario tree runs that consider user-provided decision options between the steps.'
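+# NOTE: the package version is derived from the most recent git tag via
+# hatch-vcs (see [tool.hatch.version] below); no version string is hard-coded.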
+readme = "README.md" +requires-python = ">=3.7" +license = "MIT" +keywords = [] +authors = [ + { name = "Hauke Henke", email = "haukeh@kth.se" }, +] +classifiers = [ + "Development Status :: 4 - Beta", + "Programming Language :: Python", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dependencies = [ + "pandas", + "otoole>=1.1", + "snakemake", + "click", + "tqdm" + ] + +[project.urls] +Documentation = "https://github.com/KTH-dESA/OSeMOSYS_step/osemosys-step#readme" +Issues = "https://github.com/KTH-dESA/OSeMOSYS_step/issues" +Source = "https://github.com/KTH-dESA/OSeMOSYS_step" + +[project.scripts] +step = "osemosys_step.main:main" + +[tool.hatch.version] +source = "vcs" + +[tool.hatch.envs.default] +dependencies = [ + "coverage[toml]>=6.5", + "pytest", +] + +[tool.hatch.build.targets.wheel] +packages = ["src/osemosys_step"] + +[tool.hatch.envs.default.scripts] +test = "pytest {args:tests}" +test-cov = "coverage run -m pytest {args:tests}" +cov-report = [ + "- coverage combine", + "coverage report", +] +cov = [ + "test-cov", + "cov-report", +] + +[[tool.hatch.envs.all.matrix]] +python = ["3.7", "3.8", "3.9", "3.10", "3.11"] + +[tool.hatch.envs.lint] +detached = true +dependencies = [ + "black>=23.1.0", + "mypy>=1.0.0", + "ruff>=0.0.243", +] +[tool.hatch.envs.lint.scripts] +typing = "mypy --install-types --non-interactive {args:src/osemosys_step tests}" +style = [ + "ruff {args:.}", + "black --check --diff {args:.}", +] +fmt = [ + "black {args:.}", + "ruff --fix {args:.}", + "style", +] +all = [ + "style", + "typing", +] + +[tool.black] +target-version = ["py37"] +line-length = 120 +skip-string-normalization = true + +[tool.ruff] +target-version = "py37" +line-length = 120 +select = [ + "A", + "ARG", + "B", + "C", + "DTZ", + "E", + "EM", + "F", + "FBT", + "I", + "ICN", + "ISC", + "N", + "PLC", + "PLE", + "PLR", + "PLW", + "Q", + "RUF", + "S", + "T", + "TID", + "UP", + "W", + "YTT", +] +ignore = [ + # Allow non-abstract empty methods in abstract base classes + "B027", + # Allow boolean positional values in function calls, like `dict.get(... 
True)`
+  "FBT003",
+  # Ignore checks for possible passwords
+  "S105", "S106", "S107",
+  # Ignore complexity
+  "C901", "PLR0911", "PLR0912", "PLR0913", "PLR0915",
+]
+unfixable = [
+  # Don't touch unused imports
+  "F401",
+]
+
+[tool.ruff.isort]
+known-first-party = ["osemosys_step"]
+
+[tool.ruff.flake8-tidy-imports]
+ban-relative-imports = "all"
+
+[tool.ruff.per-file-ignores]
+# Tests can use magic values, assertions, and relative imports
+"tests/**/*" = ["PLR2004", "S101", "TID252"]
+
+[tool.coverage.run]
+source_pkgs = ["osemosys_step", "tests"]
+branch = true
+parallel = true
+omit = [
+  "src/osemosys_step/__about__.py",
+]
+
+[tool.coverage.paths]
+osemosys_step = ["src/osemosys_step", "*/osemosys_step/src/osemosys_step"]
+tests = ["tests", "*/osemosys_step/tests"]
+
+[tool.coverage.report]
+exclude_lines = [
+  "no cov",
+  "if __name__ == .__main__.:",
+  "if TYPE_CHECKING:",
+]
diff --git a/src/osemosys_step/__init__.py b/src/osemosys_step/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/data_split.py b/src/osemosys_step/data_split.py
similarity index 80%
rename from src/data_split.py
rename to src/osemosys_step/data_split.py
index 68d55fa..e52c6e9 100644
--- a/src/data_split.py
+++ b/src/osemosys_step/data_split.py
@@ -3,60 +3,30 @@ import sys
 import pandas as pd
 import math
-from otoole import ReadDatafile, ReadCsv
-from otoole import WriteCsv
-from otoole import Context
 from typing import Dict, Tuple, List, Any
-from pathlib import Path 
-import utils
+from pathlib import Path
+from . import utils
 import logging
 
 logger = logging.getLogger(__name__)
 
-# def datafile_to_csv(datafile: str, csv_dir: str, config: Dict[str,Any]) -> None:
-#     """Converts datafile to folder of csvs
-
-#     Args:
-#         datafile: str
-#             Path to datafile
-#         csv_dir: str
-#             Path to directory of csv folder
-#         config: Dict[str,Any]
-#             otoole configuration data
-#     """
-#     reader = ReadDatafile(user_config=config)
-#     writer = WriteCsv(user_config=config)
-#     converter = Context(read_strategy=reader, write_strategy=writer)
-#     converter.convert(datafile, csv_dir)
-
-# def read_csv(csv_dir: str, config: Dict[str,Any], data: bool = True) -> Tuple[Dict[str, pd.DataFrame], Dict[str, Any]]:
-#     """Reads in csv data using otoole
-
-#     Returns:
-#         Tuple[Dict[str, pd.DataFrame], Dict[str, Any]]
-#             First dictionary is the data
-#             Second dictionary is the default values
-#     """
-#     reader = ReadCsv(user_config=config)
-#     return reader.read(filepath=csv_dir)
-
 def get_step_data(data: Dict[str, pd.DataFrame], years: List[int]) -> Dict[str, pd.DataFrame]:
-    """Filter otoole data based on years 
-    
-    Note that the data is the same format as otoole; mulitindex dataframe 
-    
+    """Filter otoole data based on years
+
+    Note that the data is the same format as otoole; multiindex dataframe
+
     Args:
         data: Dict[str, pd.DataFrame]
-            Complete set of reference data 
+            Complete set of reference data
         years: List[int]
-            years to filter over 
-    
+            years to filter over
+
     Returns:
         Dict[str, pd.DataFrame]
-            Filtered data over the years 
+            Filtered data over the years
     """
     out = {}
-    
+
     for name, df in data.items():
         if df.empty:
             out[name] = df
@@ -69,13 +39,13 @@ def get_step_data(data: Dict[str, pd.DataFrame], years: List[int]) -> Dict[str,
                 out[name] = df.loc[df["VALUE"].isin(years)]
             else:
                 out[name] = df
-    
-    return out 
+
+    return out
 
 # Function to calculate end of model
 def get_end_model(m_start: int, m_step_size: int, last_yr_model: int, m_foresight = None):
     """Determines the last year of a step model
-    
+
     Args:
         m_start: int
             Start year of model
@@ -88,9 +58,9 @@ def get_end_model(m_start: int, m_step_size: int, last_yr_model: int, m_foresigh
     Returns:
         e_m: int
-            Last year of step model 
+            Last year of step model
     """
-    
+
     if not m_foresight == None:
         if not (m_start + m_step_size + int(m_foresight)) > last_yr_model:
             e_m = m_start + m_step_size + int(m_foresight)
@@ -107,21 +77,21 @@ def get_end_model(m_start: int, m_step_size: int, last_yr_model: int, m_foresigh
 
 # Function to run the script
 def split_data(data: Dict[str, pd.DataFrame], step_size: List[int], foresight = None) -> Tuple[Dict[int, List[int]], Dict[int, List[int]], int]:
-    """Reads in and splits data for steps 
-    
-    Args: 
+    """Reads in and splits data for steps
+
+    Args:
         data: Dict[str, pd.DataFrame]
-            otoole internal datastore structure 
+            otoole internal datastore structure
         step_size: List[int]
-            Years in each step. If one value provided, equal step sizes. If 
-            multiple values provided, the first values represents the first 
+            Years in each step. If one value provided, equal step sizes. If
+            multiple values provided, the first value represents the first
             step, with the remaining step sizes being the second value
         foresight: int or None
-            Optional arguemnt in case the user specifies the foresight 
+            Optional argument in case the user specifies the foresight
             horizon, i.e., the years beyond the actual step years.
-    
+
     Returns:
-        actual_years_per_step: Dict 
+        actual_years_per_step: Dict {step: actual years in step}
             Actual years per step (ie. 1995-2000 for a 5yr step)
         model_years_per_step: Dict {step: modelled years in step}
@@ -129,32 +99,32 @@ def split_data(data: Dict[str, pd.DataFrame], step_size: List[int], foresight =
         full_steps: int
             Number of full steps in model run indexed from zero
     """
-    
+
     # Derive information on modelling period
     m_years = data['YEAR']["VALUE"].to_list() # modelled years
-    l_year = max(m_years) # last year modelled 
+    l_year = max(m_years) # last year modelled
     n_years = len(m_years) # number of years
 
     if len(step_size) < 2:
         n_steps = n_years / step_size[0]
     else:
         n_steps = 1 + (n_years - step_size[0]) / step_size[1]
-    all_steps = math.ceil(n_steps) 
+    all_steps = math.ceil(n_steps)
    full_steps = math.floor(n_steps) # the last step will often be cut short
-    
+
     model_years_per_step = {} # actual years plus extra at end
-    actual_years_per_step = {} # actual years per step 
-    
-    # parse out data based on number of years 
+    actual_years_per_step = {} # actual years per step
+
+    # parse out data based on number of years
     step_num = 0
 
     if len(step_size) < 2:
-        
+
         for step_num in range(all_steps):
             start = m_years[0] + step_size[0] * step_num
-            
+
             if step_num < full_steps:
-                
+
                 end_actual = start + step_size[0]
 
                 if foresight == None:
@@ -162,21 +132,21 @@ def split_data(data: Dict[str, pd.DataFrame], step_size: List[int], foresight =
                 else:
                     end_model = get_end_model(start, step_size[0], l_year, foresight)
-                
+
             else:
                 end_model = m_years[-1] + 1
                 end_actual = m_years[-1] + 1
-            
+
             model_step_years = [y for y in m_years if y in range(start, end_model)]
             actual_step_years = [y for y in m_years if y in range(start, end_actual)]
-            
+
             model_years_per_step[step_num] = model_step_years
             actual_years_per_step[step_num] = actual_step_years
 
     else:
         for step_num in range(all_steps):
-            
-            if step_num == 0: 
+
+            if step_num == 0:
                 start = m_years[0]
                 end_actual = start + step_size[0]
@@ -197,28 +167,28 @@ def split_data(data: Dict[str, pd.DataFrame], step_size: List[int], foresight =
                 start = m_years[0] + step_size[0] + (step_num - 1) * step_size[1]
                 end_actual = m_years[-1]
                 end_model = m_years[-1]
-            
+
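+            # keep only the modelled and actual years that fall inside this step's window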
             step_years_model = [y for y in m_years if y in range(start, end_model)]
             step_years_actual = [y for y in m_years if y in range(start, end_actual)]
-            
+
             model_years_per_step[step_num] = step_years_model
             actual_years_per_step[step_num] = step_years_actual
 
-    # retun (all_steps-1) beacause indexing of steps starts at 0 
+    # return (all_steps - 1) because indexing of steps starts at 0
     return actual_years_per_step, model_years_per_step, (all_steps - 1)
 
 # def split_data_old(datafile: str, step_size: List[int]) -> Tuple[Dict, int]:
-#     """Reads in and splits data for steps 
-    
-#     Args: 
+#     """Reads in and splits data for steps
+
+#     Args:
#         datafile: str
#             Path to directory
#         step_size: List[int]
-#             Years in each step. If one value provided, equal step sizes. If 
-#             multiple values provided, the first values represents the first 
+#             Years in each step. If one value provided, equal step sizes. If
+#             multiple values provided, the first values represents the first
#             step, with the remaining step sizes being the second value
-    
+
#     Returns:
#         actual_years_per_step: Dict {step: actual years in step}
#             Actual years per step (ie. 1995-2000 for a 5yr step)
@@ -227,17 +197,17 @@ def split_data(data: Dict[str, pd.DataFrame], step_size: List[int], foresight =
#         full_steps: int
#             Number of full steps in model run indexed from zero
#     """
-    
-#     # check for directory structure 
+
+#     # check for directory structure
#     data_dir = Path(datafile).parents[0]
#     utils.check_for_directory(data_dir)
-    
+
#     # Create folder of csvs from datafile
#     csv_dir = Path(data_dir, "data")
-#     config_path = Path(data_dir, "otoole_config.yaml") # chnage this to an input
+#     config_path = Path(data_dir, "otoole_config.yaml") # change this to an input
#     config = utils.read_otoole_config(str(config_path))
#     datafile_to_csv(str(datafile), str(csv_dir), config)
-    
+
#     # Derive information on modelling period
#     m_period = pd.read_csv(Path(csv_dir, "YEAR.csv"))
#     n_years = len(m_period.index)
@@ -247,15 +217,15 @@ def split_data(data: Dict[str, pd.DataFrame], step_size: List[int], foresight =
#     n_steps = 1 + (n_years - step_size[0]) / step_size[1]
#     full_steps = math.floor(n_steps)
#     all_steps = math.ceil(n_steps)
-    
+
#     # Read in reference csv data
#     otoole_reader = read_csv(str(csv_dir), config)
#     data = otoole_reader[0]
#     default_values = otoole_reader[1]
#     model_years_per_step = {} # actual years plus extra at end
-#     actual_years_per_step = {} # actual years per step 
-    
-#     # parse out data based on number of years 
+#     actual_years_per_step = {} # actual years per step
+
+#     # parse out data based on number of years
#     i = 0
#     if len(step_size) < 2:
#         for i in range(all_steps):
@@ -296,7 +266,7 @@ def split_data(data: Dict[str, pd.DataFrame], step_size: List[int], foresight =
#             step_data = get_step_data(data, step_years_model)
#             write_csv(step_data, default_values, str(Path(data_dir, f"data_{i}")), config)
#             logger.info(f"Wrote data for step {i}")
-    
+
#     return actual_years_per_step, model_years_per_step, full_steps
 
 if __name__ == '__main__':
diff --git a/src/main.py b/src/osemosys_step/main.py
similarity index 84%
rename from src/main.py
rename to src/osemosys_step/main.py
index 276dc4c..9b5382f 100644
--- a/src/main.py
+++ b/src/osemosys_step/main.py
@@ -1,91 +1,91 @@
 """Main entry point for the script
-The main function of main_ms takes always three inputs and can take the optional 
-input solver. The three needed inputs are the path to the datafile of the model,
-the step length - either an integer in case the step length is always the same 
-or a list of two integers, the first indicating the length of the first step and 
+The main function always takes three inputs and can take the optional
+input solver. The three needed inputs are the path to the datafile of the model,
+the step length - either an integer in case the step length is always the same
+or a list of two integers, the first indicating the length of the first step and
 the second of the remaining steps - and the path to the folder with the csv files
-containing the data for the parameter to varied between scenarios. The solver can 
+containing the data for the parameter to be varied between scenarios. The solver can
 be indicate in the following way 'solver=gurobi'
 """
 import click
-import data_split as ds
+from . import data_split as ds
 import os
 from pathlib import Path
 import pandas as pd
 import shutil
-from typing import Dict, List
-import utils
-import main_utils as mu
-import numpy as np
-import preprocess_data
-import solve
+from . import utils
+from . import main_utils as mu
+from . import preprocess_data
+from . import solve
 from tqdm import tqdm
-import subprocess
-import yaml
 import logging
 import sys
 import glob
+import snakemake
+from otoole import read, write
 
 logger = logging.getLogger(__name__)
 
 @click.command()
-@click.option("--step_length", required=True, multiple=True, 
+@click.option("--step_length", required=True, multiple=True,
     help="""
-    Provide an integer to indicate the step length, e.g. '5' for 
-    five year steps. One can provide the parameter also twice, for 
-    example if the first step shall be one year and all following five 
+    Provide an integer to indicate the step length, e.g. '5' for
+    five year steps. One can provide the parameter also twice, for
+    example if the first step shall be one year and all following five
     years one would enter '--step_length 1 --step_length 5'
     """)
-@click.option("--input_data", required=True, default= '../data/utopia.txt', 
+@click.option("--input_data", required=True, default= '../data/utopia.txt',
     help="The path to the input datafile. relative from the src folder, e.g. '../data/utopia.txt'")
-@click.option("--solver", default="cbc", 
+@click.option("--solver", default="cbc",
     help="Available solvers are 'glpk', 'cbc', and 'gurobi'. Default is 'cbc'")
-@click.option("--cores", default=1, show_default=True, 
+@click.option("--cores", default=1, show_default=True,
     help="Number of cores snakemake is allowed to use.")
 @click.option("--foresight", default=None,
     help="""Allows the user to indicated the number of years of foresight, i.e.,
    beyond the years in a step.
    """)
-@click.option("--path_param", default=None, 
-    help="""If the scenario data for the decisions between the steps is 
-    saved elsewhere than '../data/scenarios/' on can use this option to 
+@click.option("--path_param", default=None,
+    help="""If the scenario data for the decisions between the steps is
+    saved elsewhere than '../data/scenarios/' one can use this option to
    indicate the path.
""") def main(input_data: str, step_length: int, path_param: str, cores: int, solver=None, foresight=None): """Main entry point for workflow""" ########################################################################## - # Setup dirctories + # Setup directories ########################################################################## - - data_dir = Path("..", "data") - step_dir = Path("..", "steps") - results_dir = Path("..", "results") - model_dir = Path("..", "model") - logs_dir = Path("..", "logs") - + + # Note that when running from the command line entry point, these paths will be relative + # to the local path from which the command is run. + data_dir = Path("data") + step_dir = Path("steps") + results_dir = Path("results") + model_dir = Path("model") + logs_dir = Path("logs") + for f in glob.glob(str(logs_dir / "*.log")): os.remove(f) logging.basicConfig(filename=str(Path(logs_dir, "log.log")), level=logging.WARNING) - + ########################################################################## # Remove previous run data ########################################################################## - + for dir in glob.glob(str(data_dir / "data*/")): # remove both "data/" and "data_*/" folders shutil.rmtree(dir) utils.check_for_directory(Path(data_dir, "data")) - + for dir in glob.glob(str(data_dir / "step_*/")): shutil.rmtree(dir) - + for dir in glob.glob(str(results_dir / "*/")): shutil.rmtree(dir) - + for dir in glob.glob(str(step_dir / "*/")): shutil.rmtree(dir) @@ -93,44 +93,43 @@ def main(input_data: str, step_length: int, path_param: str, cores: int, solver= shutil.rmtree(str(Path(logs_dir, "solves"))) ########################################################################## - # Setup data and folder structure + # Setup data and folder structure ########################################################################## - + # Create scenarios folder if path_param: scenario_dir = Path(path_param) else: scenario_dir = Path(data_dir, "scenarios") - - # format step length + + # format step length step_length = utils.format_step_input(step_length) # Create folder of csvs from datafile otoole_csv_dir = Path(data_dir, "data") otoole_config_path = Path(data_dir, "otoole_config.yaml") - otoole_config = utils.read_otoole_config(str(otoole_config_path)) - utils.datafile_to_csv(str(input_data), str(otoole_csv_dir), otoole_config) + utils.datafile_to_csv(str(input_data), str(otoole_csv_dir), otoole_config_path) - # get step length parameters - otoole_data, otoole_defaults = utils.read_csv(str(otoole_csv_dir), otoole_config) + # get step length parameters + otoole_data, otoole_defaults = read(otoole_config_path, "csv", str(otoole_csv_dir)) if not foresight==None: actual_years_per_step, modelled_years_per_step, num_steps = ds.split_data(otoole_data, step_length, foresight=foresight) else: actual_years_per_step, modelled_years_per_step, num_steps = ds.split_data(otoole_data, step_length) - # write out original parsed step data + # write out original parsed step data for step, years_per_step in modelled_years_per_step.items(): step_data = ds.get_step_data(otoole_data, years_per_step) - utils.write_csv(step_data, otoole_defaults, str(Path(data_dir, f"data_{step}")), otoole_config) + write(otoole_config_path, "csv", str(Path(data_dir, f"data_{step}")), step_data, otoole_defaults) logger.info(f"Wrote data for step {step}") # dictionary for steps with new scenarios steps = mu.get_step_data(str(scenario_dir)) # returns Dict[int, Dict[str, pd.DataFrame]] - + # get option combinations per step 
step_options = mu.get_options_per_step(steps) # returns Dict[int, List[str]] step_options = mu.add_missing_steps(step_options, num_steps) - step_options = mu.append_step_num_to_option(step_options) + step_options = mu.append_step_num_to_option(step_options) # create option directores in data/ mu.create_option_directories(str(data_dir), step_options, step_directories=True) @@ -139,7 +138,7 @@ def main(input_data: str, step_length: int, path_param: str, cores: int, solver= if not step_dir.exists(): step_dir.mkdir() mu.create_option_directories(str(step_dir), step_options, step_directories=True) - + # create option directories in results/ if not results_dir.exists(): results_dir.mkdir() @@ -147,38 +146,38 @@ def main(input_data: str, step_length: int, path_param: str, cores: int, solver= if not utils.check_for_subdirectory(results_dir): all_res_dir = Path(results_dir, 'the_scen') all_res_dir.mkdir(exist_ok=True) - + # copy over step/scenario/option data mu.copy_reference_option_data(src_dir=data_dir, dst_dir=data_dir, options_per_step=step_options) ########################################################################## # Apply options to input data ########################################################################## - + step_option_data = mu.get_option_data_per_step(steps) # {int, Dict[str, pd.DataFrame]} option_data_by_param = mu.get_param_data_per_option(step_option_data) # Dict[str, Dict[str, pd.DataFrame]] for step_num in range(0, num_steps + 1): - + step_dir_number = Path(data_dir, f"step_{step_num}") - + # get grouped list of options to apply - ie. [A0-B1, C0] - + for option_dir in utils.get_subdirectories(str(step_dir_number)): grouped_options_to_apply = Path(option_dir).parts parsed_options_to_apply = [] - + # get parsed list of options to apply - ie. 
[A0, B1, C0] - + for grouped_option_to_apply in grouped_options_to_apply: if grouped_option_to_apply in ["..", "data", f"step_{step_num}"]: continue parsed_options = grouped_option_to_apply.split("-") for parsed_option_to_apply in parsed_options: parsed_options_to_apply.append(parsed_option_to_apply) - + # at this point, parsed_options_to_apply = [A0, B1, C0] - + for option_to_apply in parsed_options_to_apply: for param, param_data in option_data_by_param[option_to_apply].items(): path_to_data = Path(option_dir, f"{param}.csv") @@ -186,13 +185,13 @@ def main(input_data: str, step_length: int, path_param: str, cores: int, solver= param_data_year_filtered = param_data.loc[param_data["YEAR"].isin(modelled_years_per_step[step_num])].reset_index(drop=True) new = mu.apply_option_data(original, param_data_year_filtered) new.to_csv(path_to_data, index=False) - + ########################################################################## # Loop over steps ########################################################################## - + csv_dirs = mu.get_option_combinations_per_step(step_options) - + for step, options in tqdm(csv_dirs.items(), total=len(csv_dirs), desc="Building and Solving Models", bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}'): ###################################################################### @@ -203,7 +202,7 @@ def main(input_data: str, step_length: int, path_param: str, cores: int, solver= csvs = Path(data_dir, f"step_{step}") data_file = Path(step_dir, f"step_{step}", "data.txt") data_file_pp = Path(step_dir, f"step_{step}", "data_pp.txt") - mu.create_datafile(csvs, data_file, otoole_config) + mu.create_datafile(csvs, data_file, otoole_config_path) preprocess_data.main("otoole", str(data_file), str(data_file_pp)) else: for option in options: @@ -212,17 +211,17 @@ def main(input_data: str, step_length: int, path_param: str, cores: int, solver= for each_option in option: csvs = csvs.joinpath(each_option) data_file = data_file.joinpath(each_option) - if not data_file.exists(): + if not data_file.exists(): logger.warning(f"{str(data_file)} not created") # failed = True else: - data_file_pp = data_file.joinpath("data_pp.txt") # preprocessed + data_file_pp = data_file.joinpath("data_pp.txt") # preprocessed data_file = data_file.joinpath("data.txt") # need non-preprocessed for otoole results - mu.create_datafile(csvs, data_file, otoole_config) + mu.create_datafile(csvs, data_file, otoole_config_path) preprocess_data.main("otoole", str(data_file), str(data_file_pp)) - + ###################################################################### - # Create LP file + # Create LP file ###################################################################### osemosys_file = Path(model_dir, "osemosys.txt") @@ -231,19 +230,19 @@ def main(input_data: str, step_length: int, path_param: str, cores: int, solver= if not options: lp_file = Path(step_dir, f"step_{step}", "model.lp") datafile = Path(step_dir, f"step_{step}", "data_pp.txt") - lp_log_dir = Path("..", "logs", "solves", f"step_{step}") + lp_log_dir = Path("logs", "solves", f"step_{step}") lp_log_dir.mkdir(parents=True, exist_ok=True) lp_log_file = Path(lp_log_dir,"lp.log") exit_code = solve.create_lp(str(datafile), str(lp_file), str(osemosys_file), str(lp_log_file)) if exit_code == 1: - logger.warning(f"{str(lp_file)} could not be created") + logger.error(f"{str(lp_file)} could not be created") failed_lps.append(lp_file) else: for option in options: lp_file = Path(step_dir, f"step_{step}") - datafile = Path(step_dir, f"step_{step}") - 
lp_log_dir = Path("..", "logs", "solves", f"step_{step}") + datafile = Path(step_dir, f"step_{step}") + lp_log_dir = Path("logs", "solves", f"step_{step}") lp_log_file = Path(lp_log_dir,"lp.log") for each_option in option: lp_file = lp_file.joinpath(each_option) @@ -255,20 +254,20 @@ def main(input_data: str, step_length: int, path_param: str, cores: int, solver= lp_log_file = Path(lp_log_dir,"lp.log") exit_code = solve.create_lp(str(datafile), str(lp_file), str(osemosys_file), str(lp_log_file)) if exit_code == 1: - logger.warning(f"{str(lp_file)} could not be created") + logger.error(f"{str(lp_file)} could not be created") failed_lps.append(lp_file) ###################################################################### - # Remove failed builds + # Remove failed builds ###################################################################### for failed_lp in failed_lps: - - # remove the step folder + + # remove the step folder directory_path = Path(failed_lp).parent if directory_path.exists(): shutil.rmtree(str(directory_path)) - + # remove the corresponding folder in results/ result_options = [] while directory_path.name != f"step_{step}": @@ -278,22 +277,22 @@ def main(input_data: str, step_length: int, path_param: str, cores: int, solver= if result_option_path == results_dir: logger.error("Top level run failed :(") for item in result_option_path.glob('*'): - if not item == ".gitignore": + if not item.name == ".gitkeep": shutil.rmtree(item) sys.exit() elif result_option_path.exists(): shutil.rmtree(str(result_option_path)) ###################################################################### - # Solve the model + # Solve the model ###################################################################### - - # get lps to solve - + + # get lps to solve + lps_to_solve = [] - + if not options: - lp_file = Path(step_dir, f"step_{step}", "model.lp") + lp_file = Path("..", "..", step_dir, f"step_{step}", "model.lp") sol_dir = Path(step_dir, f"step_{step}") lps_to_solve.append(str(lp_file)) else: @@ -304,40 +303,30 @@ def main(input_data: str, step_length: int, path_param: str, cores: int, solver= lp_file = lp_file.joinpath(each_option) lp_file = lp_file.joinpath("model.lp") if lp_file.exists(): + lp_file = Path("..", "..", lp_file) lps_to_solve.append(str(lp_file)) - - # create a config file for snakemake - - config_path = Path(data_dir, "config.yaml") - config_data = {"files":lps_to_solve} - if not solver: - config_data["solver"] = "cbc" - else: - config_data["solver"] = solver - - if config_path.exists(): - config_path.unlink() - - with open(str(config_path), 'w') as file: - yaml.dump(config_data, file) - - # run snakemake - + + # run snakemake + ####### # I think the multiprocessing library may be a better option then this # since snakemake is a little overkill for running a single function # when the goal is to just parallize multiple function calls ####### - - cmd = f"snakemake --cores {cores} --keep-going" - subprocess.run(cmd, shell = True, capture_output = True) - + + snakemake.snakemake( + "src/osemosys_step/snakefile", + config = {"solver":solver, "files":lps_to_solve}, + cores = cores, + keepgoing=True + ) + ###################################################################### # Check for solutions ###################################################################### failed_sols = [] - + if not options: sol_file = Path(step_dir, f"step_{step}", "model.sol") if not sol_file.exists(): @@ -351,7 +340,9 @@ def main(input_data: str, step_length: int, path_param: str, cores: int, solver= elif 
solver == "gurobi": if solve.check_gurobi_feasibility(str(sol_file)) == 1: failed_sols.append(str(sol_file)) - + elif solver == "cplex": + print("CPLEX solution not checked") + else: for option in options: sol_file = Path(step_dir, f"step_{step}") @@ -369,28 +360,30 @@ def main(input_data: str, step_length: int, path_param: str, cores: int, solver= elif solver == "gurobi": if solve.check_gurobi_feasibility(str(sol_file)) == 1: failed_sols.append(str(sol_file)) + elif solver == "cplex": + print("CPLEX solution not checked") ###################################################################### - # Remove failed solves + # Remove failed solves ###################################################################### if failed_sols: for failed_sol in failed_sols: - + logger.warning(f"Model {failed_sol} failed solving") - + # get failed options failed_options = utils.get_options_from_path(failed_sol, ".sol") # returns ["1E0-1C0", "2C1"] - - # remove options from results + + # remove options from results result_option_path = Path(results_dir).joinpath(*failed_options) if result_option_path == results_dir: - logger.error("All runs failed") - sys.exit("All runs failed :(") + logger.error("All runs failed, quitting...") + sys.exit() elif result_option_path.exists(): shutil.rmtree(str(result_option_path)) - - # remove options from current and future steps + + # remove options from current and future steps step_to_delete = step while step_to_delete <= num_steps: step_option_path = Path(step_dir, f"step_{step_to_delete}").joinpath(*failed_options) @@ -401,16 +394,16 @@ def main(input_data: str, step_length: int, path_param: str, cores: int, solver= ###################################################################### # Generate result CSVs ###################################################################### - if not solver == "glpk": #csvs already created + if not solver == "glpk": #csvs already created if not options: sol_dir = Path(step_dir, f"step_{step}") if sol_dir.exists(): sol_file = Path(sol_dir, "model.sol") data_file = Path(sol_dir, "data.txt") solve.generate_results( - sol_file=str(sol_file), - solver=solver, - config=otoole_config, + sol_file=str(sol_file), + solver=solver, + config=otoole_config_path, data_file=str(data_file) ) else: @@ -422,16 +415,16 @@ def main(input_data: str, step_length: int, path_param: str, cores: int, solver= sol_file = Path(sol_dir, "model.sol") data_file = Path(sol_dir, "data.txt") solve.generate_results( - sol_file=str(sol_file), - solver=solver, - config=otoole_config, + sol_file=str(sol_file), + solver=solver, + config=otoole_config_path, data_file=str(data_file) ) - + ###################################################################### - # Save Results + # Save Results ###################################################################### - + if not options: # apply data to all options sol_results_dir = Path(step_dir, f"step_{step}", "results") @@ -455,25 +448,25 @@ def main(input_data: str, step_length: int, path_param: str, cores: int, solver= else: for option in options: - - # get top level result paths + + # get top level result paths sol_results_dir = Path(step_dir, f"step_{step}") dst_results_dir = results_dir - - # apply max option level for the step + + # apply max option level for the step for each_option in option: sol_results_dir = sol_results_dir.joinpath(each_option) dst_results_dir = dst_results_dir.joinpath(each_option) - - if not dst_results_dir.exists(): # failed solve + + if not dst_results_dir.exists(): # failed solve continue - + # 
find if there are more nested options for each step dst_result_subdirs = utils.get_subdirectories(str(dst_results_dir)) if not dst_result_subdirs: dst_result_subdirs = [dst_results_dir] - - # copy results + + # copy results sol_results_dir = Path(sol_results_dir, "results") for result_file in sol_results_dir.glob("*"): for dst_results_dir in dst_result_subdirs: @@ -490,44 +483,44 @@ def main(input_data: str, step_length: int, path_param: str, cores: int, solver= ###################################################################### # Update data for next step ###################################################################### - + # skip on last step if step + 1 > num_steps: continue - + step_dir_data = Path(data_dir, f"step_{step}") - + options_next_step = csv_dirs[step + 1] - - # no options in current step or next step - if not options_next_step: + + # no options in current step or next step + if not options_next_step: logger.info(f"Step {step} does not have options, and step {step + 1} does not have options") - - # Get updated residual capacity values + + # Get updated residual capacity values step_dir_results = Path(step_dir, f"step_{step}", "results") - + old_res_cap = mu.get_res_cap_next_steps(step, num_steps, data_dir, actual_years_per_step) op_life = pd.read_csv(str(Path(step_dir_data, "OperationalLife.csv"))) new_cap = pd.read_csv(str(Path(step_dir_results, "NewCapacity.csv"))) - + res_cap = mu.update_res_capacity( res_capacity=old_res_cap, op_life=op_life, new_capacity=new_cap, step_years=actual_years_per_step[step] ) - + # overwrite residual capacity values for all subsequent steps next_step = step + 1 while next_step < num_steps + 1: step_res_cap = res_cap.loc[res_cap["YEAR"].isin(modelled_years_per_step[next_step])] - + # no more res capacity to pass on if step_res_cap.empty: break - + step_dir_to_update = Path(data_dir, f"step_{next_step}") if not utils.check_for_subdirectory(str(step_dir_to_update)): @@ -536,28 +529,28 @@ def main(input_data: str, step_length: int, path_param: str, cores: int, solver= else: for subdir in utils.get_subdirectories(str(step_dir_to_update)): step_res_cap.to_csv(str(Path(subdir, "ResidualCapacity.csv")), index=False) - + next_step += 1 # no options in current step, but options in next step elif (not options) and (options_next_step): logger.info(f"Step {step} does not have options, and step {step + 1} does have options") - + option_dir_data = Path(data_dir, f"step_{step}") option_dir_results = Path(step_dir, f"step_{step}", "results") - if not option_dir_results.exists(): # failed solve + if not option_dir_results.exists(): # failed solve continue - - # Get updated residual capacity values + + # Get updated residual capacity values op_life = pd.read_csv(str(Path(option_dir_data, "OperationalLife.csv"))) new_cap = pd.read_csv(str(Path(option_dir_results, "NewCapacity.csv"))) - + # overwrite residual capacity values for all subsequent steps next_step = step + 1 while next_step < num_steps + 1: - # apply to max option level for the step + # apply to max option level for the step option_dir_to_update = Path(data_dir, f"step_{next_step}") for subdir in utils.get_subdirectories(str(option_dir_to_update)): @@ -572,38 +565,38 @@ def main(input_data: str, step_length: int, path_param: str, cores: int, solver= res_cap.to_csv(str(Path(subdir, "ResidualCapacity.csv")), index=False) next_step += 1 - + # options in current step and next step else: logger.info(f"Step {step} has options, and step {step + 1} has options") - + for option in options: 
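+                # each 'option' is a list of grouped option directories, e.g. ["1A0-1B1", "2C0"]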
                 option_dir_data = Path(data_dir, f"step_{step}")
                 option_dir_results = Path(step_dir, f"step_{step}")
 
-                # apply max option level for the step 
+                # apply max option level for the step
                 for each_option in option:
                     option_dir_data = option_dir_data.joinpath(each_option)
                     option_dir_results = option_dir_results.joinpath(each_option)
                 option_dir_results = option_dir_results.joinpath("results")
 
-                if not option_dir_results.exists(): # failed solve 
+                if not option_dir_results.exists(): # failed solve
                     continue
-                
-                # Get updated residual capacity values 
+
+                # Get updated residual capacity values
                 op_life = pd.read_csv(str(Path(option_dir_data, "OperationalLife.csv")))
                 new_cap = pd.read_csv(str(Path(option_dir_results, "NewCapacity.csv")))
-                
+
                 # overwrite residual capacity values for all subsequent steps
                 next_step = step + 1
                 while next_step < num_steps + 1:
-                    
-                    # apply to max option level for the step 
+
+                    # apply to max option level for the step
                     option_dir_to_update = Path(data_dir, f"step_{next_step}")
                     for each_option in option:
                         option_dir_to_update = option_dir_to_update.joinpath(each_option)
-                    
+
                     if utils.check_for_subdirectory(str(option_dir_to_update)):
                         for subdir in utils.get_subdirectories(str(option_dir_to_update)):
                             old_res_cap = pd.read_csv(str(Path(subdir, "ResidualCapacity.csv")))
@@ -627,6 +620,6 @@ def main(input_data: str, step_length: int, path_param: str, cores: int, solver=
                         res_cap.to_csv(str(Path(option_dir_to_update, "ResidualCapacity.csv")), index=False)
 
                     next_step += 1
-    
+
 if __name__ == '__main__':
     main() #input_data,step_length,path_param,solver)
diff --git a/src/main_utils.py b/src/osemosys_step/main_utils.py
similarity index 89%
rename from src/main_utils.py
rename to src/osemosys_step/main_utils.py
index 27d0dc6..12554c0 100644
--- a/src/main_utils.py
+++ b/src/osemosys_step/main_utils.py
@@ -5,39 +5,38 @@ import os
 from pathlib import Path
 import logging
-import utils
+from osemosys_step import utils
 import sys
-from otoole import WriteDatafile
-from otoole import ReadCsv
-from otoole import Context
+from otoole import convert
+
 logger = logging.getLogger(__name__)
 
 def get_step_data(scenaro_path: str) -> Dict[int, Dict[str, pd.DataFrame]]:
-    """Get step information 
-    
-    Args: 
+    """Get step information
+
+    Args:
         scenaro_path: str
-    
+
     Returns:
         Dict[int, Dict[str, pd.DataFrame]]
-    
+
     Example:
-        If scenarios are arranged as: 
-        
+        If scenarios are arranged as:
+
         scenarios/
             1/
                 A.csv
                 B.csv
             2/
                 C.csv
-        
-        The funtion will return 
-        
+
+        The function will return
+
         {
             1:{A:pd.DataFrame, B:pd.DataFrame,},
             2:{C:pd.DataFrame}
-        } 
+        }
     """
     steps = next(os.walk(scenaro_path))[1] # returns subdirs in scenarios/
     scenarios_per_step = {}
@@ -45,32 +44,32 @@ def get_step_data(scenaro_path: str) -> Dict[int, Dict[str, pd.DataFrame]]:
         step_path = Path(scenaro_path, step_num)
         scenario_data = {}
         for _, _, files in os.walk(str(step_path)):
-            scenarios = [f for f in files if not f[0] == '.'] 
+            scenarios = [f for f in files if not f[0] == '.']
             for scenario in scenarios:
                 # the -4 removes the ".csv"
                 scenario_data[scenario[:-4]] = pd.read_csv(Path(step_path, scenario))
             scenarios_per_step[int(step_num)] = scenario_data
-    
+
     return scenarios_per_step
 
 def make_step_directories(path: str, num_steps: int) -> Dict[int, List[str]]:
     """Create folder for each step and a dictonary with their paths
-    
+
     Args:
         path: str
-            path to suffix of file 
+            path to suffix of file
         steps: List[int]
             Number of steps
-    
-    Returns 
+
+    Returns
         Dict[int, str]
-    
+
     Example
         >>> make_step_directories("data/", 3)
         >>> ls data
         >>> data/step1, data/step2, 
data/step3 """ - + dic_step_paths = {} for step in range(num_steps): path_step = os.path.join(path, f"step_{step}") @@ -82,23 +81,23 @@ def make_step_directories(path: str, num_steps: int) -> Dict[int, List[str]]: return dic_step_paths def get_options_per_step(steps: Dict[int, Dict[str, pd.DataFrame]]) -> Dict[int, List[List[str]]]: - """Create a dictionary of options for each step - - Args: + """Create a dictionary of options for each step + + Args: steps: Dict[int, Dict[str, pd.DataFrame]] steps dictionary - output from get_step_data() - - Returns: + + Returns: Dict[int, List[str]] - - Example: + + Example: >>> get_options_per_step( .. ) >>> {0: [], 1:[A0-B0, A0-B1, A1-B0, A1-B1], 2:[C0, C1]} """ options_per_step = {} for step, scenarios in steps.items(): - - # create all the combinations + + # create all the combinations options_per_scenario = get_options_per_scenario(scenarios) # {A:[0,1], B:[1,2,3], C:[0]} grouped_options = [] if len(options_per_scenario) == 1: @@ -115,25 +114,25 @@ def get_options_per_step(steps: Dict[int, Dict[str, pd.DataFrame]]) -> Dict[int, for next_option in next_options: grouped_options.append([f"{scenario}{option}",f"{next_scenario}{next_option}"]) grouped_options = remove_duplicate_combinations(grouped_options) - expanded_options = ["-".join(x) for x in grouped_options] + expanded_options = ["-".join(x) for x in grouped_options] options_per_step[int(step)] = expanded_options - + return options_per_step def get_option_data(steps: Dict[int, Dict[str, pd.DataFrame]]) -> Dict[str, pd.DataFrame]: - """Gets option data in DataFrames. - + """Gets option data in DataFrames. + This function does NOT return an index over steps, but rater options (A0, A1 ...) - - Args: + + Args: steps: Dict[int, Dict[str, pd.DataFrame]] - steps dictionary - - Returns: + steps dictionary + + Returns: Dict[str, pd.DataFrame] - Options dictionary - - Example: + Options dictionary + + Example: >>> get_option_data(steps) >>> { A0: pd.DataFrame, @@ -141,30 +140,30 @@ def get_option_data(steps: Dict[int, Dict[str, pd.DataFrame]]) -> Dict[str, pd.D B0: pd.DataFrame, } """ - + option_data = {} for step, scenarios in steps.items(): for scenario, df in scenarios.items(): for option in df['OPTION'].unique(): option_data[f"{scenario}{option}"] = df.loc[df["OPTION"] == option].reset_index(drop=True) return option_data - + def remove_duplicate_combinations(options: List[Tuple]) -> List[Tuple]: """Removes duplicate options from each step - + Args: options: List[Tuple] List of tuples - - Returns: + + Returns: combinations: List[Tuple] List of tuples - + Example: >>> remove_duplicate_combinations([(A0, A1), (A0, B0), (A0, B1), (A1, B0), (A1, B1)]) >>> [(A0, B0), (A0, B1), (A1, B0), (A1, B1)] """ - + unique_options = [] for option in options: option_set = set(option) @@ -174,11 +173,11 @@ def remove_duplicate_combinations(options: List[Tuple]) -> List[Tuple]: def get_option_combinations_per_step(options_per_step: Dict[int, List[List[str]]]) -> Dict[int, List[str]]: """Gets full permutations of file paths in each step - + Args: options_per_step: Dict[int, List[str]] {1:[A0-B0, A0-B1, A1-B0, A1-B1], 2:[C0, C1]} - + returns: { 0: [], @@ -189,24 +188,24 @@ def get_option_combinations_per_step(options_per_step: Dict[int, List[List[str]] ] } """ - + option_combos_per_step = {} max_step = max(list(options_per_step)) for step_num in range(0, max_step + 1): current_step = step_num last_step = current_step - 1 - + # first step - if current_step == 0: + if current_step == 0: option_combos_per_step[0] = 
options_per_step[0] continue - + # no new options for this step if not options_per_step[current_step]: option_combos_per_step[current_step] = option_combos_per_step[last_step][:] continue - - # no options for previous step + + # no options for previous step if not options_per_step[last_step]: option_combos_this_step = [] for current_step_option in options_per_step[current_step]: @@ -220,19 +219,19 @@ def get_option_combinations_per_step(options_per_step: Dict[int, List[List[str]] for current_step_option in options_per_step[current_step]: option_combos_this_step.append([last_step_option, current_step_option]) option_combos_per_step[current_step] = option_combos_this_step - + return option_combos_per_step def get_options_per_scenario(scenarios: Dict[str, pd.DataFrame]) -> List[str]: """Gets list of all options per scenario - Args: + Args: step: Dict[str, pd.DataFrame] scenarios in a step Returns - All scenario/option mappings - + All scenario/option mappings + Example: >>> get_options(scenarios) >>> {A:[0,1], B:[1,2,3], C:[0]} @@ -240,24 +239,24 @@ def get_options_per_scenario(scenarios: Dict[str, pd.DataFrame]) -> List[str]: options = {} for scenario, df in scenarios.items(): options[scenario] = df['OPTION'].unique().tolist() - return options + return options def create_option_directories(root_dir: str, options_per_step: Dict[int, List[str]], step_directories: bool = True) -> None: """Create directories at the option level - - Args: + + Args: root_dir: str Root dirctory to expand options directories options_per_step: Dict[int, List[str]] - All options per step + All options per step step_directories: bool = True - Nest options under step directories + Nest options under step directories """ - + option_combos = get_option_combinations_per_step(options_per_step) max_step = max(list(options_per_step)) for step_num in range(0, max_step + 1): - + if step_num not in list(option_combos): logger.info(f"No scenario data for step {step_num}") continue @@ -269,7 +268,7 @@ def create_option_directories(root_dir: str, options_per_step: Dict[int, List[st else: new_dirs = option_combos[step_num] - # no options + # no options if not new_dirs: new_dir_copy = [] if step_directories: @@ -278,8 +277,8 @@ def create_option_directories(root_dir: str, options_per_step: Dict[int, List[st dir_path = Path(*new_dir_copy) dir_path.mkdir(parents=True, exist_ok=True) logger.info(f"Created directory {str(dir_path)}") - - else: + + else: for new_dir in new_dirs: new_dir_copy = new_dir[:] if step_directories: @@ -288,19 +287,19 @@ def create_option_directories(root_dir: str, options_per_step: Dict[int, List[st dir_path = Path(*new_dir_copy) dir_path.mkdir(parents=True, exist_ok=True) logger.info(f"Created directory {str(dir_path)}") - + def copy_reference_option_data(src_dir: str, dst_dir: str, options_per_step: Dict[int, List[str]]) -> None: """Copies original data to step/option folders - - Args: + + Args: src_dir: str - Root data folder + Root data folder dst_dir: str Root destination folder options_per_step: Dict[int, List[str]] - All options per step + All options per step """ - + option_combos = get_option_combinations_per_step(options_per_step) max_step = max(list(options_per_step)) for step_num in range(0, max_step + 1): @@ -315,18 +314,18 @@ def copy_reference_option_data(src_dir: str, dst_dir: str, options_per_step: Dic src = Path(src_dir,f"data_{step_num}") dst = Path(dst_dir, f"step_{step_num}", *dsts) utils.copy_csvs(src, dst) - + def split_path_name(directory: str) -> List[str]: """Splits path name into sub 
directories - + Args: directory: str - Directory to split - Returns: + Directory to split + Returns: List[str] - List of subdirectories in order - - Example: + List of subdirectories in order + + Example: >>> split_path_name("/path/to/directory/subdir1/subdir1a") >>> ['path', 'to', 'directory', 'subdir1', 'subdir1a'] """ @@ -338,14 +337,14 @@ def split_path_name(directory: str) -> List[str]: def apply_option(df: pd.DataFrame, option: pd.DataFrame) -> pd.DataFrame: """Applies option to dataframe - + Args: df:pd.DataFrame Input dataframe (original) option:pd.DataFrame - Option to apply - - Returns: + Option to apply + + Returns: pd.DataFrame Updated dataframe with option applied """ @@ -362,16 +361,16 @@ def apply_option(df: pd.DataFrame, option: pd.DataFrame) -> pd.DataFrame: def add_missing_steps(options_per_step: Dict[int, List[str]], max_step: int) -> Dict[int, List[str]]: """Adds missing step information - + Args: options_per_step: Dict[int, List[str]] {1:[A0-B0, A0-B1, A1-B0, A1-B1], 2:[C0, C1]} max_step: int - - Returns: + + Returns: Dict[int, List[str]] - - Example: + + Example: >>> add_missing_steps({1:[A0-B0, A0-B1, A1-B0, A1-B1], 2:[C0, C1]}, 4) >>> {0: [], 1:[A0-B0, A0-B1, A1-B0, A1-B1], 2:[C0, C1] 3: [], 4:[]} """ @@ -384,11 +383,11 @@ def add_missing_steps(options_per_step: Dict[int, List[str]], max_step: int) -> def append_step_num_to_option(options_per_step: Dict[int, List[str]]) -> Dict[int, List[str]]: """Adds the step number to uniquely identify the option - - Args: + + Args: options_per_step: Dict[int, List[str]] {1:[A0-B0, A0-B1, A1-B0, A1-B1], 2:[C0, C1]} - + Retuns: Dict[int, List[str]] {1:[1A0-1B0, 1A0-1B1, 1A1-1B0, 1A1-1B1], 2:[2C0, 2C1]} @@ -409,8 +408,8 @@ def append_step_num_to_option(options_per_step: Dict[int, List[str]]) -> Dict[in return output def create_datafile(csv_dir: str, datafile: str, config: Dict[str,Any]) -> None: - """Converts a folder of CSV data into a datafile - + """Converts a folder of CSV data into a datafile + Args: csv_dir: str path to csv directory @@ -419,40 +418,37 @@ def create_datafile(csv_dir: str, datafile: str, config: Dict[str,Any]) -> None: config: Dict[str,Any] otoole configuration data """ - reader = ReadCsv(user_config=config) - writer = WriteDatafile(user_config=config) - context = Context(read_strategy=reader, write_strategy=writer) - context.convert(csv_dir, datafile) - + convert(config, 'csv', 'datafile', csv_dir, datafile) + def get_option_data_per_step(steps: Dict[int, Dict[str, pd.DataFrame]]) -> Dict[int, Dict[str, pd.DataFrame]]: """Gets option data at a step level. 
 def get_option_data_per_step(steps: Dict[int, Dict[str, pd.DataFrame]]) -> Dict[int, Dict[str, pd.DataFrame]]:
     """Gets option data at a step level.
-    
+
     Args:
         steps: Dict[int, Dict[str, pd.DataFrame]]
             Data at a step level - see get_step_data(scenario_path: str)
-    
+
     Returns:
         Dict[int, Dict[str, pd.DataFrame]]
-            Data at a step level, parsed by option 
-    
+            Data at a step level, parsed by option
+
     Example:
         >>> get_option_data_per_step(
             1:{A:pd.DataFrame, B:pd.DataFrame},
             2:{C:pd.DataFrame}
         )
-        >>> {1: 
+        >>> {1:
             {
                 A0: pd.DataFrame,
                 A1: pd.DataFrame,
                 B0: pd.DataFrame,
             }
-        2: 
+        2:
             {
                 C0: pd.DataFrame,
                 C1: pd.DataFrame,
             }
         }
-    
+
     """
     step_option_data = {}
     for step, step_data in steps.items():
@@ -467,16 +463,16 @@ def get_option_data_per_step(steps: Dict[int, Dict[str, pd.DataFrame]]) -> Dict[

 def get_param_data_per_option(step_option_data: Dict[int, Dict[str, pd.DataFrame]]) -> Dict[str, Dict[str, pd.DataFrame]]:
     """Gets param data for each option at a step level
-    
+
     Args:
         step_option_data: Dict[int, Dict[str, pd.DataFrame]]
             Option data at a step level -> see get_option_data_per_step()
-    
-    Returns: 
+
+    Returns:
         Dict[str, Dict[str, pd.DataFrame]]
             Param data per option per step
-    
-    Example: 
+
+    Example:
         >>> get_param_data_per_option()
         >>> {A0: {TotalAnnualMaxCapacity: pd.DataFrame, TotalAnnualMaxCapacityInvestment: pd.DataFrame}}
@@ -497,16 +493,16 @@ def get_param_data_per_option(step_option_data: Dict[int, Dict[str, pd.DataFrame

 def apply_option_data(original: pd.DataFrame, option: pd.DataFrame) -> pd.DataFrame:
     """Overwrites original dataframe values with option values
-    
+
     Args:
         original: pd.DataFrame
-            original dataframe to be modified 
+            original dataframe to be modified
         option: pd.DataFrame
             option dataframe
-    
+
     Returns:
         pd.DataFrame
-            dataframe with option values applied 
+            dataframe with option values applied
     """
     if not (original.columns.to_list()) == (option.columns.to_list()):
         logger.error(f"columns for original are {original.columns} and columns to apply are {option.columns}")
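# A self-contained sketch of the overwrite semantics apply_option_data()'s
# docstring describes: option rows replace matching original rows, keyed on
# every column except VALUE. Data is illustrative; the in-repo implementation
# may differ in detail.
import pandas as pd

original = pd.DataFrame(
    [["UTOPIA", "COAL", 2005, 5.0], ["UTOPIA", "COAL", 2006, 5.0]],
    columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"],
)
option = pd.DataFrame(
    [["UTOPIA", "COAL", 2006, 0.0]],
    columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"],
)
keys = ["REGION", "TECHNOLOGY", "YEAR"]
updated = (
    pd.concat([original, option])
    .drop_duplicates(subset=keys, keep="last")  # option rows win on conflict
    .reset_index(drop=True)
)
print(updated)  # VALUE for 2006 is now 0.0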
@@ -521,7 +517,7 @@
 def get_res_cap_next_steps(step: int, n_steps: int, data_path: str, actual_yrs_in_steps: Dict) -> pd.DataFrame:
     """Gets a dataframe of the ResidualCapacity in the steps that still need to be run.
-    
+
     Args:
         step: int
             current step, that has just been run
@@ -546,20 +542,20 @@ def get_res_cap_next_steps(step: int, n_steps: int, data_path: str, actual_yrs_i
         else:
             sub_dir = utils.get_subdirectories(str(Path(data_path, f"step_{next_step}")))[0]
             res_cap_next_step = pd.read_csv(str(Path(sub_dir, "ResidualCapacity.csv")))
-        
+
         res_cap_next_step = res_cap_next_step[res_cap_next_step['YEAR'].isin(actual_yrs_in_steps[next_step])]
         res_cap = pd.concat([res_cap, res_cap_next_step], ignore_index=True)
-        
+
         next_step += 1
-    
+
     return res_cap

 def get_new_capacity_lifetime(op_life: pd.DataFrame, new_capacity: pd.DataFrame) -> pd.DataFrame:
     """Gets new capacity to apply to next steps"""
-    
+
     mapper = dict(zip(op_life['TECHNOLOGY'], op_life['VALUE']))
     regions = new_capacity["REGION"].unique()
-    
+
     results = []
     for region in regions:
         df = new_capacity.copy()
@@ -569,24 +565,24 @@ def get_new_capacity_lifetime(op_life: pd.DataFrame, new_capacity: pd.DataFrame)
         df["YEAR"] = df["YEARS_ACTIVE"]
         df = df.drop(columns=["YEARS_ACTIVE"])
         results.append(df)
-    
+
     df = pd.concat(results).reset_index(drop=True)
     df = df.groupby(by=["REGION", "TECHNOLOGY", "YEAR"]).sum().reset_index()
-    
+
     return df[["REGION", "TECHNOLOGY", "YEAR", "VALUE"]]

 def apply_op_life(start_year: int, technology: str, mapper: Dict[str,int]) -> List[int]:
     """Creates a list of years to apply a capacity value to
-    
-    Args: 
-        start_year: int, 
-            start year of new capacity 
-        technology: str, 
+
+    Args:
+        start_year: int,
+            start year of new capacity
+        technology: str,
             technology to lookup operational life for
         mapper: Dict[str,int]
             technology to operational life mapper
     Returns:
-        List[int]: 
+        List[int]:
             Years that the capacity will be available for
     """
     try:
@@ -596,14 +592,14 @@ def merge_res_capacites(old_res_cap: pd.DataFrame, new_res_cap: pd.DataFrame) ->
     """Merges an existing residual capacity and new residual capacity dataframe
-    
+
     Args:
         old_res_cap: pd.DataFrame
         new_res_cap: pd.DataFrame
-    
-    Returns: 
+
+    Returns:
         pd.DataFrame
-            Residual capacity data with the values summed together 
+            Residual capacity data with the values summed together
     """
     if not list(old_res_cap.columns) == list(new_res_cap.columns):
         raise ValueError("Column names do not match")
@@ -613,16 +609,16 @@
 def update_res_capacity(res_capacity: pd.DataFrame, op_life: pd.DataFrame, new_capacity: pd.DataFrame, step_years: List[int]) -> pd.DataFrame:
     """Updates residual capacity data for next step
-    
-    Args: 
-        res_capacity: pd.DataFrame, 
-        op_life: pd.DataFrame, 
-        new_capacity: pd.DataFrame, 
+
+    Args:
+        res_capacity: pd.DataFrame,
+        op_life: pd.DataFrame,
+        new_capacity: pd.DataFrame,
         step_years: List[int]
             Years in the step associated with the NewCapacity
     """
     step_new_capacity = new_capacity.loc[new_capacity["YEAR"].isin(step_years)]
     new_res_cap = get_new_capacity_lifetime(op_life, step_new_capacity)
     final_capacity = merge_res_capacites(res_capacity, new_res_cap)
-    
+
     return final_capacity
\ No newline at end of file
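# A worked sketch of the capacity bookkeeping above, with illustrative numbers:
# one unit of NewCapacity built in 1995 under a 5-year operational life shows
# up as capacity over 1995-1999, which update_res_capacity() then folds into
# the ResidualCapacity handed to later steps.
import pandas as pd

op_life = {"E01": 5}
builds = [("UTOPIA", "E01", 1995, 1.0), ("UTOPIA", "E01", 1997, 1.0)]

rows = [
    (region, tech, year + offset, value)
    for region, tech, year, value in builds
    for offset in range(op_life[tech])  # each build stays active op_life years
]
df = (
    pd.DataFrame(rows, columns=["REGION", "TECHNOLOGY", "YEAR", "VALUE"])
    .groupby(["REGION", "TECHNOLOGY", "YEAR"], as_index=False)["VALUE"].sum()
)
print(df)  # overlap years 1997-1999 sum to 2.0, matching the tests below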
diff --git a/src/preprocess_data.py b/src/osemosys_step/preprocess_data.py
similarity index 100%
rename from src/preprocess_data.py
rename to src/osemosys_step/preprocess_data.py
diff --git a/src/osemosys_step/snakefile b/src/osemosys_step/snakefile
new file mode 100644
index 0000000..402b75c
--- /dev/null
+++ b/src/osemosys_step/snakefile
@@ -0,0 +1,37 @@
+
+options = [x[12:-9] for x in config["files"]] # extract out only the option path
+
+rule all:
+    input:
+        expand("steps/{option}/model.sol", option=options)
+
+rule solve_lp:
+    message:
+        'Solving model via {config[solver]}...'
+    input:
+        lp_file = "steps/{option}/model.lp"
+    output:
+        sol_file = "steps/{option}/model.sol"
+    params:
+        ilp = "steps/{option}/model.ilp"
+    log:
+        log = "logs/solves/{option}/model.log"
+    shell:
+        """
+        if [ {config[solver]} = gurobi ]
+        then
+            gurobi_cl Method=2 ResultFile={output.sol_file} ResultFile={params.ilp} {input.lp_file}
+        elif [ {config[solver]} = cbc ]
+        then
+            cbc {input.lp_file} solve -solu {output.sol_file}
+        elif [ {config[solver]} = cplex ]
+        then
+            cplex -c "read {input.lp_file}" "optimize" "write {output.sol_file}"
+        elif [ {config[solver]} = glpk ]
+        then
+            cp ../model/osemosys.txt ../steps/{wildcards.option}/osemosys.txt &&
+            cd ../steps/{wildcards.option} &&
+            mkdir results &&
+            glpsol -m osemosys.txt -d data_pp.txt -w model.sol
+        fi > {log.log}
+        """
\ No newline at end of file
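# A quick sketch of the option-path slicing in the snakefile above, assuming
# config["files"] carries LP paths shaped like "../../steps/<options>/model.lp"
# (12 leading characters, 9 trailing); the exact prefix is an assumption here.
files = ["../../steps/1A0-1B1/2C0/model.lp"]  # hypothetical entry
options = [x[12:-9] for x in files]
print(options)  # ['1A0-1B1/2C0']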
diff --git a/src/solve.py b/src/osemosys_step/solve.py
similarity index 67%
rename from src/solve.py
rename to src/osemosys_step/solve.py
index 726da92..88f5c0b 100644
--- a/src/solve.py
+++ b/src/osemosys_step/solve.py
@@ -1,21 +1,22 @@
 """Module to hold solving logic"""
 from typing import Union, Dict, Any
-from pathlib import Path 
-import sys 
+from pathlib import Path
+import sys
 import logging
 import subprocess
-import os
-from otoole import ReadCbc, ReadCplex, ReadGurobi, ReadDatafile, ReadCsv
-from otoole import WriteCsv
-from otoole import Context
+import os
+
+from otoole import convert_results
+
+
 logger = logging.getLogger(__name__)

 def generate_results(sol_file: str, solver: str, config: Dict[str,Any], data_file: str = None, csv_data: str = None) -> None:
     """Converts a solution file to a folder of CSVs
-    
+
     Note, only one of datafile or csv data needs to be passed to generate the full suite of results
-    
+
     Args:
         sol_file: str
             path to sol file
@@ -28,39 +29,22 @@ def generate_results(sol_file: str, solver: str, config: Dict[str,Any], data_fil
         csv_data: str
             path to csv data. if not provided, the full suite of results will NOT be generated
     """
-    
+
     sol_dir = Path(sol_file).parent
-    
-    if solver == "gurobi":
-        reader = ReadGurobi(user_config = config)
-    elif solver == "cplex":
-        reader = ReadCplex(user_config = config)
-    elif solver == "cbc":
-        reader = ReadCbc(user_config = config)
-    writer = WriteCsv(user_config = config)
-    
-    if data_file:
-        input_data, _ = ReadDatafile(user_config=config).read(data_file)
-    elif csv_data:
-        input_data, _ = ReadCsv(user_config=config).read(csv_data)
-    else:
-        input_data = None
-    
-    context = Context(read_strategy = reader, write_strategy = writer)
-    
-    context.convert(sol_file, str(Path(sol_dir, "results")), input_data = input_data)
-    
+
+    convert_results(config, solver, 'csv', sol_file, str(Path(sol_dir, "results")), 'datafile', data_file)
+

 def create_lp(datafile: str, lp_file: str, osemosys: str, log_file:str = None) -> int:
     """Create the LP file using GLPK
-    
-    Args: 
-        datafile: str, 
-        lp_file: str, 
-        osemosys: str 
-    
+
+    Args:
+        datafile: str,
+        lp_file: str,
+        osemosys: str
+
     Returns:
         0: int
-            If successful 
+            If successful
         1: int
             If not successful
     """
@@ -68,28 +52,28 @@ def create_lp(datafile: str, lp_file: str, osemosys: str, log_file:str = None) -
     if log_file:
         cmd = f"glpsol -m {osemosys} -d {datafile} --wlp {lp_file} --check --log {log_file}"
         subprocess.run(cmd, shell = True, capture_output = True)
-    
+
     else:
         cmd = f"glpsol -m {osemosys} -d {datafile} --wlp {lp_file} --check"
         subprocess.run(cmd, shell = True, capture_output = True)

     if not os.path.exists(lp_file):
-        logger.error(f"Can not create {lp_file}")
+        logger.error(f"Cannot create {lp_file} with the command {cmd}")
         return 1
     else:
         return 0
-    
+

 def check_cbc_feasibility(sol: str) -> int:
     """Checks if the CBC solution is optimal
-    
+
     Args:
         sol: str
-            Path to CBC solution file 
-    
+            Path to CBC solution file
+
     Returns:
         0: int
-            If successful 
+            If successful
         1: int
             If not successful
     """
@@ -100,27 +84,27 @@ def check_cbc_feasibility(sol: str) -> int:
         return 0
     else:
         return 1
-    
+

 def check_glpk_feasibility(sol: str) -> int:
     """Checks if the GLPK solution is optimal
-    
+
     Args:
         sol: str
-            Path to CBC solution file 
-    
+            Path to GLPK solution file
+
     Returns:
         0: int
-            If successful 
+            If successful
         1: int
             If not successful
-    
+
     c Problem:    osemosys
     c Rows:       3721
     c Columns:    2280
     c Non-zeros:  18069
     c Status:     OPTIMAL
     c Objective:  cost = 16068.9934 (MINimum)
-    
+
     """
     fifth_line = get_nth_line(sol, 5)
     status = fifth_line.split(" ")[6]
@@ -132,21 +116,21 @@ def check_glpk_feasibility(sol: str) -> int:

 def check_gurobi_feasibility(sol: str) -> int:
     """Checks if the gurobi solution is optimal
-    
-    This will check for an empty solution file, not the ilp file that 
-    Gurobi will also write out for infeasible 
-    
+
+    This will check for an empty solution file, not the ilp file that
+    Gurobi will also write out for infeasible
+
     Args:
         sol: str
-            Path to gurobi solution file 
-    
+            Path to gurobi solution file
+
     Returns:
         0: int
-            If successful 
+            If successful
         1: int
             If not successful
     """
-    if Path(sol).stat().st_size != 0: # not empty 
+    if Path(sol).stat().st_size != 0: # not empty
         return 0
     else:
         return 1

 def get_nth_line(file_path: str, n: int):
     with open(file_path, 'r') as file:
         for i, line in enumerate(file):
             if i == n:
                 return line
     return None
-    
-    
+
+
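# A minimal usage sketch of the slimmed-down generate_results() above; the
# paths are illustrative. otoole's convert_results() reads the solver solution
# plus the original datafile and writes one CSV per result parameter.
from osemosys_step import solve

solve.generate_results(
    sol_file="steps/step_0/model.sol",  # hypothetical CBC solution file
    solver="cbc",
    config="config.yaml",               # otoole user config (path, per convert_results)
    data_file="steps/step_0/data.txt",  # datafile the model was solved from
)
# results are written to steps/step_0/results/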
diff --git a/src/utils.py b/src/osemosys_step/utils.py
similarity index 71%
rename from src/utils.py
rename to src/osemosys_step/utils.py
index bb0999d..b767903 100644
--- a/src/utils.py
+++ b/src/osemosys_step/utils.py
@@ -2,22 +2,21 @@
 import os
 import shutil
-from pathlib import Path 
+from pathlib import Path
 from typing import Dict, Any, List, Union, Tuple
-from yaml import load 
+from yaml import load
 import pandas as pd
 import sys
-import glob
 from otoole.utils import UniqueKeyLoader
-from otoole import ReadDatafile, WriteCsv, Context, ReadCsv
+from otoole import convert
 import logging
 logger = logging.getLogger(__name__)

 def check_for_directory(directory: Path):
-    """Creates Directory 
-    
-    Args: 
+    """Checks for a directory
+
+    Args:
         directory: Path
             Path to directory
     """
@@ -26,12 +25,12 @@ def check_for_directory(directory: Path):
         logger.info(f"{directory} exists and is empty")
     else:
         logger.info(f"{directory} doesn't exist")
-    
+
 def read_otoole_config(config: str) -> Dict[str,Any]:
     """Reads in otoole configuration file"""
     ending = Path(config).suffix
     if ending not in (".yaml", ".yml"):
-        logger.error(f"otoole config file must have a .yaml extension. Identified a {ending} extension") 
+        logger.error(f"otoole config file must have a .yaml or .yml extension. Identified a {ending} extension")
     with open(config, "r") as f:
         contents = load(f, Loader=UniqueKeyLoader)
     return contents
@@ -44,23 +43,23 @@ def format_step_input(steps: Tuple) -> List[int]:
     if len(steps) > 2:
         logger.error(f"Step must contain at most 2 values. Received length of {len(steps)}")
         sys.exit()
-    
+
     return [int(s) for s in steps]

 def copy_csvs(src: str, dst: str) -> None:
     """Copies directories of CSV data
-    
+
     Args:
         src: str
-            Source directory 
+            Source directory
         dst: str
-            Destination directory 
+            Destination directory
     """
     for f in os.listdir(src):
         source_file = os.path.join(src, f)
         dst_file = os.path.join(dst, f)
         shutil.copy(source_file, dst_file)
-    
+
 def get_subdirectories(directory: str):
     """Gets all subdirectories"""
     subdirectories = []
@@ -71,7 +70,7 @@ def get_subdirectories(directory: str):
             subdirectories.extend(get_subdirectories(path))
         else:
             subdirectories.append(path)
-    
+
     return subdirectories

 def check_for_subdirectory(directory: str):
@@ -84,18 +83,18 @@ def check_for_subdirectory(directory: str):

 def concat_dataframes(src: pd.DataFrame, dst: pd.DataFrame, years: list[int] = None) -> pd.DataFrame:
     """Combines two dfs together
-    
+
     Args:
-        src: str, 
+        src: pd.DataFrame,
             first df
-        dst: str, 
+        dst: pd.DataFrame,
             Second df
-        years: list[int] = None 
+        years: list[int] = None
             Years to filter source over. If None, no filtering happens
-    
+
     Returns:
         pd.DataFrame
-            dataframes concatanted 
+            dataframes concatenated
     """
     if not (src.columns.to_list()) == (dst.columns.to_list()):
         logger.error(f"columns for source are {src.columns} and columns to destination are {dst.columns}")
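# A small sketch of the combine-and-filter behaviour concat_dataframes()
# documents: source rows are restricted to `years` (when given) before being
# appended to the destination frame. Illustrative data only.
import pandas as pd

src = pd.DataFrame({"YEAR": [1995, 1996, 1997], "VALUE": [1, 2, 3]})
dst = pd.DataFrame({"YEAR": [1990], "VALUE": [9]})
years = [1995, 1996]

combined = pd.concat([dst, src.loc[src["YEAR"].isin(years)]], ignore_index=True)
print(combined)  # rows for 1990, 1995 and 1996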
@@ -113,14 +112,14 @@ def check_for_subdirectory(directory: str):

 def get_options_from_path(file_path: str, extension: str = None) -> Union[List[str], None]:
     """Parses a path to return options
-    
+
     Args:
         file_path: str
-            directory path 
-    
+            directory path
+
     Returns:
-        Options based on looking for step_*/ in the filepath 
-    
+        Options based on looking for step_*/ in the filepath
+
     Example:
         >>> get_options_from_path("../steps/step_4/1E0-1C1/2D1/model.sol")
         >>> ["1E0-1C1", "2D1"]
     """
     for num, dir in enumerate(dirs):
         if dir.startswith("step_"):
             return dirs[num + 1:]
     return None
-    
-def datafile_to_csv(datafile: str, csv_dir: str, config: Dict[str,Any]) -> None:
+
+def datafile_to_csv(datafile: str, csv_dir: str, config: str) -> None:
     """Converts datafile to folder of csvs
-    
+
     Args:
         datafile: str
-            Path to datafile 
+            Path to datafile
         csv_dir: str
-            Path to directory of csv folder 
+            Path to directory of csv folder
         config: str
             path to otoole configuration file
     """
-    reader = ReadDatafile(user_config=config)
-    writer = WriteCsv(user_config=config)
-    converter = Context(read_strategy=reader, write_strategy=writer)
-    converter.convert(datafile, csv_dir)
-
-def read_csv(csv_dir: str, config: Dict[str,Any]) -> Tuple[Dict[str, pd.DataFrame], Dict[str, Any]]:
-    """Reads in csv data using otoole
-    
-    Returns:
-        Tuple[Dict[str, pd.DataFrame], Dict[str, Any]]
-            First dictionary is the data
-            Second dictionary is the default values
-    """
-    reader = ReadCsv(user_config=config)
-    return reader.read(filepath=csv_dir)
-
-def write_csv(data: Dict[str, pd.DataFrame], default_values: Dict[str, Any], csv_dir:str, config: Dict[str,Any]) -> None:
-    """Writes out CSV data"""
-    writer = WriteCsv(user_config=config)
-    writer.write(inputs=data, filepath=csv_dir, default_values=default_values)
\ No newline at end of file
+    convert(config, 'datafile', 'csv', datafile, csv_dir)
diff --git a/src/snakefile b/src/snakefile
deleted file mode 100644
index d0d17d1..0000000
--- a/src/snakefile
+++ /dev/null
@@ -1,55 +0,0 @@
-
-configfile: "../data/config.yaml"
-
-options = [x[9:-9] for x in config["files"]] # extract out only the option path
-
-rule all:
-    input:
-        expand("../steps/{option}/model.sol", option=options)
-
-rule solve_lp:
-    message:
-        'Solving model via {config[solver]}...'
-    input:
-        lp_file = "../steps/{option}/model.lp"
-    output:
-        sol_file = "../steps/{option}/model.sol"
-    params:
-        ilp = "../steps/{option}/model.ilp"
-    log:
-        log = "../logs/solves/{option}/model.log"
-    shell:
-        """
-        if [ {config[solver]} = gurobi ]
-        then
-            gurobi_cl Method=2 ResultFile={output.sol_file} ResultFile={params.ilp} {input.lp_file}
-        elif [ {config[solver]} = cbc ]
-        then
-            cbc {input.lp_file} solve -solu {output.sol_file}
-        elif [ {config[solver]} = glpk ]
-        then
-            cp ../model/osemosys.txt ../steps/{wildcards.option}/osemosys.txt &&
-            cd ../steps/{wildcards.option} &&
-            mkdir results &&
-            glpsol -m osemosys.txt -d data_pp.txt -w model.sol
-        fi > {log.log}
-        """
-
-"""
-Commented out cause you will need the transform script and sort results for CPLEX
-
-
-if [ {config[solver]} = gurobi ]
-then
-    gurobi_cl Method=2 ResultFile={output.sol_file} ResultFile={params.ilp} {input.lp_file}
-elif [ {config[solver]} = cplex ]
-then
-    cplex -c "read {input.lp_file}" "optimize" "write {output.sol_file}"
-else
-    cbc {input.lp_file} solve -sec 1500 -solu {output.sol_file}
-fi
-
-to add in glpk as the solver, we will either need to change the results path
-(which is quite cumbersum) or just change the working directory (pro safer)
-
-"""
diff --git a/src/test_results_to_next_step.py b/src/test_results_to_next_step.py
deleted file mode 100644
index d228231..0000000
--- a/src/test_results_to_next_step.py
+++ /dev/null
@@ -1,118 +0,0 @@
-import pandas as pd
-from pytest import fixture, raises
-from pandas.testing import assert_frame_equal
-import main_utils as mu
-
-@fixture
-def res_capacity():
-    return pd.DataFrame(
-        [
-            ["UTOPIA", "E01", 1995, 2],
-            ["UTOPIA", "E01", 1996, 2],
-            ["UTOPIA", "E01", 1997, 2],
-            ["UTOPIA", "E01", 1998, 2],
-            ["UTOPIA", "E01", 1999, 2],
-        ], columns = ["REGION", "TECHNOLOGY", "YEAR", "VALUE"]
-    )
-
-@fixture
-def op_life():
-    return pd.DataFrame(
-        [
-            ["E01", 5]
-        ], columns=["TECHNOLOGY","VALUE"]
-    )
-
-@fixture
-def new_capacity():
-    return pd.DataFrame(
-        [
-            ["UTOPIA", "E01", 1995, 1],
-            ["UTOPIA", "E01", 1997, 1],
-        ], columns = ["REGION", "TECHNOLOGY", "YEAR", "VALUE"]
-    )
-
-class TestUpdateResidualCapacity:
-
-    def test_update_residual_capacity_1(self, res_capacity, op_life, new_capacity):
-
-        step_years = [1995, 1996]
-
-        expected = pd.DataFrame(
-            [
-                ["UTOPIA", "E01", 1995, 3],
-                ["UTOPIA", "E01", 1996, 3],
-                ["UTOPIA", "E01", 1997, 3],
-                ["UTOPIA", "E01", 1998, 3],
-                ["UTOPIA", "E01", 1999, 3],
-            ], columns = ["REGION", "TECHNOLOGY", "YEAR", "VALUE"]
-        )
-        actual = mu.update_res_capacity(
-            res_capacity = res_capacity,
-            op_life = op_life,
-            new_capacity = new_capacity,
-            step_years = step_years
-        )
-        assert_frame_equal(actual, expected)
-
-    def test_update_residual_capacity_2(self, res_capacity, op_life, new_capacity):
-
-        step_years = [1995, 1996, 1997]
-
-        expected = pd.DataFrame(
-            [
-                ["UTOPIA", "E01", 1995, 3],
-                ["UTOPIA", "E01", 1996, 3],
-                ["UTOPIA", "E01", 1997, 4],
-                ["UTOPIA", "E01", 1998, 4],
-                ["UTOPIA", "E01", 1999, 4],
-                ["UTOPIA", "E01", 2000, 1],
-                ["UTOPIA", "E01", 2001, 1],
-            ], columns = ["REGION", "TECHNOLOGY", "YEAR", "VALUE"]
-        )
-        actual = mu.update_res_capacity(
-            res_capacity = res_capacity,
-            op_life = op_life,
-            new_capacity = new_capacity,
-            step_years = step_years
-        )
-        assert_frame_equal(actual, expected)
-
-class TestGetNewCapacityLifetime:
-
-    def test_get_new_capacity_lifetime(self, op_life, new_capacity):
-
-        expected = pd.DataFrame(
-            [
-                ["UTOPIA", "E01", 1995, 1],
-                ["UTOPIA", "E01", 1996, 1],
"E01", 1997, 2], - ["UTOPIA", "E01", 1998, 2], - ["UTOPIA", "E01", 1999, 2], - ["UTOPIA", "E01", 2000, 1], - ["UTOPIA", "E01", 2001, 1], - ], columns = ["REGION", "TECHNOLOGY", "YEAR", "VALUE"] - ) - actual = mu.get_new_capacity_lifetime(op_life=op_life, new_capacity=new_capacity) - assert_frame_equal(actual, expected) - -class TestApplyOperationalLife: - - def test_apply_op_life_one_year(self): - start_year = 2000 - technology = "EO1" - mapper = {"EO1": 1} - - actual = mu.apply_op_life(start_year, technology, mapper) - expected = [2000] - assert actual == expected - - def test_apply_op_life_multiple_years(self): - start_year = 2000 - technology = "EO1" - mapper = {"EO1": 5} - - actual = mu.apply_op_life(start_year, technology, mapper) - expected = [2000, 2001, 2002, 2003, 2004] - assert actual == expected - \ No newline at end of file diff --git a/tests/test_results_to_next_step.py b/tests/test_results_to_next_step.py index f007f5b..177ce0e 100644 --- a/tests/test_results_to_next_step.py +++ b/tests/test_results_to_next_step.py @@ -1,33 +1,117 @@ import pandas as pd -import os -import results_to_next_step as rtns +from pytest import fixture, raises +from pandas.testing import assert_frame_equal +from osemosys_step import main_utils as mu -class TestResultsTransfer: +@fixture +def res_capacity(): + return pd.DataFrame( + [ + ["UTOPIA", "E01", 1995, 2], + ["UTOPIA", "E01", 1996, 2], + ["UTOPIA", "E01", 1997, 2], + ["UTOPIA", "E01", 1998, 2], + ["UTOPIA", "E01", 1999, 2], + ], columns = ["REGION", "TECHNOLOGY", "YEAR", "VALUE"] + ) - def test_sum_rescap_newcap(self): +@fixture +def op_life(): + return pd.DataFrame( + [ + ["E01", 5] + ], columns=["TECHNOLOGY","VALUE"] + ) - folder = os.path.join('..', 'tests', 'fixtures') - dp_path = os.path.join(folder, 'data') - res_path = os.path.join(folder, 'results') +@fixture +def new_capacity(): + return pd.DataFrame( + [ + ["UTOPIA", "E01", 1995, 1], + ["UTOPIA", "E01", 1997, 1], + ], columns = ["REGION", "TECHNOLOGY", "YEAR", "VALUE"] + ) - res_cap = pd.read_csv(os.path.join(dp_path, 'ResidualCapacity.csv')) - - data = [ - ['TEST','TECA',0,1.5], - ['TEST','TECB',0,1], - ['TEST','TECB',1,2], - ['TEST','TECA',1,0.5], - ['TEST','TECC',0,1], - ['TEST','TECC',1,0], - ] +class TestUpdateResidualCapacity: - expected = pd.DataFrame(data=data, columns=['REGION', 'TECHNOLOGY', 'YEAR', 'VALUE']) + def test_update_residual_capacity_1(self, res_capacity, op_life, new_capacity): - index = ['REGION', 'TECHNOLOGY', 'YEAR'] + step_years = [1995, 1996] - rtns.main(dp_path, res_path) - new_res_cap = pd.read_csv(os.path.join(dp_path, 'ResidualCapacity.csv')) + expected = pd.DataFrame( + [ + ["UTOPIA", "E01", 1995, 3], + ["UTOPIA", "E01", 1996, 3], + ["UTOPIA", "E01", 1997, 3], + ["UTOPIA", "E01", 1998, 3], + ["UTOPIA", "E01", 1999, 3], + ], columns = ["REGION", "TECHNOLOGY", "YEAR", "VALUE"] + ) + actual = mu.update_res_capacity( + res_capacity = res_capacity, + op_life = op_life, + new_capacity = new_capacity, + step_years = step_years + ) + assert_frame_equal(actual, expected) - pd.testing.assert_frame_equal(new_res_cap.set_index(index), expected.set_index(index), check_index_type=False) + def test_update_residual_capacity_2(self, res_capacity, op_life, new_capacity): - res_cap.to_csv(os.path.join(dp_path, 'ResidualCapacity.csv')) \ No newline at end of file + step_years = [1995, 1996, 1997] + + expected = pd.DataFrame( + [ + ["UTOPIA", "E01", 1995, 3], + ["UTOPIA", "E01", 1996, 3], + ["UTOPIA", "E01", 1997, 4], + ["UTOPIA", "E01", 1998, 4], + ["UTOPIA", "E01", 1999, 
+                ["UTOPIA", "E01", 1999, 4],
+                ["UTOPIA", "E01", 2000, 1],
+                ["UTOPIA", "E01", 2001, 1],
+            ], columns = ["REGION", "TECHNOLOGY", "YEAR", "VALUE"]
+        )
+        actual = mu.update_res_capacity(
+            res_capacity = res_capacity,
+            op_life = op_life,
+            new_capacity = new_capacity,
+            step_years = step_years
+        )
+        assert_frame_equal(actual, expected)
+
+class TestGetNewCapacityLifetime:
+
+    def test_get_new_capacity_lifetime(self, op_life, new_capacity):
+
+        expected = pd.DataFrame(
+            [
+                ["UTOPIA", "E01", 1995, 1],
+                ["UTOPIA", "E01", 1996, 1],
+                ["UTOPIA", "E01", 1997, 2],
+                ["UTOPIA", "E01", 1998, 2],
+                ["UTOPIA", "E01", 1999, 2],
+                ["UTOPIA", "E01", 2000, 1],
+                ["UTOPIA", "E01", 2001, 1],
+            ], columns = ["REGION", "TECHNOLOGY", "YEAR", "VALUE"]
+        )
+        actual = mu.get_new_capacity_lifetime(op_life=op_life, new_capacity=new_capacity)
+        assert_frame_equal(actual, expected)
+
+class TestApplyOperationalLife:
+
+    def test_apply_op_life_one_year(self):
+        start_year = 2000
+        technology = "EO1"
+        mapper = {"EO1": 1}
+
+        actual = mu.apply_op_life(start_year, technology, mapper)
+        expected = [2000]
+        assert actual == expected
+
+    def test_apply_op_life_multiple_years(self):
+        start_year = 2000
+        technology = "EO1"
+        mapper = {"EO1": 5}
+
+        actual = mu.apply_op_life(start_year, technology, mapper)
+        expected = [2000, 2001, 2002, 2003, 2004]
+        assert actual == expected