From 1d4751fee20dc5bbc0b3ca5e771b6a99918602e3 Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Mon, 4 Mar 2024 10:12:17 -0800 Subject: [PATCH 01/35] Amending ReadTheDocs directory --- docs/_toc.yml | 25 +++++++++++------ docs/algo.md | 3 +++ docs/algo1.ipynb | 18 +++++++++++++ docs/algo2.ipynb | 18 +++++++++++++ docs/glossary.md | 1 + docs/index copy.md | 18 +++++++++++++ docs/{input-files.md => input_files.md} | 0 docs/quick_start.ipynb | 18 +++++++++++++ docs/{Z_references.bib => references.bib} | 0 docs/theory_implementation.ipynb | 33 +++++++++++++++++++++++ 10 files changed, 126 insertions(+), 8 deletions(-) create mode 100644 docs/algo.md create mode 100644 docs/algo1.ipynb create mode 100644 docs/algo2.ipynb create mode 100644 docs/glossary.md create mode 100644 docs/index copy.md rename docs/{input-files.md => input_files.md} (100%) create mode 100644 docs/quick_start.ipynb rename docs/{Z_references.bib => references.bib} (100%) create mode 100644 docs/theory_implementation.ipynb diff --git a/docs/_toc.yml b/docs/_toc.yml index d6650436..adebb5cf 100644 --- a/docs/_toc.yml +++ b/docs/_toc.yml @@ -7,17 +7,26 @@ parts: # - file: markdown - caption: Documentation chapters: + - file: quick_start - file: classes-datastructures - file: example_notebooks/workflow_minimal.ipynb - - file: input-files + - file: input_files + - file: theory_implementation + - file: algo + - file: glossary + title: Algorithmic implementation + sections: + - file: algo1 + - file: algo2 + title: Equations and theoretical explanation + sections: + - file: theory/ts_length_conversion + - file: theory/acoustics_to_biomass_conversion + - file: theory/semivariograms + - file: theory/kriging + - file: theory/stratified_statistics + - file: theory/general_equations - caption: Notebooks - chapters: - - file: example_notebooks/echopro_workflow.ipynb - - file: example_notebooks/reports_workflow.ipynb - - file: example_notebooks/semi_variogram_workflow.ipynb - - file: example_notebooks/kriging_mesh_walkthrough.ipynb - - file: example_notebooks/transect_selection_workflow.ipynb - - file: example_notebooks/bootstrapping_walkthrough.ipynb - caption: Help & reference chapters: - file: api diff --git a/docs/algo.md b/docs/algo.md new file mode 100644 index 00000000..0c8a3b62 --- /dev/null +++ b/docs/algo.md @@ -0,0 +1,3 @@ +# Algorithmic Implementations + +The following sections connect the underlying [theory](theory:theory_base) with the variables and data structures presented in [](data-structure). \ No newline at end of file diff --git a/docs/algo1.ipynb b/docs/algo1.ipynb new file mode 100644 index 00000000..e43d977b --- /dev/null +++ b/docs/algo1.ipynb @@ -0,0 +1,18 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Algo page1" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/algo2.ipynb b/docs/algo2.ipynb new file mode 100644 index 00000000..3dea11c2 --- /dev/null +++ b/docs/algo2.ipynb @@ -0,0 +1,18 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Algo page 2" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/glossary.md b/docs/glossary.md new file mode 100644 index 00000000..0361557d --- /dev/null +++ b/docs/glossary.md @@ -0,0 +1 @@ +Glossary of all symbols, indices, and notations used for mathematical equations and variables contained within the `Survey` class object. 
\ No newline at end of file diff --git a/docs/index copy.md b/docs/index copy.md new file mode 100644 index 00000000..b738f562 --- /dev/null +++ b/docs/index copy.md @@ -0,0 +1,18 @@ +# EchoPro + +This site currently hosts example Jupyter notebooks for the new Python EchoPro package (https://github.com/uw-echospace/EchoPro/). Over time, the documentation for this package will be added here too. + +The Jupyter notebooks are shown in a "rendered", executed form. + +```{admonition} Glitches with some interactive graphical elements +While the notebooks in this site are rendered, there are some glitches in the display we're still working out. In particular, an [ipywidgets](https://ipywidgets.readthedocs.io/en/stable/) interactive graphical element in the semivariogram widget doesn't display correctly. The notebooks do run correctly when executed with Jupyter Notebook ("classic", not JupyterLab). +``` + +Go to the individual example notebooks below or in the table of content on the left. + +```{tableofcontents} +``` + +## Installation + +See the [README.md](https://github.com/uw-echospace/EchoPro/blob/master/README.md) in the EchoPro repository for installation and execution instructions. diff --git a/docs/input-files.md b/docs/input_files.md similarity index 100% rename from docs/input-files.md rename to docs/input_files.md diff --git a/docs/quick_start.ipynb b/docs/quick_start.ipynb new file mode 100644 index 00000000..e76860c7 --- /dev/null +++ b/docs/quick_start.ipynb @@ -0,0 +1,18 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Quick start" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/Z_references.bib b/docs/references.bib similarity index 100% rename from docs/Z_references.bib rename to docs/references.bib diff --git a/docs/theory_implementation.ipynb b/docs/theory_implementation.ipynb new file mode 100644 index 00000000..76375149 --- /dev/null +++ b/docs/theory_implementation.ipynb @@ -0,0 +1,33 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "(theory:theory_base)=\n", + "# Theory" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From cb238acfab74045935221bb9a644705ee5d31515 Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Mon, 4 Mar 2024 10:22:37 -0800 Subject: [PATCH 02/35] Modified _toc.yml indentation of chapters --- docs/_toc.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/_toc.yml b/docs/_toc.yml index adebb5cf..fe7e0881 100644 --- a/docs/_toc.yml +++ b/docs/_toc.yml @@ -18,8 +18,8 @@ parts: sections: - file: algo1 - file: algo2 - title: Equations and theoretical explanation - sections: + title: Equations and theoretical explanation + sections: - file: theory/ts_length_conversion - file: theory/acoustics_to_biomass_conversion - file: theory/semivariograms From 2b6f71710901a0fb2ab6c0f179edcb4dfeb76eb4 Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Thu, 4 Apr 2024 09:43:56 -0700 Subject: [PATCH 03/35] Test: `load_survey_data`/`biometric_distributions` --- echopop/tests/test_data_loader.py | 105 ++++++++++++++++++++- echopop/tests/utility_testing_functions.py | 34 +++++++ 2 files changed, 136 
insertions(+), 3 deletions(-) create mode 100644 echopop/tests/utility_testing_functions.py diff --git a/echopop/tests/test_data_loader.py b/echopop/tests/test_data_loader.py index a9e9a70b..53aa2f25 100644 --- a/echopop/tests/test_data_loader.py +++ b/echopop/tests/test_data_loader.py @@ -1,7 +1,11 @@ import yaml +import numpy as np from pathlib import Path +import copy from echopop import Survey -from echopop.utils.data_file_validation import load_configuration , validate_data_columns +from echopop.core import LAYER_NAME_MAP +from echopop.utils.data_file_validation import load_configuration +from echopop.tests.utility_testing_functions import dictionary_shape_equal def test_load_configuration(test_path, tmp_path): init_params = yaml.safe_load( @@ -32,6 +36,101 @@ def test_load_configuration(test_path, tmp_path): ) -def test_init(mock_survey): +def test_init( mock_survey ): objS = mock_survey - assert isinstance(objS, Survey) \ No newline at end of file + assert isinstance( objS , Survey ) + + +def test_load_survey_data( mock_survey , + test_path ): + + ### Initialize Survey object (objS) + objS = mock_survey + + ### Pull in configuration values + objS.config = load_configuration( Path( test_path[ 'CONFIG' ] / 'config_init.yml' ) , + Path( test_path[ 'CONFIG' ] / 'config_survey.yml' ) ) + + ### Initialize data attributes + objS.acoustics = copy.deepcopy( LAYER_NAME_MAP['NASC']['data_tree'] ) + objS.biology = copy.deepcopy( LAYER_NAME_MAP['biological']['data_tree'] ) + objS.spatial = copy.deepcopy( LAYER_NAME_MAP['stratification']['data_tree'] ) + objS.statistics = copy.deepcopy( LAYER_NAME_MAP['kriging']['data_tree'] ) + + ### Load in data using the `load_survey_data` method + objS.load_survey_data( ) + + # ----------------- + ### Evaluate results + # ----------------- + ### Dictionary structure + # !!! 
TODO: based on the original data structure -- will need to be updated once the core data structure is also updated + # ---- Check attributes + assert set( [ 'acoustics' , 'biology' , 'spatial' , 'statistics' ] ) <= set( dir( objS ) ) + # ---- Check sub-directory keys + assert dictionary_shape_equal( objS.acoustics , LAYER_NAME_MAP['NASC']['data_tree'] ) + assert dictionary_shape_equal( objS.biology , LAYER_NAME_MAP['biological']['data_tree'] ) + assert dictionary_shape_equal( objS.spatial , LAYER_NAME_MAP['stratification']['data_tree'] ) + assert dictionary_shape_equal( objS.statistics , LAYER_NAME_MAP['kriging']['data_tree'] ) + ### Data structure + # ++++ acoustics + assert objS.acoustics[ 'nasc' ][ 'nasc_df' ].shape == tuple( [ 1 , 10 ] ) + # ++++ biology + assert objS.biology[ 'catch_df' ].shape == tuple( [ 2 , 7 ] ) + assert objS.biology[ 'distributions' ][ 'age_bins_arr' ].shape == tuple( [ 0 , ] ) + assert objS.biology[ 'distributions' ][ 'length_bins_arr' ].shape == tuple( [ 0 , ] ) + assert objS.biology[ 'haul_to_transect_df' ].shape == tuple( [ 2 , 5 ] ) + assert objS.biology[ 'length_df' ].shape == tuple( [ 2 , 10 ] ) + assert objS.biology[ 'specimen_df' ].shape == tuple( [ 2 , 11 ] ) + # ++++ spatial + assert objS.spatial[ 'strata_df' ].shape == tuple( [ 1 , 3 ] ) + assert objS.spatial[ 'geo_strata_df' ].shape == tuple( [ 1 , 2 ] ) + assert objS.spatial[ 'inpfc_strata_df' ].shape == tuple( [ 1 , 2 ] ) + # ++++ statistics + assert objS.statistics[ 'kriging' ][ 'mesh_df' ].shape == tuple( [ 19843 , 3 ] ) + assert objS.statistics[ 'kriging' ][ 'isobath_200m_df' ].shape == tuple( [ 147 , 2 ] ) + assert len( objS.statistics[ 'kriging' ][ 'model_config' ] ) == 39 + assert len( objS.statistics[ 'variogram' ][ 'model_config' ] ) == 13 + ### Test merged outputs + assert set( objS.biology[ 'haul_to_transect_df' ].columns ) <= set( objS.biology[ 'catch_df' ].columns ) + assert set( objS.biology[ 'haul_to_transect_df' ].columns ) <= set( objS.biology[ 'length_df' ].columns ) + assert set( objS.biology[ 'haul_to_transect_df' ].columns ) <= set( objS.biology[ 'specimen_df' ].columns ) + ### Test biological data (sex definition) + assert np.all( ( objS.biology[ 'length_df' ].sex == 'female' ) & ( objS.biology[ 'length_df' ].group == 'sexed' ) ) + assert np.all( ( objS.biology[ 'specimen_df' ].sex == [ 'male' , 'female' ] ) & ( objS.biology[ 'specimen_df' ].group == 'sexed' ) ) + +def test_biometric_distributions( mock_survey , + test_path ): + + ### Initialize Survey object (objS) + objS = mock_survey + + ### Pull in configuration values + objS.config = load_configuration( Path( test_path[ 'CONFIG' ] / 'config_init.yml' ) , + Path( test_path[ 'CONFIG' ] / 'config_survey.yml' ) ) + + ### Initialize data attributes + objS.acoustics = copy.deepcopy( LAYER_NAME_MAP['NASC']['data_tree'] ) + objS.biology = copy.deepcopy( LAYER_NAME_MAP['biological']['data_tree'] ) + objS.spatial = copy.deepcopy( LAYER_NAME_MAP['stratification']['data_tree'] ) + objS.statistics = copy.deepcopy( LAYER_NAME_MAP['kriging']['data_tree'] ) + + ### Load in data using the `load_survey_data` method + objS.load_survey_data( ) + + ### Generate length and age distributions + objS.biometric_distributions( ) + + # ----------------- + ### Evaluate results + # ----------------- + ### Data structure + assert objS.biology[ 'distributions' ][ 'age' ][ 'age_interval_arr' ].shape == tuple( [ 23 , ] ) + assert objS.biology[ 'distributions' ][ 'age' ][ 'age_bins_arr' ].shape == tuple( [ 22 , ] ) + assert objS.biology[ 'distributions' ][ 
'length' ][ 'length_interval_arr' ].shape == tuple( [ 41 , ] ) + assert objS.biology[ 'distributions' ][ 'length' ][ 'length_bins_arr' ].shape == tuple( [ 40 , ] ) + ### Data equality + assert np.all( objS.biology[ 'distributions' ][ 'age' ][ 'age_interval_arr' ] == np.linspace( 0.5 , 22.5 , 23 ) ) + assert np.all( objS.biology[ 'distributions' ][ 'age' ][ 'age_bins_arr' ] == np.linspace( 1 , 22 , 22 ) ) + assert np.all( objS.biology[ 'distributions' ][ 'length' ][ 'length_interval_arr' ] == np.linspace( 1 , 81 , 41 ) ) + assert np.all( objS.biology[ 'distributions' ][ 'length' ][ 'length_bins_arr' ] == np.linspace( 2 , 80 , 40 ) ) \ No newline at end of file diff --git a/echopop/tests/utility_testing_functions.py b/echopop/tests/utility_testing_functions.py new file mode 100644 index 00000000..98a370e7 --- /dev/null +++ b/echopop/tests/utility_testing_functions.py @@ -0,0 +1,34 @@ +import numpy as np + +def dictionary_shape( dictionary: dict ): + """ + A utility test function that extracts the shape of a nested dictionary + """ + if isinstance( dictionary , dict ): + return( { i: dictionary_shape( dictionary[ i ] ) for i in dictionary } ) + else: + return None + +def dictionary_shape_equal( dictionary1: dict , + dictionary2: dict ): + """ + Tests equality between the shapes of two nested dictionaries + """ + result = dictionary_shape( dictionary1 ) == dictionary_shape( dictionary2 ) + + if result: + return result + else: + if set( dictionary_shape( dictionary1 ) ) <= set( dictionary_shape( dictionary2 ) ): + tracked_true = [ ] + + for j in dictionary2.keys( ): + test = set( dictionary1[ j ].keys( ) ) <= ( dictionary2[ j ].keys( ) ) + tracked_true.append( test ) + + if np.all( tracked_true ): + return True + else: + return result + else: + return result \ No newline at end of file From cc9a9061ea798533eebe4965f5e3b8b7b812d13e Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Thu, 4 Apr 2024 10:22:14 -0700 Subject: [PATCH 04/35] `test_fit_binned_length_weight_relationship` --- echopop/tests/test_data_transect_analysis.py | 104 +++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 echopop/tests/test_data_transect_analysis.py diff --git a/echopop/tests/test_data_transect_analysis.py b/echopop/tests/test_data_transect_analysis.py new file mode 100644 index 00000000..e6b0cb2d --- /dev/null +++ b/echopop/tests/test_data_transect_analysis.py @@ -0,0 +1,104 @@ +import pandas as pd +import numpy as np +from echopop.survey import Survey + +def test_fit_binned_length_weight_relationship( mock_survey ): + + #### Pull in mock Survey object + objS = mock_survey + + ### Initialize objS for `length_weight` + objS.statistics[ 'length_weight' ] = { } + + ### Re-parameterize `specimen_df` with dummy data + objS.biology[ 'specimen_df' ] = pd.DataFrame( + { + 'stratum_num': [ 0 , 0 , 1 , 1 , 2 , 2 , 3 , 3 ] , + 'haul_num': [ 1 , 1 , 2 , 2 , 3 , 3 , 4 , 4 ] , + 'sex': np.tile( [ 'male' , 'female' ] , 4 ) , + 'group': np.repeat( 'sexed' , 8 ) , + 'species_id': np.repeat( [ 8675309 ] , 8 ) , + 'length': [ 2.0 , 3.0 , 4.0 , 5.0 , 6.0 , 7.0 , 8.0 , 9.0 ] , + 'weight': [ 4.0 , 9.0 , 16.0 , 25.0 , 36.0 , 49.0 , 64.0 , 81.0 ] , + } + ) + + ### Re-parameterize `length_bins` with dummy data + objS.biology[ 'distributions' ][ 'length' ][ 'length_bins_arr' ] = ( + [ 2.0 , 5.0 , 8.0 , 11.0 ] + ) + + ### Re-parameterize `length_interval` with dummy data + objS.biology[ 'distributions' ][ 'length' ][ 'length_interval_arr' ] = ( + [ 0.5 , 3.5 , 6.5 , 9.5 , 12.5 ] + ) + + ### Evaluate object for later 
comparison + objS.fit_binned_length_weight_relationship( species_id = 8675309 ) + + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- `objS.statistics[ 'length_weight' ][ 'regression_parameters' ]` + # ---- Expected dimensions + expected_dimensions_regression_parameters = tuple( [ 3 , 3 ] ) + # ---- Expected output + expected_output_regression_parameters = pd.DataFrame( + { + 'sex': [ 'all' , 'female' , 'male' ] , + 'rate': [ 2.0 , 2.0 , 2.0 ] , + 'initial': [ 4.7e-16 , -2.2e-16 , 1.1e-15 ] + } + ) + # ---- `objS.statistics[ 'length_weight' ][ 'length_weight_df' ]` + # ---- Expected dimensions + expected_dimensions_length_weight_df = tuple( [ 12 , 10 ] ) + # ---- Expected output + expected_output_length_weight_df = pd.DataFrame( + { + 'length_bin': pd.cut( np.repeat( [ 1 , 4 , 7 , 10 ] , 3 ) , + np.array( [ 0.5 , 3.5 , 6.5 , 9.5 , 12.5 ] ) ) , + 'sex': np.tile( [ 'all' , 'female' , 'male' ] , 4 ) , + 'mean_length': [ 2.5 , 3.0 , 2.0 , 5.0 , 5.0 , 5.0 , + 8.0 , 8.0 , 8.0 , 0.0 , 0.0 , 0.0 ] , + 'n_length': [ 2 , 1 , 1 , 3 , 1 , 2 , + 3 , 2 , 1 , 0 , 0 , 0 ] , + 'mean_weight': [ 6.50 , 9.00 , 4.00 , 25.67 , 25.00 , 26.00 , + 64.67 , 65.00 , 64.00 , 0.00 , 0.00 , 0.00 ] , + 'n_weight': [ 2 , 1 , 1 , 3 , 1 , 2 , + 3 , 2 , 1 , 0 , 0 , 0 ] , + 'rate': np.repeat( 2.0 , 12 ) , + 'initial': np.tile( [ 4.7e-16 , -2.2e-16 , 1.1e-15 ] , 4 ) , + 'weight_fitted': [ 4.0 , 4.0 , 4.0 , 25.0 , 25.0 , 25.0 , + 64.0 , 64.0 , 64.0 , 121.0 , 121.0 , 121.0 ] , + 'weight_modeled': [ 4.0 , 4.0 , 4.0 , 25.0 , 25.0 , 25.0 , + 64.0 , 64.0 , 64.0 , 121.0 , 121.0 , 121.0 ] + } + ) + expected_output_length_weight_df[ 'length_bin' ] = pd.IntervalIndex( expected_output_length_weight_df[ 'length_bin' ] ) + expected_output_length_weight_df[ 'length_bin' ] = pd.Categorical( expected_output_length_weight_df[ 'length_bin' ] , + categories = expected_output_length_weight_df[ 'length_bin' ].unique( ) , + ordered = True ) + #---------------------------------- + ### Run tests: `fit_binned_length_weight_relationship` + #---------------------------------- + eval_regression_parameters = objS.statistics[ 'length_weight' ][ 'regression_parameters' ] + eval_length_weight_df = objS.statistics[ 'length_weight' ][ 'length_weight_df' ] + ### Check shape + assert eval_regression_parameters.shape == expected_dimensions_regression_parameters + assert eval_length_weight_df.shape == expected_dimensions_length_weight_df + ### Check datatypes + assert np.all( eval_regression_parameters.dtypes == expected_output_regression_parameters.dtypes ) + assert np.all( eval_length_weight_df.dtypes == expected_output_length_weight_df.dtypes ) + ### Dataframe equality + assert np.allclose( eval_regression_parameters[ [ 'rate' , 'initial' ] ] , + expected_output_regression_parameters[ [ 'rate' , 'initial' ] ] , + rtol = 1e-1 ) + # ---- Non-float/high-precision + assert eval_length_weight_df[ [ 'length_bin' , 'sex' , 'mean_length' , 'n_length' , 'n_weight' ] ].equals( + expected_output_length_weight_df[ [ 'length_bin' , 'sex' , 'mean_length' , 'n_length' , 'n_weight' ] ] + ) + # ---- Float/high-precision + assert np.allclose( eval_length_weight_df[ [ 'mean_weight' , 'rate' , 'initial' ,'weight_fitted' , 'weight_modeled' ] ] , + expected_output_length_weight_df[ [ 'mean_weight' , 'rate' , 'initial' ,'weight_fitted' , 'weight_modeled' ] ] , + rtol = 1e-1 ) \ No newline at end of file From 6e0ef76336140c178b457213b7d25e9defdbc6d5 Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Thu, 4 Apr 2024 10:49:09 
-0700 Subject: [PATCH 05/35] `test_strata_sex_weight_proportions` --- echopop/tests/test_data_transect_analysis.py | 90 +++++++++++++++++++- 1 file changed, 89 insertions(+), 1 deletion(-) diff --git a/echopop/tests/test_data_transect_analysis.py b/echopop/tests/test_data_transect_analysis.py index e6b0cb2d..c133cfff 100644 --- a/echopop/tests/test_data_transect_analysis.py +++ b/echopop/tests/test_data_transect_analysis.py @@ -101,4 +101,92 @@ def test_fit_binned_length_weight_relationship( mock_survey ): # ---- Float/high-precision assert np.allclose( eval_length_weight_df[ [ 'mean_weight' , 'rate' , 'initial' ,'weight_fitted' , 'weight_modeled' ] ] , expected_output_length_weight_df[ [ 'mean_weight' , 'rate' , 'initial' ,'weight_fitted' , 'weight_modeled' ] ] , - rtol = 1e-1 ) \ No newline at end of file + rtol = 1e-1 ) + +def test_strata_sex_weight_proportions( mock_survey ): + + #### Pull in mock Survey object + objS = mock_survey + + ### Initialize objS for `weight` + objS.biology[ 'weight' ] = { } + + ### Initialize objS for `length_weight` + objS.statistics[ 'length_weight' ] = { } + + ### Re-parameterize `specimen_df` with dummy data + objS.biology[ 'specimen_df' ] = pd.DataFrame( + { + 'stratum_num': np.repeat( [ 0 , 1 ] , 4 ) , + 'sex': np.tile( [ 'male' , 'female' ] , 4 ) , + 'group': np.repeat( 'sexed' , 8 ) , + 'haul_num': np.tile( [ 1 , 2 ] , 4 ) , + 'species_id': np.repeat( [ 8675309 ] , 8 ) , + 'length': [ 12.0 , 12.0 , 19.0 , 19.0 , 12.0 , 12.0 , 19.0 , 19.0 ] , + 'weight': [ 2.0 , 3.0 , 8.0 , 7.0 , 1.0 , 4.0 , 9.0 , 6.0 ] , + 'age': [ 1 , 1 , 2 , 2 , 1 , 1 , 2 , 2 ] + } + ) + + ### Re-parameterize `length_df` with dummy data + objS.biology[ 'length_df' ] = pd.DataFrame( + { + 'stratum_num': np.repeat( [ 0 , 1 ] , 4 ) , + 'sex': np.tile( [ 'male' , 'female' ] , 4 ) , + 'group': np.repeat( 'sexed' , 8 ) , + 'species_id': np.repeat( [ 8675309 ] , 8 ) , + 'length': [ 12 , 12 , 19 , 19 , 12 , 12 , 19 , 19 ] , + 'length_count': [ 5 , 10 , 15 , 20 , 20 , 15 , 10 , 5 ] + } + ) + + ### Re-parameterize `fitted_weight` with dummy data + objS.statistics[ 'length_weight' ][ 'length_weight_df' ] = pd.DataFrame( + { + 'length_bin': pd.cut( np.repeat( [ 12 , 18 ] , 3 ) , + np.linspace( 9 , 21 , 3 ) ) , + 'sex': np.repeat( [ 'all' , 'female' , 'male' ] , 2 ) , + 'n_length': [ 4 , 2 , 2 , 4 , 2 , 2 ] , + 'mean_weight': [ 2.5 , 3.5 , 1.5 , 7.5 , 6.5 , 8.5 ] , + 'n_weight': [ 4 , 2 , 2 , 4 , 2 , 2 ] , + 'rate': [ 2.63 , 1.36 , 3.90 , 2.63 , 1.36 , 3.90 ] , + 'initial': [ -2.49 , -0.93 , -4.06 , -2.49 , -0.93 , -4.06 ] , + 'weight_fitted': [ 2.21 , 3.46 , 1.41 , 6.43 , 6.02 , 6.87 ] , + 'weight_modeled': [ 2.21 , 3.46 , 1.41 , 6.43 , 6.02 , 6.87 ] + } + ) + + ### Re-parameterize `length_bins` with dummy data + objS.biology[ 'distributions' ][ 'length' ][ 'length_interval_arr' ] = np.linspace( 9 , 21 , 3 ) + + ### Evaluate object for later comparison + objS.strata_sex_weight_proportions( species_id = 8675309 ) + + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- Expected dimensions + expected_dimensions = tuple( [ 2 , 8 ] ) + # ---- Expected output + expected_output = pd.DataFrame( + { + 'stratum_num': np.array( [ 0 , 1 ] ).astype( np.int32 ) , + 'proportion_female': [ 0.59 , 0.41 ] , + 'proportion_male': [ 0.41 , 0.59 ] , + 'proportion_station_1': [ 0.93 , 0.93 ] , + 'proportion_station_2': [ 0.07 , 0.07 ] , + 'average_weight_female': [ 4.72 , 2.71 ] , + 'average_weight_male': [ 6.64 , 6.30 ] , + 'average_weight_total': [ 3.07 , 2.60 
] + } + ) + #---------------------------------- + ### Run tests: `strata_sex_weight_proportions` + #---------------------------------- + eval_weight_strata_df = objS.biology[ 'weight' ][ 'weight_strata_df' ] + ### Check shape + assert eval_weight_strata_df.shape == expected_dimensions + ### Check datatypes + assert np.all( eval_weight_strata_df.dtypes == expected_output.dtypes ) + ### Dataframe equality + assert np.allclose( eval_weight_strata_df , expected_output , rtol = 1e-1 ) \ No newline at end of file From 69c515b36b7481d9609ad6b78ee0f5e1c8f169b8 Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Thu, 4 Apr 2024 11:43:40 -0700 Subject: [PATCH 06/35] `test_strata_age_binned_weight_proportions` --- echopop/survey.py | 2 + echopop/tests/test_data_transect_analysis.py | 143 ++++++++++++++++++- 2 files changed, 143 insertions(+), 2 deletions(-) diff --git a/echopop/survey.py b/echopop/survey.py index 8b896b3e..7bc05788 100644 --- a/echopop/survey.py +++ b/echopop/survey.py @@ -893,6 +893,7 @@ def strata_age_binned_weight_proportions( self , 'weight_sex_proportion_adult': ( x[ 'count' ] / x.weight_total_adult ).sum() } ) ) .reset_index( ) + .fillna( 0 ) ) length_sex_age_weight_proportions = ( @@ -909,6 +910,7 @@ def strata_age_binned_weight_proportions( self , weight_total_adult = df.loc[ df.age > 1 ].groupby( [ 'stratum_num' , 'sex' ] )[ 'count' ].transform( sum ) ) ) .assign( weight_length_sex_proportion_all = lambda x: x[ 'count' ] / x.weight_total_all , weight_length_sex_proportion_adult = lambda x: x[ 'count' ] / x.weight_total_adult ) + .fillna( 0 ) ) ### Add these dataframes to the appropriate data attribute diff --git a/echopop/tests/test_data_transect_analysis.py b/echopop/tests/test_data_transect_analysis.py index c133cfff..c995ab78 100644 --- a/echopop/tests/test_data_transect_analysis.py +++ b/echopop/tests/test_data_transect_analysis.py @@ -123,11 +123,40 @@ def test_strata_sex_weight_proportions( mock_survey ): 'haul_num': np.tile( [ 1 , 2 ] , 4 ) , 'species_id': np.repeat( [ 8675309 ] , 8 ) , 'length': [ 12.0 , 12.0 , 19.0 , 19.0 , 12.0 , 12.0 , 19.0 , 19.0 ] , - 'weight': [ 2.0 , 3.0 , 8.0 , 7.0 , 1.0 , 4.0 , 9.0 , 6.0 ] , + 'weight': [ 2.0 , 3.0 , 3.0 , 2.0 , 2.0 , 3.0 , 2.0 , 3.0 ] , 'age': [ 1 , 1 , 2 , 2 , 1 , 1 , 2 , 2 ] } + ) + + ### Re-parameterize `length_df` with dummy data + objS.biology[ 'length_df' ] = pd.DataFrame( + { + 'stratum_num': np.repeat( [ 0 , 1 ] , 4 ) , + 'sex': np.tile( [ 'male' , 'female' ] , 4 ) , + 'group': np.repeat( 'sexed' , 8 ) , + 'species_id': np.repeat( [ 8675309 ] , 8 ) , + 'length': [ 12 , 12 , 19 , 19 , 12 , 12 , 19 , 19 ] , + 'length_count': [ 5 , 10 , 15 , 20 , 20 , 15 , 10 , 5 ] + } ) + ### Re-parameterize `fitted_weight` with dummy data + objS.statistics[ 'length_weight' ][ 'length_weight_df' ] = pd.DataFrame( + { + 'length_bin': pd.cut( np.repeat( [ 12 , 18 ] , 3 ) , + np.linspace( 9 , 21 , 3 ) ) , + 'sex': np.repeat( [ 'all' , 'female' , 'male' ] , 2 ) , + 'n_length': [ 4 , 2 , 2 , 4 , 2 , 2 ] , + 'mean_weight': [ 2.5 , 3.5 , 1.5 , 7.5 , 6.5 , 8.5 ] , + 'n_weight': [ 4 , 2 , 2 , 4 , 2 , 2 ] , + 'rate': [ 2.63 , 1.36 , 3.90 , 2.63 , 1.36 , 3.90 ] , + 'initial': [ -2.49 , -0.93 , -4.06 , -2.49 , -0.93 , -4.06 ] , + 'weight_fitted': [ 2.21 , 3.46 , 1.41 , 6.43 , 6.02 , 6.87 ] , + 'weight_modeled': [ 2.21 , 3.46 , 1.41 , 6.43 , 6.02 , 6.87 ] + } + ) + + ### Re-parameterize `length_df` with dummy data objS.biology[ 'length_df' ] = pd.DataFrame( { @@ -189,4 +218,114 @@ def test_strata_sex_weight_proportions( mock_survey ): ### Check 
datatypes assert np.all( eval_weight_strata_df.dtypes == expected_output.dtypes ) ### Dataframe equality - assert np.allclose( eval_weight_strata_df , expected_output , rtol = 1e-1 ) \ No newline at end of file + assert np.allclose( eval_weight_strata_df , expected_output , rtol = 1e-1 ) + +def test_strata_age_binned_weight_proportions( mock_survey ): + + #### Pull in mock Survey object + objS = mock_survey + + ### Initialize objS for `weight` + objS.biology[ 'weight' ] = { } + + ### Re-parameterize `specimen_df` with dummy data + objS.biology[ 'specimen_df' ] = pd.DataFrame( + { + 'stratum_num': np.repeat( [ 0 , 1 ] , 4 ) , + 'sex': np.tile( [ 'male' , 'female' ] , 4 ) , + 'group': np.repeat( 'sexed' , 8 ) , + 'haul_num': np.tile( [ 1 , 2 ] , 4 ) , + 'species_id': np.repeat( [ 8675309 ] , 8 ) , + 'length': [ 12.0 , 12.0 , 19.0 , 19.0 , 12.0 , 12.0 , 19.0 , 19.0 ] , + 'weight': [ 2.0 , 3.0 , 3.0 , 2.0 , 2.0 , 3.0 , 2.0 , 3.0 ] , + 'age': [ 1 , 1 , 2 , 2 , 1 , 1 , 2 , 2 ] + } + ) + + ### Re-parameterize `length_bins` with dummy data + objS.biology[ 'distributions' ][ 'length' ][ 'length_interval_arr' ] = np.linspace( 9 , 21 , 3 ) + + ### Evaluate object for later comparison + objS.strata_age_binned_weight_proportions( species_id = 8675309 ) + + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- Expected dimensions + expected_dimensions = { 'age_proportions': tuple( [ 4 , 4 ] ) , + 'age_weight_proportions': tuple( [ 4 , 4 ] ) , + 'sex_age_weight_proportions': tuple( [ 12 , 5 ] ) , + 'length_sex_age_weight_proportions': tuple( [ 24 , 9 ] ) } + + # ---- Expected output + expected_output = { + 'age_proportions': pd.DataFrame( { + 'stratum_num': np.repeat( [ 0 , 1 ] , 2 ).astype( np.int64 ) , + 'age': np.tile( [ 1 , 2 ] , 2 ).astype( np.int64 ) , + 'count_age_proportion_all': np.repeat( 0.5 , 4 ) , + 'count_age_proportion_adult': [ 0.0 , 1.0 , 0.0 , 1.0 ] + } ) , + 'age_weight_proportions': pd.DataFrame( { + 'stratum_num': np.repeat( [ 0 , 1 ] , 2 ).astype( np.int64 ) , + 'age': np.tile( [ 1 , 2 ] , 2 ).astype( np.int64 ) , + 'weight_age_proportion_all': [ 0.50 , 0.50 , 0.50 , 0.50 ] , + 'weight_age_proportion_adult': [ 0.0 , 1.0 , 0.0 , 1.0 ] + } ) , + 'sex_age_weight_proportions': pd.DataFrame( { + 'stratum_num': np.repeat( [ 0 , 1 ] , 6 ).astype( np.int64 ) , + 'age': np.tile( [ 1 , 1 , 1 , 2 , 2 , 2 ] , 2 ).astype( np.int64 ) , + 'sex': np.tile( [ 'all' , 'female' , 'male' ] , 4 ) , + 'weight_sex_proportion_all': [ 0.5 , 0.6 , 0.4 , 0.5 , 0.4 , 0.6 , + 0.5 , 0.5 , 0.5 , 0.5 , 0.5 , 0.5 ] , + 'weight_sex_proportion_adult': np.tile( [ 0.0 , 0.0 , 0.0 , 1.0 , 1.0 , 1.0 ] , 2 ) + } ) , + 'length_sex_age_weight_proportions': pd.DataFrame( { + 'stratum_num': np.repeat( [ 0 , 1 ] , 12 ).astype( np.int64 ) , + 'age': np.tile( [ 1 , 1 , 1 , 1 , 1 , 1 , + 2 , 2 , 2 , 2 , 2 , 2 ] , 2 ).astype( np.int64 ) , + 'length_bin': pd.cut( np.tile( [ 12.0 , 12.0 , 12.0 , 18.0 , 18.0 , 18.0 ] , 4 ) , + np.linspace( 9 , 21 , 3 ) ) , + 'sex': np.tile( [ 'all' , 'female' , 'male' ] , 8 ) , + 'count': [ 5.0 , 3.0 , 2.0 , 0.0 , 0.0 , 0.0 , + 0.0 , 0.0 , 0.0 , 5.0 , 2.0 , 3.0 , + 5.0 , 3.0 , 2.0 , 0.0 , 0.0 , 0.0 , + 0.0 , 0.0 , 0.0 , 5.0 , 3.0 , 2.0 ] , + 'weight_total_all': [ 10.0 , 5.0 , 5.0 , 10.0 , 5.0 , 5.0 , + 10.0 , 5.0 , 5.0 , 10.0 , 5.0 , 5.0 , + 10.0 , 6.0 , 4.0 , 10.0 , 6.0 , 4.0 , + 10.0 , 6.0 , 4.0 , 10.0 , 6.0 , 4.0 ] , + 'weight_total_adult': [ 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , + 5.0 , 2.0 , 3.0 , 5.0 , 2.0 , 3.0 , + 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 
0.0 , + 5.0 , 3.0 , 2.0 , 5.0 , 3.0 , 2.0 ] , + 'weight_length_sex_proportion_all': [ 0.5 , 0.6 , 0.4 , 0.0 , 0.0 , 0.0 , + 0.0 , 0.0 , 0.0 , 0.5 , 0.4 , 0.6 , + 0.5 , 0.5 , 0.5 , 0.0 , 0.0 , 0.0 , + 0.0 , 0.0 , 0.0 , 0.5 , 0.5 , 0.5 ] , + 'weight_length_sex_proportion_adult': np.tile( [ 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , + 0.0 , 0.0 , 0.0 , 1.0 , 1.0 , 1.0 ] , 2 ) + } ) + } + + #---------------------------------- + ### Run tests: `strata_age_binned_weight_proportions` + #---------------------------------- + eval_age_proportions_df = objS.biology[ 'weight' ][ 'proportions' ][ 'age_proportions_df' ] + eval_age_weight_proportions_df = objS.biology[ 'weight' ][ 'proportions' ][ 'age_weight_proportions_df' ] + eval_sex_age_weight_proportions_df = objS.biology[ 'weight' ][ 'proportions' ][ 'sex_age_weight_proportions_df' ] + eval_length_sex_age_weight_proportions_df = objS.biology[ 'weight' ][ 'proportions' ][ 'length_sex_age_weight_proportions_df' ] + ### Check shape + assert eval_age_proportions_df.shape == expected_dimensions[ 'age_proportions' ] + assert eval_age_weight_proportions_df.shape == expected_dimensions[ 'age_weight_proportions' ] + assert eval_sex_age_weight_proportions_df.shape == expected_dimensions[ 'sex_age_weight_proportions' ] + assert eval_length_sex_age_weight_proportions_df.shape == expected_dimensions[ 'length_sex_age_weight_proportions' ] + ### Check datatypes + assert np.all( eval_age_proportions_df.dtypes == expected_output[ 'age_proportions' ].dtypes ) + assert np.all( eval_age_weight_proportions_df.dtypes == expected_output[ 'age_weight_proportions' ].dtypes ) + assert np.all( eval_sex_age_weight_proportions_df.dtypes == expected_output[ 'sex_age_weight_proportions' ].dtypes ) + assert np.all( eval_length_sex_age_weight_proportions_df.dtypes == expected_output[ 'length_sex_age_weight_proportions' ].dtypes ) + ### Dataframe equality + assert eval_age_proportions_df.equals( expected_output[ 'age_proportions' ] ) + assert eval_age_weight_proportions_df.equals( expected_output[ 'age_weight_proportions' ] ) + assert eval_sex_age_weight_proportions_df.equals( expected_output[ 'sex_age_weight_proportions' ] ) + assert eval_length_sex_age_weight_proportions_df.equals( expected_output[ 'length_sex_age_weight_proportions' ] ) \ No newline at end of file From a38cf58d09136a419c5ba1ea7e7070491fbd9872 Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Thu, 4 Apr 2024 14:04:21 -0700 Subject: [PATCH 07/35] `test_nasc_to_biomass_conversion` --- echopop/tests/test_data_transect_analysis.py | 282 ++++++++++++++++++- 1 file changed, 280 insertions(+), 2 deletions(-) diff --git a/echopop/tests/test_data_transect_analysis.py b/echopop/tests/test_data_transect_analysis.py index c995ab78..6c29834f 100644 --- a/echopop/tests/test_data_transect_analysis.py +++ b/echopop/tests/test_data_transect_analysis.py @@ -132,6 +132,7 @@ def test_strata_sex_weight_proportions( mock_survey ): objS.biology[ 'length_df' ] = pd.DataFrame( { 'stratum_num': np.repeat( [ 0 , 1 ] , 4 ) , + 'haul_num': [ 1 , 1 , 2 , 2 , 3 , 3 , 4 , 4 ] , 'sex': np.tile( [ 'male' , 'female' ] , 4 ) , 'group': np.repeat( 'sexed' , 8 ) , 'species_id': np.repeat( [ 8675309 ] , 8 ) , @@ -234,7 +235,7 @@ def test_strata_age_binned_weight_proportions( mock_survey ): 'stratum_num': np.repeat( [ 0 , 1 ] , 4 ) , 'sex': np.tile( [ 'male' , 'female' ] , 4 ) , 'group': np.repeat( 'sexed' , 8 ) , - 'haul_num': np.tile( [ 1 , 2 ] , 4 ) , + 'haul_num': [ 1 , 1 , 2 , 2 , 3 , 3 , 4 , 4 ] , 'species_id': np.repeat( [ 8675309 ] , 8 ) , 'length': 
[ 12.0 , 12.0 , 19.0 , 19.0 , 12.0 , 12.0 , 19.0 , 19.0 ] , 'weight': [ 2.0 , 3.0 , 3.0 , 2.0 , 2.0 , 3.0 , 2.0 , 3.0 ] , @@ -328,4 +329,281 @@ def test_strata_age_binned_weight_proportions( mock_survey ): assert eval_age_proportions_df.equals( expected_output[ 'age_proportions' ] ) assert eval_age_weight_proportions_df.equals( expected_output[ 'age_weight_proportions' ] ) assert eval_sex_age_weight_proportions_df.equals( expected_output[ 'sex_age_weight_proportions' ] ) - assert eval_length_sex_age_weight_proportions_df.equals( expected_output[ 'length_sex_age_weight_proportions' ] ) \ No newline at end of file + assert eval_length_sex_age_weight_proportions_df.equals( expected_output[ 'length_sex_age_weight_proportions' ] ) + +def test_nasc_to_biomass_conversion( mock_survey ): + + #### Pull in mock Survey object + objS = mock_survey + + ### Initialize various attributes + objS.acoustics[ 'sigma_bs' ] = { } + objS.statistics[ 'length_weight' ] = { } + objS.biology[ 'weight' ] = { } + objS.biology[ 'population' ] = { } + + ### Create mock data for `age_proportions_df` + objS.biology[ 'weight' ][ 'proportions' ] = { } + objS.biology[ 'weight' ][ 'proportions' ][ 'age_proportions_df' ] = pd.DataFrame( { + 'stratum_num': np.repeat( [ 0 , 1 ] , 2 ).astype( np.int64 ) , + 'age': np.tile( [ 1 , 2 ] , 2 ).astype( np.int64 ) , + 'count_age_proportion_all': np.repeat( 0.5 , 4 ) , + 'count_age_proportion_adult': [ 0.0 , 1.0 , 0.0 , 1.0 ] + } ) + + ### Create mock data for `age_weight_proportions_df` + objS.biology[ 'weight' ][ 'proportions' ][ 'age_weight_proportions_df' ] = pd.DataFrame( { + 'stratum_num': np.repeat( [ 0 , 1 ] , 2 ).astype( np.int64 ) , + 'age': np.tile( [ 1 , 2 ] , 2 ).astype( np.int64 ) , + 'weight_age_proportion_all': [ 0.50 , 0.50 , 0.50 , 0.50 ] , + 'weight_age_proportion_adult': [ 0.0 , 1.0 , 0.0 , 1.0 ] + } ) + + ### Create mock data for `sex_age_weight_proportions_df` + objS.biology[ 'weight' ][ 'proportions' ][ 'sex_age_weight_proportions_df' ] = pd.DataFrame( { + 'stratum_num': np.repeat( [ 0 , 1 ] , 6 ).astype( np.int64 ) , + 'age': np.tile( [ 1 , 1 , 1 , 2 , 2 , 2 ] , 2 ).astype( np.int64 ) , + 'sex': np.tile( [ 'all' , 'female' , 'male' ] , 4 ) , + 'weight_sex_proportion_all': [ 0.5 , 0.6 , 0.4 , 0.5 , 0.4 , 0.6 , + 0.5 , 0.5 , 0.5 , 0.5 , 0.5 , 0.5 ] , + 'weight_sex_proportion_adult': np.tile( [ 0.0 , 0.0 , 0.0 , 1.0 , 1.0 , 1.0 ] , 2 ) + } ) + + ### Create mock data for 'length_weight_df' + objS.statistics[ 'length_weight' ][ 'length_weight_df' ] = pd.DataFrame( + { + 'length_bin': pd.cut( np.repeat( [ 12 , 18 ] , 3 ) , + np.linspace( 9 , 21 , 3 ) ) , + 'sex': np.repeat( [ 'all' , 'female' , 'male' ] , 2 ) , + 'n_length': [ 4 , 2 , 2 , 4 , 2 , 2 ] , + 'mean_weight': [ 2.5 , 3.5 , 1.5 , 7.5 , 6.5 , 8.5 ] , + 'n_weight': [ 4 , 2 , 2 , 4 , 2 , 2 ] , + 'rate': [ 2.63 , 1.36 , 3.90 , 2.63 , 1.36 , 3.90 ] , + 'initial': [ -2.49 , -0.93 , -4.06 , -2.49 , -0.93 , -4.06 ] , + 'weight_fitted': [ 2.21 , 3.46 , 1.41 , 6.43 , 6.02 , 6.87 ] , + 'weight_modeled': [ 2.21 , 3.46 , 1.41 , 6.43 , 6.02 , 6.87 ] + } + ) + + ### Create mock data for `weight_strata_df` + objS.biology[ 'weight' ][ 'weight_strata_df' ] = pd.DataFrame( + { + 'stratum_num': [ 0 , 1 ] , + 'proportion_female': [ 0.592593 , 0.407407 ] , + 'proportion_male': [ 0.407407 , 0.592593 ] , + 'proportion_station_1': [ 0.925926 , 0.925926 ] , + 'proportion_station_2': [ 0.074074 , 0.074074 ] , + 'average_weight_female': [ 4.719110 , 2.707892 ] , + 'average_weight_male': [ 6.640487 , 6.299942 ] , + 'average_weight_total': 
[ 3.066481 , 2.603519 ] , + } + ) + + ### Create mock data for `strata_mean` (sigma_bs) + objS.acoustics[ 'sigma_bs' ][ 'strata_mean' ] = pd.DataFrame( + { + 'stratum_num': [ 0 , 1 ] , + 'species_id': np.repeat( 8675309 , 2 ) , + 'sigma_bs_mean': 1.630277e-8 + } + ) + + ### Create mock data for `nasc_df` + objS.acoustics[ 'nasc' ][ 'nasc_df' ] = pd.DataFrame( + { + 'transect_num': [ 1 , 2 , 3 , 4] , + 'stratum_num': [ 0 , 0 , 1 , 1 ] , + 'vessel_log_start': [ 0.0 , 10.1 , 20.1 , 30.1 ] , + 'vessel_log_end': [ 10.0 , 20.0 , 30.0 , 40.0 ] , + 'latitude': [ 20.0 , 30.0 , 40.0 , 50.0 ] , + 'longitude': [ -180.0 , -120.0 , -170.0 , -110.0 ] , + 'transect_spacing': np.repeat( 1.0 , 4 ) , + 'NASC_no_age1': [ 0.0 , 1e1 , 1e2 , 1e3 ] , + 'haul_num': [ 1 , 1 , 2 , 2 ] , + 'NASC_all_ages': [ 1e1 , 1e2 , 1e2 , 1e3 ] + } + ) + + ### Create mock data for `strata_df` + objS.spatial[ 'strata_df' ] = pd.DataFrame( + { + 'stratum_num': [ 0 , 1 ] , + 'haul_num': [ 1 , 2 ] , + 'fraction_hake': [ 1.000 , 0.500 ] + } + ) + + ### Evaluate object for later comparison + objS.nasc_to_biomass_conversion( species_id = 8675309 ) + + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- Expected dimensions + expected_dimensions = { + 'areal_density': { + 'number_density': tuple( [ 32 , 10 ] ) , + 'biomass_density': tuple( [ 32 , 10 ] ) + } , + 'abundance': { + 'abundance': tuple( [ 32 , 12 ] ) + } , + 'biomass': { + 'biomass': tuple( [ 32 , 10 ] ) , + 'biomass_age': tuple( [ 24 , 8 ] ) + } + } + # ----- Expected output + expected_output = { + 'areal_density': { + 'number_density': pd.DataFrame( { + 'transect_num': np.repeat( [ 1 , 2 , 3 , 4 ] , 8 ).astype( np.int64 ) , + 'latitude': np.repeat( [ 20.0 , 30.0 , 40.0 , 50.0 ] , 8 ) , + 'longitude': np.repeat( [ -180.0 , -120.0 , -170.0 , -110.0 ] , 8 ) , + 'stratum_num': np.repeat( [ 0 , 1 ] , 16 ).astype( np.int64 ) , + 'sex': np.tile( [ 'all' , 'all' , 'male' , 'male' , + 'female' , 'female' , 'unsexed' , 'unsexed' ] , 4 ) , + 'rho_a': np.concatenate( [ np.repeat( 0.0 , 8 ) , + [ 4.88e7 , 4.88e7 , 1.99e7 , 1.99e7 , 2.90e7 , 2.90e7 , 0.0 , 0.0 , + 2.44e8 , 2.44e8 , 1.45e8 , 1.45e8 , 9.94e7 , 9.94e7 , 0.0 , 0.0 , + 2.44e9 , 2.44e9 , 1.45e9 , 1.45e9 , 9.94e8 , 9.94e8 , 0.0 , 0.0 ] ] ) , + 'age': np.tile( [ 1 , 2 ] , 16 ).astype( np.int64 ) , + 'count_age_proportion_all': np.repeat( 0.5 , 32 ) , + 'count_age_proportion_adult': np.tile( [ 0.0 , 1.0 ] , 16 ) , + 'rho_a_adult': np.concatenate( [ np.repeat( 0.0 , 9 ) , + [ 4.88e7 , 0.0 , 1.99e7 , 0.0 , 2.89e7 , 0.0 , 0.0 , 0.0 , + 2.44e8 , 0.0 , 1.45e8 , 0.0 , 9.94e7 , 0.0 , 0.0 , 0.0 , + 2.44e9 , 0.0 , 1.45e9 , 0.0 , 9.94e8 , 0.0 , 0.0] ] ) , + } ) , + 'biomass_density': pd.DataFrame( { + 'transect_num': np.repeat( [ 1 , 2 , 3 , 4 ] , 8 ).astype( np.int64 ) , + 'latitude': np.repeat( [ 20.0 , 30.0 , 40.0 , 50.0 ] , 8 ) , + 'longitude': np.repeat( [ -180.0 , -120.0 , -170.0 , -110.0 ] , 8 ) , + 'stratum_num': np.repeat( [ 0 , 1 ] , 16 ).astype( np.int64 ) , + 'sex': np.tile( [ 'all' , 'all' , 'male' , 'male' , + 'female' , 'female' , 'unsexed' , 'unsexed' ] , 4 ) , + 'B_a': np.concatenate( [ np.repeat( 0.0 , 8 ) , + [ 1.50e8 , 1.50e8 , 1.32e8 , 1.32e8 , 1.37e8 , 1.37e8 , 0.0 , 0.0 , + 6.35e8 , 6.35e8 , 9.11e8 , 9.11e8 , 2.69e8 , 2.69e8 , 0.0 , 0.0 , + 6.35e9 , 6.35e9 , 9.11e9 , 9.11e9 , 2.69e9 , 2.69e9 , 0.0 , 0.0 ] ] ) , + 'age': np.tile( [ 1 , 2 ] , 16 ).astype( np.int64 ) , + 'count_age_proportion_all': np.repeat( 0.5 , 32 ) , + 'count_age_proportion_adult': np.tile( [ 0.0 
, 1.0 ] , 16 ) , + 'B_a_adult': np.concatenate( [ np.repeat( 0.0 , 9 ) , + [ 1.5e8 , 0.0 , 1.32e8 , 0.0 , 1.36e8 , 0.0 , 0.0 , 0.0 , + 6.35e8 , 0.0 , 9.11e8 , 0.0 , 2.69e8 , 0.0 , 0.0 , 0.0 , + 6.35e9 , 0.0 , 9.11e9 , 0.0 , 2.69e9 , 0.0 , 0.0] ] ) , + } ) , + } , + 'abundance': { + 'abundance': pd.DataFrame( { + 'transect_num': np.repeat( [ 1 , 2 , 3 , 4 ] , 8 ).astype( np.int64 ) , + 'latitude': np.repeat( [ 20.0 , 30.0 , 40.0 , 50.0 ] , 8 ) , + 'longitude': np.repeat( [ -180.0 , -120.0 , -170.0 , -110.0 ] , 8 ) , + 'stratum_num': np.repeat( [ 0 , 1 ] , 16 ).astype( np.int64 ) , + 'sex': np.tile( [ 'all' , 'all' , 'male' , 'male' , + 'female' , 'female' , 'unsexed' , 'unsexed' ] , 4 ) , + 'NASC_all_ages': np.concatenate( [ np.repeat( 1e1 , 8 ) , + np.repeat( 1e2 , 16 ) , + np.repeat( 1e3 , 8 ) ] ) , + 'NASC_no_age1': np.concatenate( [ np.repeat( 0 , 8 ) , + np.repeat( 1e1 , 8 ) , + np.repeat( 1e2 , 8 ) , + np.repeat( 1e3 , 8 ) ] ) , + 'N': np.concatenate( [ np.repeat( 0.0 , 8 ) , + [ 4.88e8 , 4.88e8 , 1.99e8 , 1.99e8 , 2.90e8 , 2.90e8 , 0.0 , 0.0 , + 2.44e9 , 2.44e9 , 1.45e9 , 1.45e9 , 9.94e8 , 9.94e8 , 0.0 , 0.0 , + 2.42e10 , 2.42e10 , 1.43e10 , 1.43e10 , 9.84e9 , 9.84e9 , 0.0 , 0.0 ] ] ) , + 'age': np.tile( [ 1 , 2 ] , 16 ).astype( np.int64 ) , + 'count_age_proportion_all': np.repeat( 0.5 , 32 ) , + 'count_age_proportion_adult': np.tile( [ 0.0 , 1.0 ] , 16 ) , + 'N_adult': np.concatenate( [ np.repeat( 0.0 , 9 ) , + [ 4.88e8 , 0.0 , 1.99e8 , 0.0 , 2.90e8, 0.0 , 0.0 , 0.0 , + 2.44e9 , 0.0 , 1.45e9 , 0.0 , 9.94e8 , 0.0 , 0.0 , 0.0 , + 2.42e10 , 0.0 , 1.43e10 , 0.0 , 9.84e9 , 0.0 , 0.0] ] ) , + } ) , + } , + 'biomass': { + 'biomass': pd.DataFrame( { + 'transect_num': np.repeat( [ 1 , 2 , 3 , 4 ] , 8 ).astype( np.int64 ) , + 'latitude': np.repeat( [ 20.0 , 30.0 , 40.0 , 50.0 ] , 8 ) , + 'longitude': np.repeat( [ -180.0 , -120.0 , -170.0 , -110.0 ] , 8 ) , + 'stratum_num': np.repeat( [ 0 , 1 ] , 16 ).astype( np.int64 ) , + 'sex': np.tile( [ 'all' , 'all' , 'male' , 'male' , + 'female' , 'female' , 'unsexed' , 'unsexed' ] , 4 ) , + 'B': np.concatenate( [ np.repeat( 0.0 , 8 ) , + [ 1.50e9 , 1.50e9 , 1.32e9 , 1.32e9 , 1.37e9 , 1.37e9 , 0.0 , 0.0 , + 6.35e9 , 6.35e9 , 9.11e9 , 9.11e9 , 2.69e9 , 2.69e9 , 0.0 , 0.0 , + 6.29e10 , 6.29e10 , 9.02e10 , 9.02e10 , 2.67e10 , 2.67e10 , 0.0 , 0.0 ] ] ) , + 'age': np.tile( [ 1 , 2 ] , 16 ).astype( np.int64 ) , + 'count_age_proportion_all': np.repeat( 0.5 , 32 ) , + 'count_age_proportion_adult': np.tile( [ 0.0 , 1.0 ] , 16 ) , + 'B_adult': np.concatenate( [ np.repeat( 0.0 , 9 ) , + [ 1.50e9 , 0.0 , 1.32e9 , 0.0 , 1.37e9 , 0.0 , 0.0 , 0.0 , + 6.35e9 , 0.0 , 9.11e9 , 0.0 , 2.69e9 , 0.0 , 0.0 , 0.0 , + 6.29e10 , 0.0 , 9.02e10 , 0.0 , 2.67e10 , 0.0 , 0.0] ] ) , + } ) , + 'biomass_age': pd.DataFrame( { + 'transect_num': np.tile( [ 1 , 2 ] , 12 ).astype( np.int64 ) , + 'latitude': np.concatenate( [ np.tile( [ 20.0 , 30.0 ] , 6 ) , + np.tile( [ 40.0 , 50.0 ] , 6 ) ] ) , + 'longitude': np.concatenate( [ np.tile( [ -180.0 , -120.0 ] , 6 ) , + np.tile( [ -170.0 , -110.0 ] , 6 ) ] ) , + 'stratum_num': np.repeat( [ 0 , 1 ] , 12 ).astype( np.int64 ) , + 'age': np.tile( [ 1 , 1 , 2 , 2 ] , 6 ).astype( np.int64 ) , + 'sex': np.concatenate( [ np.repeat( [ 'all' , 'male' , 'female' ] , 4 ) , + np.repeat( [ 'all' , 'male' , 'female' ] , 4 ) ] ) , + 'age_proportion': np.tile( [ 0.0 , 0.0 , 1.0 , 1.0 ] , 6 ) , + 'B_age': np.concatenate( [ np.repeat( 0.0 , 3 ) , [ 1.50e9 ] , + np.repeat( 0.0 , 3 ) , [ 1.32e9 ] , + np.repeat( 0.0 , 3 ) , [ 1.37e9 ] , + np.repeat( 0.0 , 2 ) , [ 
6.35e9 ] , + [ 6.29e10 , 0.00 , 0.00 , 9.11e9 , 9.02e10 , + 0.00 , 0.00 , 2.69e9 , 2.67e10 ] ] ) , + } ) , + } + } + + #---------------------------------- + ### Run tests: `test_nasc_to_biomass_conversion` + #---------------------------------- + eval_number_density_df = objS.biology[ 'population' ][ 'areal_density' ][ 'number_density_df' ] + eval_biomass_density_df = objS.biology[ 'population' ][ 'areal_density' ][ 'biomass_density_df' ] + eval_abundance_df = objS.biology[ 'population' ][ 'abundance' ][ 'abundance_df' ] + eval_biomass_df = objS.biology[ 'population' ][ 'biomass' ][ 'biomass_df' ] + eval_biomass_age_df = objS.biology[ 'population' ][ 'biomass' ][ 'biomass_age_df' ] + ### Check shape + assert eval_number_density_df.shape == expected_dimensions[ 'areal_density' ][ 'number_density' ] + assert eval_biomass_density_df.shape == expected_dimensions[ 'areal_density' ][ 'biomass_density' ] + assert eval_abundance_df.shape == expected_dimensions[ 'abundance' ][ 'abundance' ] + assert eval_biomass_df.shape == expected_dimensions[ 'biomass' ][ 'biomass' ] + assert eval_biomass_age_df.shape == expected_dimensions[ 'biomass' ][ 'biomass_age' ] + ### Check datatypes + assert np.all( eval_number_density_df.dtypes == expected_output[ 'areal_density' ][ 'number_density' ].dtypes ) + assert np.all( eval_biomass_density_df.dtypes == expected_output[ 'areal_density' ][ 'biomass_density' ].dtypes ) + assert np.all( eval_abundance_df.dtypes == expected_output[ 'abundance' ][ 'abundance' ].dtypes ) + assert np.all( eval_biomass_df.dtypes == expected_output[ 'biomass' ][ 'biomass' ].dtypes ) + assert np.all( eval_biomass_age_df.dtypes == expected_output[ 'biomass' ][ 'biomass_age' ].dtypes ) + ### Check dataframe equality + assert np.all( eval_number_density_df.sex == expected_output[ 'areal_density' ][ 'number_density' ].sex ) + assert np.allclose( eval_number_density_df[ [ 'rho_a' , 'rho_a_adult' ] ] , + expected_output[ 'areal_density' ][ 'number_density' ][ [ 'rho_a' , 'rho_a_adult' ] ] , + rtol = 1e-1 ) + assert np.all( eval_biomass_density_df.sex == expected_output[ 'areal_density' ][ 'biomass_density' ].sex ) + assert np.allclose( eval_biomass_density_df[ [ 'B_a' , 'B_a_adult' ] ] , + expected_output[ 'areal_density' ][ 'biomass_density' ][ [ 'B_a' , 'B_a_adult' ] ] , + rtol = 1e-1 ) + assert np.all( eval_abundance_df.sex == expected_output[ 'abundance' ][ 'abundance' ].sex ) + assert np.allclose( eval_abundance_df[ [ 'N' , 'N_adult' ] ] , + expected_output[ 'abundance' ][ 'abundance' ][ [ 'N' , 'N_adult' ] ] , + rtol = 1e-1 ) + assert np.all( eval_biomass_df.sex == expected_output[ 'biomass' ][ 'biomass' ].sex ) + assert np.allclose( eval_biomass_df[ [ 'B' , 'B_adult' ] ] , + expected_output[ 'biomass' ][ 'biomass' ][ [ 'B' , 'B_adult' ] ] , + rtol = 1e-1 ) + assert np.all( eval_biomass_age_df.sex == expected_output[ 'biomass' ][ 'biomass_age' ].sex ) + assert np.allclose( eval_biomass_age_df[ [ 'B_age' ] ] , + expected_output[ 'biomass' ][ 'biomass_age' ][ [ 'B_age' ] ] , + rtol = 1e-1 ) + + \ No newline at end of file From eb535ae976ecd3f0cf6d916f6e52f9a873cbc0ff Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Fri, 5 Apr 2024 15:11:55 -0700 Subject: [PATCH 08/35] `test_index_transect_age_sex_proportions` --- echopop/survey.py | 2 +- echopop/tests/test_transect_functions.py | 164 +++++++++++++++++++++++ 2 files changed, 165 insertions(+), 1 deletion(-) create mode 100644 echopop/tests/test_transect_functions.py diff --git a/echopop/survey.py b/echopop/survey.py index 7bc05788..dd47d493 
100644 --- a/echopop/survey.py +++ b/echopop/survey.py @@ -910,7 +910,7 @@ def strata_age_binned_weight_proportions( self , weight_total_adult = df.loc[ df.age > 1 ].groupby( [ 'stratum_num' , 'sex' ] )[ 'count' ].transform( sum ) ) ) .assign( weight_length_sex_proportion_all = lambda x: x[ 'count' ] / x.weight_total_all , weight_length_sex_proportion_adult = lambda x: x[ 'count' ] / x.weight_total_adult ) - .fillna( 0 ) + .replace( np.nan , 0 ) ) ### Add these dataframes to the appropriate data attribute diff --git a/echopop/tests/test_transect_functions.py b/echopop/tests/test_transect_functions.py new file mode 100644 index 00000000..ac35c4c5 --- /dev/null +++ b/echopop/tests/test_transect_functions.py @@ -0,0 +1,164 @@ +import pandas as pd +import numpy as np +import copy +from echopop.survey import Survey +from echopop.computation.biology import index_transect_age_sex_proportions + +def test_index_transect_age_sex_proportions( mock_survey ): + + #### Pull in mock Survey object + objS = mock_survey + + ### Initialize various attributes + objS.acoustics[ 'sigma_bs' ] = { } + objS.statistics[ 'length_weight' ] = { } + objS.biology[ 'weight' ] = { } + objS.biology[ 'population' ] = { } + + ### Create mock data for `age_proportions_df` + objS.biology[ 'weight' ][ 'proportions' ] = { } + objS.biology[ 'weight' ][ 'proportions' ][ 'age_proportions_df' ] = pd.DataFrame( { + 'stratum_num': np.repeat( [ 0 , 1 ] , 2 ).astype( np.int64 ) , + 'age': np.tile( [ 1 , 2 ] , 2 ).astype( np.int64 ) , + 'count_age_proportion_all': np.repeat( 0.5 , 4 ) , + 'count_age_proportion_adult': [ 0.0 , 1.0 , 0.0 , 1.0 ] + } ) + + ### Create mock data for `age_weight_proportions_df` + objS.biology[ 'weight' ][ 'proportions' ][ 'age_weight_proportions_df' ] = pd.DataFrame( { + 'stratum_num': np.repeat( [ 0 , 1 ] , 2 ).astype( np.int64 ) , + 'age': np.tile( [ 1 , 2 ] , 2 ).astype( np.int64 ) , + 'weight_age_proportion_all': [ 0.50 , 0.50 , 0.50 , 0.50 ] , + 'weight_age_proportion_adult': [ 0.0 , 1.0 , 0.0 , 1.0 ] + } ) + + ### Create mock data for `sex_age_weight_proportions_df` + objS.biology[ 'weight' ][ 'proportions' ][ 'sex_age_weight_proportions_df' ] = pd.DataFrame( { + 'stratum_num': np.repeat( [ 0 , 1 ] , 6 ).astype( np.int64 ) , + 'age': np.tile( [ 1 , 1 , 1 , 2 , 2 , 2 ] , 2 ).astype( np.int64 ) , + 'sex': np.tile( [ 'all' , 'female' , 'male' ] , 4 ) , + 'weight_sex_proportion_all': [ 0.5 , 0.6 , 0.4 , 0.5 , 0.4 , 0.6 , + 0.5 , 0.5 , 0.5 , 0.5 , 0.5 , 0.5 ] , + 'weight_sex_proportion_adult': np.tile( [ 0.0 , 0.0 , 0.0 , 1.0 , 1.0 , 1.0 ] , 2 ) + } ) + + ### Create mock data for 'length_weight_df' + objS.statistics[ 'length_weight' ][ 'length_weight_df' ] = pd.DataFrame( + { + 'length_bin': pd.cut( np.repeat( [ 12 , 18 ] , 3 ) , + np.linspace( 9 , 21 , 3 ) ) , + 'sex': np.repeat( [ 'all' , 'female' , 'male' ] , 2 ) , + 'n_length': [ 4 , 2 , 2 , 4 , 2 , 2 ] , + 'mean_weight': [ 2.5 , 3.5 , 1.5 , 7.5 , 6.5 , 8.5 ] , + 'n_weight': [ 4 , 2 , 2 , 4 , 2 , 2 ] , + 'rate': [ 2.63 , 1.36 , 3.90 , 2.63 , 1.36 , 3.90 ] , + 'initial': [ -2.49 , -0.93 , -4.06 , -2.49 , -0.93 , -4.06 ] , + 'weight_fitted': [ 2.21 , 3.46 , 1.41 , 6.43 , 6.02 , 6.87 ] , + 'weight_modeled': [ 2.21 , 3.46 , 1.41 , 6.43 , 6.02 , 6.87 ] + } + ) + + ### Create mock data for `weight_strata_df` + objS.biology[ 'weight' ][ 'weight_strata_df' ] = pd.DataFrame( + { + 'stratum_num': [ 0 , 1 ] , + 'proportion_female': [ 0.592593 , 0.407407 ] , + 'proportion_male': [ 0.407407 , 0.592593 ] , + 'proportion_station_1': [ 0.925926 , 0.925926 ] , + 
'proportion_station_2': [ 0.074074 , 0.074074 ] , + 'average_weight_female': [ 4.719110 , 2.707892 ] , + 'average_weight_male': [ 6.640487 , 6.299942 ] , + 'average_weight_total': [ 3.066481 , 2.603519 ] , + } + ) + + ### Create mock data for `strata_mean` (sigma_bs) + objS.acoustics[ 'sigma_bs' ][ 'strata_mean' ] = pd.DataFrame( + { + 'stratum_num': [ 0 , 1 ] , + 'species_id': np.repeat( 8675309 , 2 ) , + 'sigma_bs_mean': 1.630277e-8 + } + ) + + ### Create mock data for `nasc_df` + objS.acoustics[ 'nasc' ][ 'nasc_df' ] = pd.DataFrame( + { + 'transect_num': [ 1 , 2 , 3 , 4] , + 'stratum_num': [ 0 , 0 , 1 , 1 ] , + 'vessel_log_start': [ 0.0 , 10.1 , 20.1 , 30.1 ] , + 'vessel_log_end': [ 10.0 , 20.0 , 30.0 , 40.0 ] , + 'latitude': [ 20.0 , 30.0 , 40.0 , 50.0 ] , + 'longitude': [ -180.0 , -120.0 , -170.0 , -110.0 ] , + 'transect_spacing': np.repeat( 1.0 , 4 ) , + 'NASC_no_age1': [ 0.0 , 1e1 , 1e2 , 1e3 ] , + 'haul_num': [ 1 , 1 , 2 , 2 ] , + 'NASC_all_ages': [ 1e1 , 1e2 , 1e2 , 1e3 ] + } + ) + + ### Create mock data for `strata_df` + objS.spatial[ 'strata_df' ] = pd.DataFrame( + { + 'stratum_num': [ 0 , 1 ] , + 'haul_num': [ 1 , 2 ] , + 'fraction_hake': [ 1.000 , 0.500 ] + } + ) + + ### Bundle the mocked data into their respective inputs for `index_transect_age_sex_proportions` + test_acoustics_dict = copy.deepcopy( objS.acoustics ) + test_biology_dict = copy.deepcopy( objS.biology ) + test_info_strata = objS.spatial[ 'strata_df' ].copy( ) + + ### Evaluate object for later comparison + eval_nasc_fraction_total_df = index_transect_age_sex_proportions( test_acoustics_dict , + test_biology_dict , + test_info_strata ) + + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- Expected dimensions + expected_dimensions = tuple( [ 8 , 24 ] ) + # ---- Expected output + expected_output = pd.DataFrame( + { + 'latitude': np.repeat( [ 20.0 , 30.0 , 40.0 , 50.0 ] , 2 ) , + 'longitude': np.repeat( [ -180.0 , -120.0 , -170.0 , -110.0 ] , 2 ) , + 'transect_num': np.repeat( [ 1 , 2 , 3 , 4 ] , 2 ).astype( np.int64 ) , + 'stratum_num': np.repeat( [ 0 , 1 ] , 4 ).astype( np.int64 ) , + 'haul_num': np.repeat( [ 1 , 2 ] , 4 ).astype( np.int64 ) , + 'interval': np.repeat( [ 10.0 , 10.0 , 10.0 , 9.9 ] , 2 ) , + 'interval_area': np.repeat( [ 10.0 , 10.0 , 10.0 , 9.9 ] , 2 ) , + 'NASC_all_ages': np.repeat( [ 1e1 , 1e2 , 1e2 , 1e3 ] , 2 ) , + 'NASC_no_age1': np.repeat( [ 0.0 , 1e1 , 1e2 , 1e3 ] , 2 ) , + 'fraction_hake': np.repeat( [ 1.0 , 0.5 ] , 4 ) , + 'species_id': np.repeat( 8675309 , 8 ).astype( np.int32 ) , + 'sigma_bs_mean': np.repeat( 1.630277e-8 , 8 ) , + 'proportion_female': np.repeat( [ 0.592593 , 0.407407 ] , 4 ) , + 'proportion_male': np.repeat( [ 0.407407 , 0.592593 ] , 4 ) , + 'proportion_station_1': np.repeat( 0.925926 , 8 ) , + 'proportion_station_2': np.repeat( 0.074074 , 8 ) , + 'average_weight_female': np.repeat( [ 4.719110 , 2.707892 ] , 4 ) , + 'average_weight_male': np.repeat( [ 6.640487 , 6.299942 ] , 4 ) , + 'average_weight_total': np.repeat( [ 3.066481 , 2.603519 ] , 4 ) , + 'age': np.tile( [ 1 , 2 ] , 4 ).astype( np.int64 ) , + 'count_age_proportion_all': np.repeat( 0.5 , 8 ) , + 'count_age_proportion_adult': np.tile( [ 0.0 , 1.0 ] , 4 ) , + 'weight_age_proportion_all': np.repeat( 0.5 , 8 ) , + 'weight_age_proportion_adult': np.tile( [ 0.0 , 1.0 ] , 4 ) + } + ) + + #---------------------------------- + ### Run tests: `test_index_transect_age_sex_proportions` + #---------------------------------- + ### Check shape + assert 
eval_nasc_fraction_total_df.shape == expected_dimensions + ### Check datatypes + assert np.all( eval_nasc_fraction_total_df.dtypes == expected_output.dtypes ) + ### Dataframe equality + assert np.allclose( eval_nasc_fraction_total_df , expected_output , rtol = 1e-1 ) + + \ No newline at end of file From 818fd4df39ac0a6073cf878df6bd50d68c3b8fd2 Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Fri, 5 Apr 2024 15:20:00 -0700 Subject: [PATCH 09/35] `test_correct_transect_intervals` --- echopop/tests/test_transect_functions.py | 54 +++++++++++++++++++++++- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/echopop/tests/test_transect_functions.py b/echopop/tests/test_transect_functions.py index ac35c4c5..23f31687 100644 --- a/echopop/tests/test_transect_functions.py +++ b/echopop/tests/test_transect_functions.py @@ -3,6 +3,7 @@ import copy from echopop.survey import Survey from echopop.computation.biology import index_transect_age_sex_proportions +from echopop.computation.spatial import correct_transect_intervals def test_index_transect_age_sex_proportions( mock_survey ): @@ -152,7 +153,7 @@ def test_index_transect_age_sex_proportions( mock_survey ): ) #---------------------------------- - ### Run tests: `test_index_transect_age_sex_proportions` + ### Run tests: `index_transect_age_sex_proportions` #---------------------------------- ### Check shape assert eval_nasc_fraction_total_df.shape == expected_dimensions @@ -161,4 +162,53 @@ def test_index_transect_age_sex_proportions( mock_survey ): ### Dataframe equality assert np.allclose( eval_nasc_fraction_total_df , expected_output , rtol = 1e-1 ) - \ No newline at end of file +def test_correct_transect_intervals( ): + + ### Create mock data for `nasc_df` + test_nasc_dataframe = pd.DataFrame( + { + 'transect_num': [ 1 , 2 , 3 , 4] , + 'stratum_num': [ 0 , 0 , 1 , 1 ] , + 'vessel_log_start': [ 0.0 , 10.1 , 20.1 , 30.1 ] , + 'vessel_log_end': [ 10.0 , 20.0 , 30.0 , 40.0 ] , + 'latitude': [ 20.0 , 30.0 , 40.0 , 50.0 ] , + 'longitude': [ -180.0 , -120.0 , -170.0 , -110.0 ] , + 'transect_spacing': np.repeat( 1.0 , 4 ) , + 'NASC_no_age1': [ 0.0 , 1e1 , 1e2 , 1e3 ] , + 'haul_num': [ 1 , 1 , 2 , 2 ] , + 'NASC_all_ages': [ 1e1 , 1e2 , 1e2 , 1e3 ] + } + ) + + ### Evaluate object for later comparison + eval_nasc_interval = correct_transect_intervals( test_nasc_dataframe ) + + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- Expected dimensions + expected_dimensions = tuple( [ 4 , 9 ] ) + # ---- Expected output + expected_output = pd.DataFrame( + { + 'latitude': [ 20.0 , 30.0 , 40.0 , 50.0 ] , + 'longitude': [ -180.0 , -120.0 , -170.0 , -110.0 ] , + 'transect_num': [ 1 , 2 , 3 , 4 ] , + 'stratum_num': [ 0 , 0 , 1 , 1 ] , + 'haul_num': [ 1 , 1 , 2 , 2 ] , + 'interval': [ 10.0 , 10.0 , 10.0 , 9.9 ] , + 'interval_area': [ 10.0 , 10.0 , 10.0 , 9.9 ] , + 'NASC_all_ages': [ 1e1 , 1e2 , 1e2 , 1e3 ] , + 'NASC_no_age1': [ 0.0 , 1e1 , 1e2 , 1e3 ] , + } + ) + + #---------------------------------- + ### Run tests: `correct_transect_intervals` + #---------------------------------- + ### Check shape + assert eval_nasc_interval.shape == expected_dimensions + ### Check datatypes + assert np.all( eval_nasc_interval.dtypes == expected_output.dtypes ) + ### Dataframe equality + assert np.allclose( eval_nasc_interval , expected_output ) \ No newline at end of file From b307f644e4b76d702743cd2f20ca3b431becdc86 Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Fri, 5 Apr 2024 15:26:35 -0700 Subject: [PATCH 
10/35] `test_calculate_start_end_coordinates` --- echopop/tests/test_transect_functions.py | 51 +++++++++++++++++++++++- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/echopop/tests/test_transect_functions.py b/echopop/tests/test_transect_functions.py index 23f31687..21038f14 100644 --- a/echopop/tests/test_transect_functions.py +++ b/echopop/tests/test_transect_functions.py @@ -167,7 +167,7 @@ def test_correct_transect_intervals( ): ### Create mock data for `nasc_df` test_nasc_dataframe = pd.DataFrame( { - 'transect_num': [ 1 , 2 , 3 , 4] , + 'transect_num': [ 1 , 2 , 3 , 4 ] , 'stratum_num': [ 0 , 0 , 1 , 1 ] , 'vessel_log_start': [ 0.0 , 10.1 , 20.1 , 30.1 ] , 'vessel_log_end': [ 10.0 , 20.0 , 30.0 , 40.0 ] , @@ -211,4 +211,51 @@ def test_correct_transect_intervals( ): ### Check datatypes assert np.all( eval_nasc_interval.dtypes == expected_output.dtypes ) ### Dataframe equality - assert np.allclose( eval_nasc_interval , expected_output ) \ No newline at end of file + assert np.allclose( eval_nasc_interval , expected_output ) + +def test_calculate_start_end_coordinates( ): + + ### Create mock data for `nasc_df` + test_nasc_df = pd.DataFrame( + { + 'transect_num': [ 1 , 1 , 2 , 2 ] , + 'stratum_num': [ 0 , 0 , 1 , 1 ] , + 'vessel_log_start': [ 0.0 , 10.1 , 20.1 , 30.1 ] , + 'vessel_log_end': [ 10.0 , 20.0 , 30.0 , 40.0 ] , + 'latitude': [ 20.0 , 30.0 , 40.0 , 50.0 ] , + 'longitude': [ -180.0 , -120.0 , -170.0 , -110.0 ] , + 'transect_spacing': np.repeat( 1.0 , 4 ) , + 'NASC_no_age1': [ 0.0 , 1e1 , 1e2 , 1e3 ] , + 'haul_num': [ 1 , 1 , 2 , 2 ] , + 'NASC_all_ages': [ 1e1 , 1e2 , 1e2 , 1e3 ] + } + ) + + ### Evaluate for later comparison + eval_test_nasc_df = calculate_start_end_coordinates( test_nasc_df , + 'transect_num' ) + + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- Expected dimensions + expected_dimensions = tuple( [ 2 , 4 ] ) + # ---- Expected output + expected_output = pd.DataFrame( + { + 'transect_num': [ 1 , 2 ] , + 'minimum_longitude': [ -180.0 , -170.0 ] , + 'maximum_longitude': [ -120.0 , -110.0 ] , + 'center_latitude': [ 25.0 , 45.0 ] + } + ) + + #---------------------------------- + ### Run tests: `calculate_start_end_coordinates` + #---------------------------------- + ### Check shape + assert eval_test_nasc_df.shape == expected_dimensions + ### Check datatypes + assert np.all( eval_test_nasc_df.dtypes == expected_output.dtypes ) + ### Dataframe equality + assert eval_test_nasc_df.equals( expected_output ) \ No newline at end of file From dbaa216ea305012fed9863f8be7a1fb7c45db6df Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Fri, 5 Apr 2024 15:30:02 -0700 Subject: [PATCH 11/35] `test_calculate_transect_distance` --- echopop/tests/test_transect_functions.py | 54 +++++++++++++++++++++++- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/echopop/tests/test_transect_functions.py b/echopop/tests/test_transect_functions.py index 21038f14..9c07162e 100644 --- a/echopop/tests/test_transect_functions.py +++ b/echopop/tests/test_transect_functions.py @@ -3,7 +3,7 @@ import copy from echopop.survey import Survey from echopop.computation.biology import index_transect_age_sex_proportions -from echopop.computation.spatial import correct_transect_intervals +from echopop.computation.spatial import correct_transect_intervals , calculate_start_end_coordinates , calculate_transect_distance def test_index_transect_age_sex_proportions( mock_survey ): @@ -258,4 +258,54 @@ def 
test_calculate_start_end_coordinates( ): ### Check datatypes assert np.all( eval_test_nasc_df.dtypes == expected_output.dtypes ) ### Dataframe equality - assert eval_test_nasc_df.equals( expected_output ) \ No newline at end of file + assert eval_test_nasc_df.equals( expected_output ) + +def test_calculate_transect_distance( ): + + ### Create mock data for `nasc_df` + test_nasc_df = pd.DataFrame( + { + 'transect_num': [ 1 , 1 , 2 , 2 ] , + 'stratum_num': [ 0 , 0 , 1 , 1 ] , + 'vessel_log_start': [ 0.0 , 10.1 , 20.1 , 30.1 ] , + 'vessel_log_end': [ 10.0 , 20.0 , 30.0 , 40.0 ] , + 'latitude': [ 20.0 , 30.0 , 40.0 , 50.0 ] , + 'longitude': [ -180.0 , -120.0 , -170.0 , -110.0 ] , + 'transect_spacing': np.repeat( 2.0 , 4 ) , + 'NASC_no_age1': [ 0.0 , 1e1 , 1e2 , 1e3 ] , + 'haul_num': [ 1 , 1 , 2 , 2 ] , + 'NASC_all_ages': [ 1e1 , 1e2 , 1e2 , 1e3 ] + } + ) + + ### Evaluate for later comparison + eval_test_nasc_df = calculate_transect_distance( test_nasc_df , + 'transect_num' ) + + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- Expected dimensions + expected_dimensions = tuple( [ 2 , 7 ] ) + # ---- Expected output + expected_output = pd.DataFrame( + { + 'transect_num': [ 1 , 2 ] , + 'minimum_longitude': [ -180.0 , -170.0 ] , + 'maximum_longitude': [ -120.0 , -110.0 ] , + 'center_latitude': [ 25.0 , 45.0 ] , + 'transect_distance': [ 3241.273891 , 2493.203304 ] , + 'transect_spacing': [ 2.0 , 2.0 ] , + 'transect_area': [ 6482.547781 , 4986.406609 ] + } + ) + + #---------------------------------- + ### Run tests: `calculate_start_end_coordinates` + #---------------------------------- + ### Check shape + assert eval_test_nasc_df.shape == expected_dimensions + ### Check datatypes + assert np.all( eval_test_nasc_df.dtypes == expected_output.dtypes ) + ### Dataframe equality + assert np.allclose( eval_test_nasc_df , expected_output ) \ No newline at end of file From 236e58d57f94cb4231740aaa3eb562493ce7ab1e Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Fri, 5 Apr 2024 17:05:04 -0700 Subject: [PATCH 12/35] `test_stratified_transect_statistic` --- echopop/tests/test_stratified_summary.py | 191 +++++++++++++++++++++++ 1 file changed, 191 insertions(+) create mode 100644 echopop/tests/test_stratified_summary.py diff --git a/echopop/tests/test_stratified_summary.py b/echopop/tests/test_stratified_summary.py new file mode 100644 index 00000000..e96d0acc --- /dev/null +++ b/echopop/tests/test_stratified_summary.py @@ -0,0 +1,191 @@ +import numpy as np +import pandas as pd +from echopop.tests.utility_testing_functions import dictionary_shape_equal +from echopop.computation.statistics import stratified_transect_statistic + +def test_stratified_transect_statistic( ): + + ### Create mock data for `transect_summary` + test_transect_summary = pd.DataFrame( + { + 'transect_num': [ 1 , 2 , 3 , 4 ] , + 'minimum_longitude': [ -5.0 , -3.0 , -1.0 , 1.0 ] , + 'maxmum_longitude': [ -2.0 , 5.0 , 3.0 , 7.0 ] , + 'center_latitude': [ 10.0 , 11.0 , 12.5 , 13.5 ] , + 'transect_distance': [ 177.600950 , 472.070493 , 234.766275 , 350.736855 ] , + 'transect_spacing': [ 2.0 , 2.0 , 2.0 , 2.0 ] , + 'transect_area': [ 355.201900 , 944.140986 , 469.532550 , 701.473710 ] , + 'B_adult': [ 1e2 , 1e3 , 1e5 , 1e4 ] , + 'stratum_inpfc': [ 1 , 1 , 2 , 2 ] + } + ) + + ### Create mock data for `strata_summary` + test_strata_summary = pd.DataFrame( + { + 'stratum_inpfc': [ 1 , 2 ] , + 'num_transects': [ 2 , 2 ] , + 'total_transect_area': [ 1299.342886 , 1171.006260 ] , + } + ) + + 
### Evaluate for later comparison + # ---- Replicates == 1 + # ---- Transect sample proportion == 100% + test_transect_sample = 1.0 + test_transect_replicates = 1 + eval_single_stratified_results = stratified_transect_statistic( test_transect_summary , + test_strata_summary , + test_transect_sample , + test_transect_replicates , + parameter = 'B_adult' ) + # ---- Replicates == 10 + # ---- Transect sample proportion == 100% + test_transect_sample = 1.0 + test_transect_replicates = 10 + eval_single_rep_stratified_results = stratified_transect_statistic( test_transect_summary , + test_strata_summary , + test_transect_sample , + test_transect_replicates , + parameter = 'B_adult' ) + + # ---- Replicates == 1 + # ---- Transect sample proportion == 50% + test_transect_sample = 0.5 + test_transect_replicates = 1 + np.random.seed( 10 ) + eval_single_sub_stratified_results = stratified_transect_statistic( test_transect_summary , + test_strata_summary , + test_transect_sample , + test_transect_replicates , + parameter = 'B_adult' ) + + # ---- Replicates == 1 + # ---- Transect sample proportion == 50% + test_transect_sample = 0.5 + test_transect_replicates = 10 + np.random.seed( 1800 ) + eval_single_sub_rep_stratified_results = stratified_transect_statistic( test_transect_summary , + test_strata_summary , + test_transect_sample , + test_transect_replicates , + parameter = 'B_adult' ) + + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + expected_output = { + 'biomass': { + 'mean': { + 'estimate': 1 , + 'confidence_interval': np.array( [ 1 , 1 ] ) , + } , + 'variance': { + 'estimate': 1 , + 'confidence_interval': np.array( [ 1 , 1 ] ) , + } , + 'CV': { + 'estimate': 1 , + 'confidence_interval': np.array( [ 1 , 1 ] ) , + } , + } + } + + #---------------------------------- + ### Run tests: `stratified_transect_statistic` + #---------------------------------- + ### Dictionary structure + # !!! 
TODO: based on the original data structure -- will need to be updated once the core data structure is also updated + # ---- Check attributes + assert set( eval_single_stratified_results[ 'biomass' ].keys( ) ) == ( set( [ 'mean' , 'variance' , 'CV' ] ) ) + assert set( eval_single_rep_stratified_results[ 'biomass' ].keys( ) ) == ( set( [ 'mean' , 'variance' , 'CV' ] ) ) + assert set( eval_single_sub_stratified_results[ 'biomass' ].keys( ) ) == ( set( [ 'mean' , 'variance' , 'CV' ] ) ) + assert set( eval_single_sub_rep_stratified_results[ 'biomass' ].keys( ) ) == ( set( [ 'mean' , 'variance' , 'CV' ] ) ) + # ---- Check sub-directory keys and structure + assert dictionary_shape_equal( eval_single_stratified_results , expected_output ) + assert dictionary_shape_equal( eval_single_rep_stratified_results , expected_output ) + assert dictionary_shape_equal( eval_single_sub_stratified_results , expected_output ) + assert dictionary_shape_equal( eval_single_sub_rep_stratified_results , expected_output ) + ### Data outputs + # ++++ mean + # ---- > estimate + assert np.isclose( eval_single_stratified_results[ 'biomass' ][ 'mean' ][ 'estimate' ] , + 54947653.0 , + rtol = 1e-2 ) + assert np.isclose( eval_single_rep_stratified_results[ 'biomass' ][ 'mean' ][ 'estimate' ] , + 54947653.0 , + rtol = 1e-2 ) + assert np.isclose( eval_single_sub_stratified_results[ 'biomass' ][ 'mean' ][ 'estimate' ] , + 117230560.0 , + rtol = 1e-2 ) + assert np.isclose( eval_single_sub_rep_stratified_results[ 'biomass' ][ 'mean' ][ 'estimate' ] , + 54463985.0 , + rtol = 1e-2 ) + # ---- > confidence interval + assert np.allclose( eval_single_stratified_results[ 'biomass' ][ 'mean' ][ 'confidence_interval' ] , + np.array( [ 54947653.28 , 54947653.28 ] ) , + rtol = 1e-2 ) + assert np.allclose( eval_single_rep_stratified_results[ 'biomass' ][ 'mean' ][ 'confidence_interval' ] , + np.array( [ 54947653.28 , 54947653.28 ] ) , + rtol = 1e-2 ) + assert np.allclose( eval_single_sub_stratified_results[ 'biomass' ][ 'mean' ][ 'confidence_interval' ] , + np.array( [ 1.17e8 , 1.172e8 ] ) , + rtol = 1e-2 ) + assert np.allclose( eval_single_sub_rep_stratified_results[ 'biomass' ][ 'mean' ][ 'confidence_interval' ] , + np.array( [ -4.69e7 , 1.57e8 ] ) , + rtol = 1e-2 ) + # ++++ variance + assert np.isclose( eval_single_stratified_results[ 'biomass' ][ 'variance' ][ 'estimate' ] , + 54846534.0 , + rtol = 1e-2 ) + assert np.isclose( eval_single_rep_stratified_results[ 'biomass' ][ 'variance' ][ 'estimate' ] , + 54846534.0 , + rtol = 1e-2 ) + assert np.isclose( eval_single_sub_stratified_results[ 'biomass' ][ 'variance' ][ 'estimate' ] , + 116601900.0 , + rtol = 1e-2 ) + assert np.isclose( eval_single_sub_rep_stratified_results[ 'biomass' ][ 'variance' ][ 'estimate' ] , + 53662832.0 , + rtol = 1e-2 ) + # ---- > confidence interval + assert np.allclose( eval_single_stratified_results[ 'biomass' ][ 'variance' ][ 'confidence_interval' ] , + np.array( [ 54846534.0 , 54846534.0 ] ) , + rtol = 1e-2 ) + assert np.allclose( eval_single_rep_stratified_results[ 'biomass' ][ 'variance' ][ 'confidence_interval' ] , + np.array( [ 54846534.0 , 54846534.0 ] ) , + rtol = 1e-2 ) + assert np.allclose( eval_single_sub_stratified_results[ 'biomass' ][ 'variance' ][ 'confidence_interval' ] , + np.array( [ 1.17e8 , 1.17e8] ) , + rtol = 1e-2 ) + assert np.allclose( eval_single_sub_rep_stratified_results[ 'biomass' ][ 'variance' ][ 'confidence_interval' ] , + np.array( [ -4.71e7 , 1.53e8 ] ) , + rtol = 1e-2 ) + # ++++ CV + assert np.isclose( 
eval_single_stratified_results[ 'biomass' ][ 'CV' ][ 'estimate' ] , + 0.998 , + rtol = 1e-2 ) + assert np.isclose( eval_single_rep_stratified_results[ 'biomass' ][ 'CV' ][ 'estimate' ] , + 0.998 , + rtol = 1e-2 ) + assert np.isclose( eval_single_sub_stratified_results[ 'biomass' ][ 'CV' ][ 'estimate' ] , + 0.995 , + rtol = 1e-2 ) + assert np.isclose( eval_single_sub_rep_stratified_results[ 'biomass' ][ 'CV' ][ 'estimate' ] , + 0.971 , + rtol = 1e-2 ) + # ---- > confidence interval + assert np.allclose( eval_single_stratified_results[ 'biomass' ][ 'CV' ][ 'confidence_interval' ] , + np.array( [ 0.998 , 0.998 ] ) , + rtol = 1e-2 ) + assert np.allclose( eval_single_rep_stratified_results[ 'biomass' ][ 'CV' ][ 'confidence_interval' ] , + np.array( [ 0.998 , 0.998 ] ) , + rtol = 1e-2 ) + assert np.allclose( eval_single_sub_stratified_results[ 'biomass' ][ 'CV' ][ 'confidence_interval' ] , + np.array( [ 0.995 , 0.995 ] ) , + rtol = 1e-2 ) + assert np.allclose( eval_single_sub_rep_stratified_results[ 'biomass' ][ 'CV' ][ 'confidence_interval' ] , + np.array( [ 0.904 , 1.038 ] ) , + rtol = 1e-2 ) + + + From 5db231e0498230ff95a93a5350abf4a3d8f0a99d Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Fri, 5 Apr 2024 17:12:03 -0700 Subject: [PATCH 13/35] `test_confidence_interval` --- echopop/tests/test_stratified_summary.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/echopop/tests/test_stratified_summary.py b/echopop/tests/test_stratified_summary.py index e96d0acc..31709504 100644 --- a/echopop/tests/test_stratified_summary.py +++ b/echopop/tests/test_stratified_summary.py @@ -1,7 +1,7 @@ import numpy as np import pandas as pd from echopop.tests.utility_testing_functions import dictionary_shape_equal -from echopop.computation.statistics import stratified_transect_statistic +from echopop.computation.statistics import stratified_transect_statistic , confidence_interval def test_stratified_transect_statistic( ): @@ -187,5 +187,24 @@ def test_stratified_transect_statistic( ): np.array( [ 0.904 , 1.038 ] ) , rtol = 1e-2 ) +def test_confidence_interval( ): + ### Mock values + test_values = [ 1.0 , 2.0 , 3.0 , 4.0 , 5.0 , 4.0 , 3.0 , 2.0 , 1.0 ] + + ### Evaluate for comparison later + eval_ci_values = confidence_interval( test_values ) + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- Expected dimensions + expected_dimensions = tuple( [ 2 , ] ) + # ---- Expected output + expected_output = np.array( [ 0.201 , 5.355 ] ) + + #---------------------------------- + ### Run tests: `confidence_interval` + #---------------------------------- + assert np.allclose( eval_ci_values , expected_output , rtol = 1e-2 ) + From 0282d62de98f9c3c5f4432d150eaee316b4ebd18 Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Fri, 5 Apr 2024 17:23:38 -0700 Subject: [PATCH 14/35] `test_bin_variable` --- echopop/tests/test_operations.py | 47 ++++++++++++++++++++++++ echopop/tests/test_stratified_summary.py | 3 ++ 2 files changed, 50 insertions(+) create mode 100644 echopop/tests/test_operations.py diff --git a/echopop/tests/test_operations.py b/echopop/tests/test_operations.py new file mode 100644 index 00000000..eb2338ed --- /dev/null +++ b/echopop/tests/test_operations.py @@ -0,0 +1,47 @@ +import numpy as np +import pandas as pd +from echopop.computation.operations import bin_variable , bin_stats , count_variable , meld , stretch , group_merge + +def test_bin_variable( ): + + ### Mock dataframe + test_dataframe = pd.DataFrame( + { + 
'animal': [ 'pretty pink pony' , 'big blue bass' , 'silly silver silkworm' ] , + 'length': [ 2.0 , 4.0 , 8.0 ] , + } , + ) + + ### Mock bin_values + test_bin_values = np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) + + ### Evaluate for later comparison + # ---- Monkey patch method (TEMPORARY) + eval_dataframe_monkey = test_dataframe.bin_variable( test_bin_values , 'length' ) + # ---- Normal function + eval_dataframe_function = bin_variable( test_dataframe , test_bin_values , 'length' ) + + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- Expected dimensions + expected_dimensions = tuple( [ 3 , 3 ] ) + # ---- Expected output + expected_output = pd.DataFrame( + { + 'animal': [ 'pretty pink pony' , 'big blue bass' , 'silly silver silkworm' ] , + 'length': [ 2.0 , 4.0 , 8.0 ] , + 'length_bin': pd.cut( [ 2.0 , 4.0 , 8.0 ] , + np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , + } , + ) + + #---------------------------------- + ### Run tests: `bin_variable` + #---------------------------------- + ### Check shape + assert eval_dataframe_monkey.shape == expected_dimensions + assert eval_dataframe_function.shape == expected_dimensions + ### Check output + assert eval_dataframe_monkey.equals( expected_output ) + assert eval_dataframe_function.equals( expected_output ) \ No newline at end of file diff --git a/echopop/tests/test_stratified_summary.py b/echopop/tests/test_stratified_summary.py index 31709504..f9185128 100644 --- a/echopop/tests/test_stratified_summary.py +++ b/echopop/tests/test_stratified_summary.py @@ -206,5 +206,8 @@ def test_confidence_interval( ): #---------------------------------- ### Run tests: `confidence_interval` #---------------------------------- + ### Check shape + assert eval_ci_values.shape == expected_dimensions + ### Check output assert np.allclose( eval_ci_values , expected_output , rtol = 1e-2 ) From c2a1d964cb824c299a57201530df4ecddbca651a Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Fri, 5 Apr 2024 17:40:38 -0700 Subject: [PATCH 15/35] `test_bin_stats` --- echopop/tests/test_operations.py | 104 ++++++++++++++++++++++++++++++- 1 file changed, 103 insertions(+), 1 deletion(-) diff --git a/echopop/tests/test_operations.py b/echopop/tests/test_operations.py index eb2338ed..7f7f7714 100644 --- a/echopop/tests/test_operations.py +++ b/echopop/tests/test_operations.py @@ -44,4 +44,106 @@ def test_bin_variable( ): assert eval_dataframe_function.shape == expected_dimensions ### Check output assert eval_dataframe_monkey.equals( expected_output ) - assert eval_dataframe_function.equals( expected_output ) \ No newline at end of file + assert eval_dataframe_function.equals( expected_output ) + +def test_bin_stats( ): + + ### Mock dataframe + test_dataframe = pd.DataFrame( + { + 'animal': [ 'pretty pink pony' , 'big blue bass' , 'silly silver silkworm' , + 'gnarly green grouse' , 'roudy red rabbit' , 'magenta mad manatee' ] , + 'length': [ 2.0 , 4.0 , 8.0 , 3.0 , 6.0 , 7.0 ] , + 'weight': [ 100.0 , 200.0 , 300.0 , 300.0 , 200.0 , 100.0 ] , + 'location': [ 'timbuktu' , 'timbuktu' , 'timbuktu' , + 'lost city of z' , 'lost city of z' , 'lost city of z' ] , + } , + ) + + ### Mock bin_values + test_bin_values = np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) + + ### Evaluate for later comparison + # ++++ No contrast | length + weight + # ---- Monkey patch method (TEMPORARY) + eval_dataframe_monkey_lwnc = test_dataframe.bin_stats( 'length' , test_bin_values ) + # ---- Normal function + eval_dataframe_function_lwnc = bin_stats( 
test_dataframe , 'length' , test_bin_values ) + # ++++ No contrast | length + eval_dataframe_monkey_lnc = test_dataframe.bin_stats( 'length' , test_bin_values , variables = 'length' ) + # ---- Normal function + eval_dataframe_function_lnc = bin_stats( test_dataframe , 'length' , test_bin_values , variables = 'length' ) + # ++++ No contrast | length ~ function: just mean + eval_dataframe_monkey_lncm = test_dataframe.bin_stats( 'length' , test_bin_values , variables = 'length' , functions = [ 'mean' ] ) + # ---- Normal function + eval_dataframe_function_lncm = bin_stats( test_dataframe , 'length' , test_bin_values , variables = 'length' , functions = [ 'mean' ] ) + # ++++ No contrast | length ~ function: just mean + eval_dataframe_monkey_lwc = test_dataframe.bin_stats( 'length' , test_bin_values , contrasts = [ 'location' ] , variables = 'length' ) + # ---- Normal function + eval_dataframe_function_lwc = bin_stats( test_dataframe , 'length' , test_bin_values , contrasts = [ 'location' ] , variables = 'length' ) + + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- Expected dimensions + expected_dimensions_lwnc = tuple( [ 4 , 5 ] ) + expected_dimensions_lnc = tuple( [ 4 , 3 ] ) + expected_dimensions_lncm = tuple( [ 4 , 2 ] ) + expected_dimensions_lwc = tuple( [ 8 , 4 ] ) + # ---- Expected output + expected_output_lwnc = pd.DataFrame( + { + 'length_bin': pd.cut( [ 2.0 , 4.0 , 6.0 , 8.0 ] , + np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , + 'mean_length': [ 2.5 , 4.0 , 6.5 , 8.0 ] , + 'n_length': [ 2 , 1 , 2 , 1 ] , + 'mean_weight': [ 200.0 , 200.0 , 150.0 , 300.0 ] , + 'n_weight': [ 2 , 1 , 2 , 1 ] , + } , + ) + expected_output_lnc = pd.DataFrame( + { + 'length_bin': pd.cut( [ 2.0 , 4.0 , 6.0 , 8.0 ] , + np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , + 'mean_length': [ 2.5 , 4.0 , 6.5 , 8.0 ] , + 'n_length': [ 2 , 1 , 2 , 1 ] , + } , + ) + expected_output_lncm = pd.DataFrame( + { + 'length_bin': pd.cut( [ 2.0 , 4.0 , 6.0 , 8.0 ] , + np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , + 'mean_length': [ 2.5 , 4.0 , 6.5 , 8.0 ] + } , + ) + expected_output_lwc = pd.DataFrame( + { + 'length_bin': pd.cut( np.repeat( [ 2.0 , 4.0 , 6.0 , 8.0 ] , 2 ) , + np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , + 'location': np.tile( [ 'lost city of z' , 'timbuktu' ] , 4 ) , + 'mean_length': [ 3.0 , 2.0 , 0.0 , 4.0 , 6.5 , 0.0 , 0.0 , 8.0 ] , + 'n_length': [ 1 , 1 , 0 , 1 , 2 , 0 , 0 , 1 ] , + } , + ) + + #---------------------------------- + ### Run tests: `bin_stats` + #---------------------------------- + ### Check shape + assert eval_dataframe_monkey_lwnc.shape == expected_dimensions_lwnc + assert eval_dataframe_function_lwnc.shape == expected_dimensions_lwnc + assert eval_dataframe_monkey_lnc.shape == expected_dimensions_lnc + assert eval_dataframe_function_lnc.shape == expected_dimensions_lnc + assert eval_dataframe_monkey_lncm.shape == expected_dimensions_lncm + assert eval_dataframe_function_lncm.shape == expected_dimensions_lncm + assert eval_dataframe_monkey_lwc.shape == expected_dimensions_lwc + assert eval_dataframe_function_lwc.shape == expected_dimensions_lwc + ### Check output + assert eval_dataframe_monkey_lwnc.equals( expected_output_lwnc ) + assert eval_dataframe_function_lwnc.equals( expected_output_lwnc ) + assert eval_dataframe_monkey_lnc.equals( expected_output_lnc ) + assert eval_dataframe_function_lnc.equals( expected_output_lnc ) + assert eval_dataframe_monkey_lncm.equals( expected_output_lncm ) + assert 
eval_dataframe_function_lncm.equals( expected_output_lncm ) + assert eval_dataframe_monkey_lwc.equals( expected_output_lwc ) + assert eval_dataframe_function_lwc.equals( expected_output_lwc ) \ No newline at end of file From 7514429d76e29e10cf7e30fa0cb2407d881b6acb Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Fri, 5 Apr 2024 17:53:52 -0700 Subject: [PATCH 16/35] `test_count_variable` --- echopop/tests/test_operations.py | 54 +++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/echopop/tests/test_operations.py b/echopop/tests/test_operations.py index 7f7f7714..96cc884e 100644 --- a/echopop/tests/test_operations.py +++ b/echopop/tests/test_operations.py @@ -146,4 +146,56 @@ def test_bin_stats( ): assert eval_dataframe_monkey_lncm.equals( expected_output_lncm ) assert eval_dataframe_function_lncm.equals( expected_output_lncm ) assert eval_dataframe_monkey_lwc.equals( expected_output_lwc ) - assert eval_dataframe_function_lwc.equals( expected_output_lwc ) \ No newline at end of file + assert eval_dataframe_function_lwc.equals( expected_output_lwc ) + +def test_count_variable( ): + + ### Mock dataframe + test_dataframe = pd.DataFrame( + { + 'animal': [ 'pretty pink pony' , 'big blue bass' , 'silly silver silkworm' , + 'gnarly green grouse' , 'roudy red rabbit' , 'magenta mad manatee' , + 'pretty pink pony' , 'big blue bass' , 'silly silver silkworm' , + 'gnarly green grouse' , 'roudy red rabbit' , 'magenta mad manatee' ] , + 'length': [ 2.0 , 4.0 , 8.0 , 3.0 , 6.0 , 7.0 , + 2.0 , 4.0 , 8.0 , 3.0 , 6.0 , 7.0 ] , + 'location': [ 'timbuktu' , 'timbuktu' , 'timbuktu' , + 'timbuktu' , 'timbuktu' , 'timbuktu' , + 'lost city of z' , 'lost city of z' , 'lost city of z' , + 'lost city of z' , 'lost city of z' , 'lost city of z' ] , + 'length_count': [ 10 , 20 , 30 , 40 , 50 , 60 , + 60 , 50 , 40 , 30 , 20 , 10 ] , + } , + ) + + ### Evaluate for later comparison + # ---- Monkey patch method (TEMPORARY) + eval_dataframe_monkey = test_dataframe.count_variable( [ 'location' , 'animal' ] , 'length_count' , 'sum' ) + # ---- Normal function + eval_dataframe_function = count_variable( test_dataframe , [ 'location' , 'animal' ] , 'length_count' , 'sum' ) + + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- Expected dimensions + expected_dimensions = tuple( [ 12 , 3 ] ) + # ---- Expected output + expected_output = pd.DataFrame( + { + 'location': np.repeat( [ 'lost city of z' , 'timbuktu' ] , 6 ) , + 'animal': np.tile( [ 'big blue bass' , 'gnarly green grouse' , 'magenta mad manatee' , + 'pretty pink pony' , 'roudy red rabbit' , 'silly silver silkworm' ] , 2 ) , + 'count': [ 50 , 30 , 10 , 60 , 20 , 40 , + 20 , 40 , 60 , 10 , 50 , 30 ] , + } , + ) + + #---------------------------------- + ### Run tests: `count_variable` + #---------------------------------- + ### Check shape + assert eval_dataframe_monkey.shape == expected_dimensions + assert eval_dataframe_function.shape == expected_dimensions + ### Check dataframe equality + assert eval_dataframe_monkey.equals( expected_output ) + assert eval_dataframe_function.equals( expected_output ) \ No newline at end of file From e47bd6a9cd79169fc74290156c002f8644d99ded Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Fri, 5 Apr 2024 18:29:08 -0700 Subject: [PATCH 17/35] `test_meld` --- echopop/computation/operations.py | 2 +- echopop/tests/test_operations.py | 80 ++++++++++++++++++++++++++++++- 2 files changed, 80 insertions(+), 2 deletions(-) diff --git 
a/echopop/computation/operations.py b/echopop/computation/operations.py index badcf46a..a8b75284 100644 --- a/echopop/computation/operations.py +++ b/echopop/computation/operations.py @@ -154,7 +154,7 @@ def meld( specimen_dataframe: pd.DataFrame , # Concatenate the data frames and return return pd.concat( [ specimen_stacked , length_dataframe ] , - join = 'inner' ) + join = 'inner' ).reset_index( drop = True ) @patch_method_to_DataFrame( pd.DataFrame ) def stretch( dataframe , diff --git a/echopop/tests/test_operations.py b/echopop/tests/test_operations.py index 96cc884e..59ec8209 100644 --- a/echopop/tests/test_operations.py +++ b/echopop/tests/test_operations.py @@ -198,4 +198,82 @@ def test_count_variable( ): assert eval_dataframe_function.shape == expected_dimensions ### Check dataframe equality assert eval_dataframe_monkey.equals( expected_output ) - assert eval_dataframe_function.equals( expected_output ) \ No newline at end of file + assert eval_dataframe_function.equals( expected_output ) + +def test_meld( ): + + ### Mock specimen dataframe + test_specimen_dataframe = pd.DataFrame( + { + 'stratum_num': np.repeat( 1 , 12 ) , + 'species_id': np.tile( [ 'big blue bass' , 'pretty pink pony' , 'silly silver silkworm' ] , 4 ) , + 'sex': np.tile( [ 'male' , 'female' ] , 6 ) , + 'group': np.repeat( 'sexed' , 12 ) , + 'station': np.repeat( 'clouds' , 12 ) , + 'length': [ 5.0 , 4.0 , 6.0 , 5.0 , 5.0 , 4.0 , + 5.0 , 5.0 , 7.0 , 4.0 , 5.0 , 6.0 ] , + 'length_bin': pd.cut( [ 5.0 , 4.0 , 6.0 , 5.0 , 5.0 , 4.0 , + 5.0 , 5.0 , 7.0 , 4.0 , 5.0 , 6.0 ] , + np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , + } , + ) + + ### Mock length dataframe + test_length_dataframe = pd.DataFrame( + { + 'stratum_num': np.repeat( 1 , 6 ) , + 'species_id': np.tile( [ 'big blue bass' , 'pretty pink pony' , 'silly silver silkworm' ] , 2 ) , + 'sex': np.tile( [ 'male' , 'female' ] , 3 ) , + 'group': np.repeat( 'sexed' , 6 ) , + 'station': np.repeat( 'waves' , 6 ) , + 'length': [ 2.0 , 4.0 , 3.0 , 2.0 , 4.0 , 3.0 ] , + 'length_bin': pd.cut( [ 2.0 , 4.0 , 3.0 , 2.0 , 4.0 , 3.0 ] , + np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , + 'length_count': [ 10 , 20 , 30 , 30 , 20 , 10 ] , + } , + ) + + ### Evaluate for later comparison + # ---- Monkey patch method (TEMPORARY) + eval_dataframe_monkey = test_specimen_dataframe.meld( test_length_dataframe ) + # ---- Normal function + eval_dataframe_function = meld( test_specimen_dataframe , test_length_dataframe ) + + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- Expected dimensions + expected_dimensions = tuple( [ 16 , 8 ] ) + # ---- Expected output + expected_output = pd.DataFrame( + { + 'stratum_num': np.repeat( 1 , 16 ) , + 'species_id': np.concatenate( [ np.repeat( 'big blue bass' , 3 ) , + np.repeat( 'pretty pink pony' , 3 ) , + np.repeat( 'silly silver silkworm' , 4 ) , + np.tile( [ 'big blue bass' , 'pretty pink pony' , + 'silly silver silkworm' ] , 2 ) ] ) , + 'sex': [ 'female' , 'female' , 'male' , 'female' , 'female' , 'male' , 'female' , 'female' , + 'male' , 'male' , 'male' , 'female' , 'male' , 'female' , 'male' , 'female' ] , + 'group': np.repeat( 'sexed' , 16 ) , + 'station': np.concatenate( [ np.repeat( 'clouds' , 10 ) , + np.repeat( 'waves' , 6 ) ] ) , + 'length': [ 4.0 , 5.0 , 5.0 , 4.0 , 5.0 , 5.0 , 4.0 , 6.0 , + 6.0 , 7.0 , 2.0 , 4.0 , 3.0 , 2.0 , 4.0 , 3.0 ] , + 'length_bin': pd.cut( [ 4.0 , 5.0 , 5.0 , 4.0 , 5.0 , 5.0 , 4.0 , 6.0 , + 6.0 , 7.0 , 2.0 , 4.0 , 3.0 , 2.0 , 4.0 , 3.0 ] , + np.array( [ 
1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , + 'length_count': [ 1 , 1 , 2 , 1 , 1 , 2 , 1 , 1 , 1 , 1 , + 10 , 20 , 30 , 30 , 20 , 10 ] , + } , + ) + + #---------------------------------- + ### Run tests: `count_variable` + #---------------------------------- + ### Check shape + assert eval_dataframe_monkey.shape == expected_dimensions + assert eval_dataframe_function.shape == expected_dimensions + ### Check output + assert np.all( eval_dataframe_monkey == expected_output ) + assert np.all( eval_dataframe_function == expected_output ) \ No newline at end of file From e2a027339ca7d2a8512c833848b51639f4850f63 Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Sat, 6 Apr 2024 10:50:24 -0700 Subject: [PATCH 18/35] `test_stretch` --- echopop/tests/test_operations.py | 48 ++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/echopop/tests/test_operations.py b/echopop/tests/test_operations.py index 59ec8209..21ee54be 100644 --- a/echopop/tests/test_operations.py +++ b/echopop/tests/test_operations.py @@ -268,6 +268,54 @@ def test_meld( ): } , ) + #---------------------------------- + ### Run tests: `count_variable` + #---------------------------------- + ### Check shape + assert eval_dataframe_monkey.shape == expected_dimensions + assert eval_dataframe_function.shape == expected_dimensions + ### Check output + assert np.all( eval_dataframe_monkey == expected_output ) + assert np.all( eval_dataframe_function == expected_output ) + +def test_stretch( ): + + ### Create mock dataframe + test_dataframe = pd.DataFrame( + { + 'stratum_num': [ 1 , 1 , 2 , 2 ] , + 'transect_num': [ 1 , 2 , 3 , 4 ] , + 'latitude': [ 0.0 , 1.0 , 3.0 , 4.0 ] , + 'longitude': [ -1.0 , 0.0 , 1.0 , 2.0 ] , + 'load_a_male': [ 5.0 , 4.0 , 2.0 , 1.0 ] , + 'load_a_female': [ 10.0 , 3.0 , 5.0 , 6.0 ] , + } , + ) + + ### Eval for later comparison + # ---- Monkey patch method (TEMPORARY) + eval_dataframe_monkey = test_dataframe.stretch( variable = 'load_a' ) + # ---- Normal function + eval_dataframe_function = stretch( test_dataframe , variable = 'load_a' ) + + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- Expected dimensions + expected_dimensions = tuple( [ 8 , 6 ] ) + # ---- Expected output + expected_output = pd.DataFrame( + { + 'transect_num': np.repeat( [ 1 , 2 , 3 , 4 ] , 2 ) , + 'latitude': np.repeat( [ 0.0 , 1.0 , 3.0 , 4.0 ] , 2 ) , + 'longitude': np.repeat( [ -1.0 , 0.0 , 1.0 , 2.0 ] , 2 ) , + 'stratum_num': np.repeat( [ 1 , 2 ] , 4 ) , + 'sex': np.tile( [ 'male' , 'female' ] , 4 ) , + 'load_a': [ 5.0 , 10.0 , 4.0 , 3.0 , + 2.0 , 5.0 , 1.0 , 6.0 ] , + } , + ) + #---------------------------------- ### Run tests: `count_variable` #---------------------------------- From 56fddcb1d7d1ca8d350ca86a8b32d5c0e8c23265 Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Sat, 6 Apr 2024 11:28:33 -0700 Subject: [PATCH 19/35] `test_group_merge` --- echopop/tests/test_operations.py | 121 ++++++++++++++++++++++++++++++- 1 file changed, 120 insertions(+), 1 deletion(-) diff --git a/echopop/tests/test_operations.py b/echopop/tests/test_operations.py index 21ee54be..34851bd8 100644 --- a/echopop/tests/test_operations.py +++ b/echopop/tests/test_operations.py @@ -324,4 +324,123 @@ def test_stretch( ): assert eval_dataframe_function.shape == expected_dimensions ### Check output assert np.all( eval_dataframe_monkey == expected_output ) - assert np.all( eval_dataframe_function == expected_output ) \ No newline at end of file + assert np.all( eval_dataframe_function == 
expected_output ) + +def test_group_merge( ): + + ### Create mock dataframe 1 + test_dataframe_a = pd.DataFrame( + { + 'stratum_num': np.repeat( [ 1 , 2 ] , 6 ) , + 'animal': np.tile( [ 'big blue bass' , 'gnarly green grouse' , 'magenta mad manatee' , + 'pretty pink pony' , 'roudy red rabbit' , 'silly silver silkworm' ] , 2 ) , + 'insert_metric_here': [ 1.00 , 1.00 , 1.00 , 0.75 , 0.75 , 0.75 , + 0.50 , 0.50 , 0.50 , 0.75 , 0.75 , 1.00 ] , + + } , + ) + + ### Create mock dataframe 2 + test_dataframe_b = pd.DataFrame( + { + 'stratum_num': np.repeat( [ 1 , 2 ] , 6 ) , + 'animal': np.tile( [ 'big blue bass' , 'gnarly green grouse' , 'magenta mad manatee' , + 'pretty pink pony' , 'roudy red rabbit' , 'silly silver silkworm' ] , 2 ) , + 'group': np.repeat( [ 'sleepy' , 'alert' ] , 6 ) , + 'new_metric_here': [ 0.1 , 0.1 , 0.2 , 0.2 , 0.3 , 0.3 , + 0.5 , 0.2 , 0.2 , 0.4 , 0.4 , 0.5 ] , + + } , + ) + + ### Create mock dataframe 3 + test_dataframe_c = pd.DataFrame( + { + 'stratum_num': np.repeat( [ 1 , 2 ] , 2 ) , + 'group': np.tile( [ 'sleepy' , 'alert' ] , 2 ) , + 'categorical_metric': np.tile( [ 'zippity' , 'doo' ] , 2 ) , + } , + ) + + ### Evaluate for later comparison + # ++++ Drop NA + # ---- Monkey patch method (TEMPORARY) + eval_dataframe_monkey_dropna = test_dataframe_a.group_merge( [ test_dataframe_b , test_dataframe_c ] , + inner_on = 'group' , + outer_on = [ 'stratum_num' ] , + drop_na = True ) + # ---- Normal function + eval_dataframe_function_dropna = group_merge( test_dataframe_a , + [ test_dataframe_b , test_dataframe_c ] , + inner_on = 'group' , + outer_on = [ 'stratum_num' ] , + drop_na = True ) + # ++++ Don't drop NA + # ---- Monkey patch method (TEMPORARY) + eval_dataframe_monkey_keepna = test_dataframe_a.group_merge( [ test_dataframe_b , test_dataframe_c ] , + inner_on = 'group' , + outer_on = [ 'stratum_num' ] , + drop_na = False ) + # ---- Normal function + eval_dataframe_function_keepna = group_merge( test_dataframe_a , + [ test_dataframe_b , test_dataframe_c ] , + inner_on = 'group' , + outer_on = [ 'stratum_num' ] , + drop_na = False ) + + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- Expected dimensions + expected_dimensions_dropna = tuple( [ 12 , 6 ] ) + expected_dimensions_keepna = tuple( [ 14 , 6 ] ) + # ---- Expected output + expected_output_dropna = pd.DataFrame( + { + 'stratum_num': np.repeat( [ 1 , 2 ] , 6 ) , + 'animal': np.tile( [ 'big blue bass' , 'gnarly green grouse' , 'magenta mad manatee' , + 'pretty pink pony' , 'roudy red rabbit' , 'silly silver silkworm' ] , 2 ) , + 'insert_metric_here': [ 1.00 , 1.00 , 1.00 , 0.75 , 0.75 , 0.75 , + 0.50 , 0.50 , 0.50 , 0.75 , 0.75 , 1.00 ] , + 'group': np.repeat( [ 'sleepy' , 'alert' ] , 6 ) , + 'new_metric_here': [ 0.1 , 0.1 , 0.2 , 0.2 , 0.3 , 0.3 , + 0.5 , 0.2 , 0.2 , 0.4 , 0.4 , 0.5 ] , + 'categorical_metric': np.repeat( [ 'zippity' , 'doo' ] , 6 ) , + } , + ) + expected_output_keepna = pd.DataFrame( + { + 'stratum_num': np.concatenate( [ np.repeat( [ 1 , 2 ] , 6 ) , [ 1 , 2 ] ] ) , + 'animal': np.concatenate( [ np.tile( [ 'big blue bass' , 'gnarly green grouse' , 'magenta mad manatee' , + 'pretty pink pony' , 'roudy red rabbit' , 'silly silver silkworm' ] , 2 ) , + np.repeat( np.nan , 2 ).astype( 'object' ) ] ) , + 'insert_metric_here': [ 1.00 , 1.00 , 1.00 , 0.75 , 0.75 , 0.75 , + 0.50 , 0.50 , 0.50 , 0.75 , 0.75 , 1.00 , + np.nan , np.nan ] , + 'group': np.concatenate( [ np.repeat( [ 'sleepy' , 'alert' ] , 6 ) , + [ 'alert' , 'sleepy' ] ] ) , + 
'new_metric_here': [ 0.1 , 0.1 , 0.2 , 0.2 , 0.3 , 0.3 , + 0.5 , 0.2 , 0.2 , 0.4 , 0.4 , 0.5 , + np.nan , np.nan ] , + 'categorical_metric': np.concatenate( [ np.repeat( [ 'zippity' , 'doo' ] , 6 ) , + [ 'doo' , 'zippity' ] ] ) , + } , + ) + + #---------------------------------- + ### Run tests: `count_variable` + #---------------------------------- + ### Check shape + # ++++ NaN removed + assert eval_dataframe_monkey_dropna.shape == expected_dimensions_dropna + assert eval_dataframe_function_dropna.shape == expected_dimensions_dropna + # ++++ NaN kept + assert eval_dataframe_monkey_keepna.shape == expected_dimensions_keepna + assert eval_dataframe_function_keepna.shape == expected_dimensions_keepna + ### Check output + # ++++ NaN removed + assert np.all( eval_dataframe_monkey_dropna == expected_output_dropna ) + assert np.all( eval_dataframe_function_dropna == expected_output_dropna ) + # ++++ NaN kept + assert eval_dataframe_function_keepna.equals( expected_output_keepna ) + assert eval_dataframe_function_keepna.equals( expected_output_keepna ) \ No newline at end of file From 647407ef35ae21cc8acce939531d687b2910b544 Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Sat, 6 Apr 2024 11:45:57 -0700 Subject: [PATCH 20/35] Bug fix for missing ')' --- echopop/survey.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/echopop/survey.py b/echopop/survey.py index dd47d493..5180917b 100644 --- a/echopop/survey.py +++ b/echopop/survey.py @@ -1247,11 +1247,12 @@ def krige( self , { 'kriged_biomass_df': kriged_dataframe } + ) ### TODO: This should be refactored out as an external function ###### rather than a Survey-class method. ### Apportion biomass based on age and sex - self.apportion_kriged_biomass( species_id ) + # self.apportion_kriged_biomass( species_id ) def apportion_kriged_biomass( self , species_id ): From ba6bb225c863d1d5072fabc03b19428f740ab01b Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Sat, 6 Apr 2024 11:58:59 -0700 Subject: [PATCH 21/35] Fix `test_strata_sex_weight_proportions` failure --- echopop/tests/test_data_transect_analysis.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/echopop/tests/test_data_transect_analysis.py b/echopop/tests/test_data_transect_analysis.py index 6c29834f..8cad23b4 100644 --- a/echopop/tests/test_data_transect_analysis.py +++ b/echopop/tests/test_data_transect_analysis.py @@ -117,7 +117,7 @@ def test_strata_sex_weight_proportions( mock_survey ): ### Re-parameterize `specimen_df` with dummy data objS.biology[ 'specimen_df' ] = pd.DataFrame( { - 'stratum_num': np.repeat( [ 0 , 1 ] , 4 ) , + 'stratum_num': np.repeat( [ 0 , 1 ] , 4 ).astype( np.int64 ) , 'sex': np.tile( [ 'male' , 'female' ] , 4 ) , 'group': np.repeat( 'sexed' , 8 ) , 'haul_num': np.tile( [ 1 , 2 ] , 4 ) , @@ -131,7 +131,7 @@ def test_strata_sex_weight_proportions( mock_survey ): ### Re-parameterize `length_df` with dummy data objS.biology[ 'length_df' ] = pd.DataFrame( { - 'stratum_num': np.repeat( [ 0 , 1 ] , 4 ) , + 'stratum_num': np.repeat( [ 0 , 1 ] , 4 ).astype( np.int64 ) , 'haul_num': [ 1 , 1 , 2 , 2 , 3 , 3 , 4 , 4 ] , 'sex': np.tile( [ 'male' , 'female' ] , 4 ) , 'group': np.repeat( 'sexed' , 8 ) , @@ -161,7 +161,7 @@ def test_strata_sex_weight_proportions( mock_survey ): ### Re-parameterize `length_df` with dummy data objS.biology[ 'length_df' ] = pd.DataFrame( { - 'stratum_num': np.repeat( [ 0 , 1 ] , 4 ) , + 'stratum_num': np.repeat( [ 0 , 1 ] , 4 ).astype( np.int64 ) , 'sex': np.tile( [ 'male' , 'female' ] , 4 ) , 'group': 
np.repeat( 'sexed' , 8 ) , 'species_id': np.repeat( [ 8675309 ] , 8 ) , @@ -200,7 +200,7 @@ def test_strata_sex_weight_proportions( mock_survey ): # ---- Expected output expected_output = pd.DataFrame( { - 'stratum_num': np.array( [ 0 , 1 ] ).astype( np.int32 ) , + 'stratum_num': np.array( [ 0 , 1 ] ).astype( int ) , 'proportion_female': [ 0.59 , 0.41 ] , 'proportion_male': [ 0.41 , 0.59 ] , 'proportion_station_1': [ 0.93 , 0.93 ] , From 88065739af8a200703f735d1c08b1b3fe621030d Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Wed, 10 Apr 2024 09:27:18 -0700 Subject: [PATCH 22/35] Changes to `test_nasc_to_biomass_conversion` --- echopop/tests/test_data_transect_analysis.py | 58 ++++++++++++++------ 1 file changed, 40 insertions(+), 18 deletions(-) diff --git a/echopop/tests/test_data_transect_analysis.py b/echopop/tests/test_data_transect_analysis.py index 8cad23b4..47c5dfe5 100644 --- a/echopop/tests/test_data_transect_analysis.py +++ b/echopop/tests/test_data_transect_analysis.py @@ -503,13 +503,13 @@ def test_nasc_to_biomass_conversion( mock_survey ): 'stratum_num': np.repeat( [ 0 , 1 ] , 16 ).astype( np.int64 ) , 'sex': np.tile( [ 'all' , 'all' , 'male' , 'male' , 'female' , 'female' , 'unsexed' , 'unsexed' ] , 4 ) , - 'NASC_all_ages': np.concatenate( [ np.repeat( 1e1 , 8 ) , - np.repeat( 1e2 , 16 ) , - np.repeat( 1e3 , 8 ) ] ) , 'NASC_no_age1': np.concatenate( [ np.repeat( 0 , 8 ) , np.repeat( 1e1 , 8 ) , np.repeat( 1e2 , 8 ) , np.repeat( 1e3 , 8 ) ] ) , + 'NASC_all_ages': np.concatenate( [ np.repeat( 1e1 , 8 ) , + np.repeat( 1e2 , 16 ) , + np.repeat( 1e3 , 8 ) ] ) , 'N': np.concatenate( [ np.repeat( 0.0 , 8 ) , [ 4.88e8 , 4.88e8 , 1.99e8 , 1.99e8 , 2.90e8 , 2.90e8 , 0.0 , 0.0 , 2.44e9 , 2.44e9 , 1.45e9 , 1.45e9 , 9.94e8 , 9.94e8 , 0.0 , 0.0 , @@ -567,11 +567,33 @@ def test_nasc_to_biomass_conversion( mock_survey ): #---------------------------------- ### Run tests: `test_nasc_to_biomass_conversion` #---------------------------------- + # ---- Set evaluated dataframes from function eval_number_density_df = objS.biology[ 'population' ][ 'areal_density' ][ 'number_density_df' ] eval_biomass_density_df = objS.biology[ 'population' ][ 'areal_density' ][ 'biomass_density_df' ] eval_abundance_df = objS.biology[ 'population' ][ 'abundance' ][ 'abundance_df' ] eval_biomass_df = objS.biology[ 'population' ][ 'biomass' ][ 'biomass_df' ] eval_biomass_age_df = objS.biology[ 'population' ][ 'biomass' ][ 'biomass_age_df' ] + # ---- Extract expected dataframes to appropriately set indices + expected_number_density_df = ( + expected_output[ 'areal_density' ][ 'number_density' ] + .set_index( pd.Index( list( expected_output[ 'areal_density' ][ 'number_density' ].index ) ) ) + ) + expected_biomass_density_df = ( + expected_output[ 'areal_density' ][ 'biomass_density' ] + .set_index( pd.Index( list( expected_output[ 'areal_density' ][ 'biomass_density' ].index ) ) ) + ) + expected_abundance_df = ( + expected_output[ 'abundance' ][ 'abundance' ] + .set_index( pd.Index( list( expected_output[ 'abundance' ][ 'abundance' ].index ) ) ) + ) + expected_biomass_df = ( + expected_output[ 'biomass' ][ 'biomass' ] + .set_index( pd.Index( list( expected_output[ 'biomass' ][ 'biomass' ].index ) ) ) + ) + expected_biomass_age_df = ( + expected_output[ 'biomass' ][ 'biomass_age' ] + .set_index( pd.Index( list( expected_output[ 'biomass' ][ 'biomass_age' ].index ) ) ) + ) ### Check shape assert eval_number_density_df.shape == expected_dimensions[ 'areal_density' ][ 'number_density' ] assert 
eval_biomass_density_df.shape == expected_dimensions[ 'areal_density' ][ 'biomass_density' ] @@ -579,31 +601,31 @@ def test_nasc_to_biomass_conversion( mock_survey ): assert eval_biomass_df.shape == expected_dimensions[ 'biomass' ][ 'biomass' ] assert eval_biomass_age_df.shape == expected_dimensions[ 'biomass' ][ 'biomass_age' ] ### Check datatypes - assert np.all( eval_number_density_df.dtypes == expected_output[ 'areal_density' ][ 'number_density' ].dtypes ) - assert np.all( eval_biomass_density_df.dtypes == expected_output[ 'areal_density' ][ 'biomass_density' ].dtypes ) - assert np.all( eval_abundance_df.dtypes == expected_output[ 'abundance' ][ 'abundance' ].dtypes ) - assert np.all( eval_biomass_df.dtypes == expected_output[ 'biomass' ][ 'biomass' ].dtypes ) - assert np.all( eval_biomass_age_df.dtypes == expected_output[ 'biomass' ][ 'biomass_age' ].dtypes ) + assert np.all( eval_number_density_df.dtypes == expected_number_density_df.dtypes ) + assert np.all( eval_biomass_density_df.dtypes == expected_biomass_density_df.dtypes ) + assert np.all( eval_abundance_df.dtypes == expected_abundance_df.dtypes ) + assert np.all( eval_biomass_df.dtypes == expected_biomass_df.dtypes ) + assert np.all( eval_biomass_age_df.dtypes == expected_biomass_age_df.dtypes ) ### Check dataframe equality - assert np.all( eval_number_density_df.sex == expected_output[ 'areal_density' ][ 'number_density' ].sex ) + assert np.all( eval_number_density_df.sex == expected_number_density_df.sex ) assert np.allclose( eval_number_density_df[ [ 'rho_a' , 'rho_a_adult' ] ] , - expected_output[ 'areal_density' ][ 'number_density' ][ [ 'rho_a' , 'rho_a_adult' ] ] , + expected_number_density_df[ [ 'rho_a' , 'rho_a_adult' ] ] , rtol = 1e-1 ) - assert np.all( eval_biomass_density_df.sex == expected_output[ 'areal_density' ][ 'biomass_density' ].sex ) + assert np.all( eval_biomass_density_df.sex == expected_biomass_density_df.sex ) assert np.allclose( eval_biomass_density_df[ [ 'B_a' , 'B_a_adult' ] ] , - expected_output[ 'areal_density' ][ 'biomass_density' ][ [ 'B_a' , 'B_a_adult' ] ] , + expected_biomass_density_df[ [ 'B_a' , 'B_a_adult' ] ] , rtol = 1e-1 ) - assert np.all( eval_abundance_df.sex == expected_output[ 'abundance' ][ 'abundance' ].sex ) + assert np.all( eval_abundance_df.sex == expected_abundance_df.sex ) assert np.allclose( eval_abundance_df[ [ 'N' , 'N_adult' ] ] , - expected_output[ 'abundance' ][ 'abundance' ][ [ 'N' , 'N_adult' ] ] , + expected_abundance_df[ [ 'N' , 'N_adult' ] ] , rtol = 1e-1 ) - assert np.all( eval_biomass_df.sex == expected_output[ 'biomass' ][ 'biomass' ].sex ) + assert np.all( eval_biomass_df.sex == expected_biomass_df.sex ) assert np.allclose( eval_biomass_df[ [ 'B' , 'B_adult' ] ] , - expected_output[ 'biomass' ][ 'biomass' ][ [ 'B' , 'B_adult' ] ] , + expected_biomass_df[ [ 'B' , 'B_adult' ] ] , rtol = 1e-1 ) - assert np.all( eval_biomass_age_df.sex == expected_output[ 'biomass' ][ 'biomass_age' ].sex ) + assert np.all( eval_biomass_age_df.sex == expected_biomass_age_df.sex ) assert np.allclose( eval_biomass_age_df[ [ 'B_age' ] ] , - expected_output[ 'biomass' ][ 'biomass_age' ][ [ 'B_age' ] ] , + expected_biomass_age_df[ [ 'B_age' ] ] , rtol = 1e-1 ) \ No newline at end of file From 2be812ef6b39fa45e8647ad7193dda93a3ad90e8 Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Wed, 10 Apr 2024 09:32:52 -0700 Subject: [PATCH 23/35] Update to `test_index_transect_age_sex_proportions` --- echopop/tests/test_transect_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/echopop/tests/test_transect_functions.py b/echopop/tests/test_transect_functions.py index 9c07162e..15be99ff 100644 --- a/echopop/tests/test_transect_functions.py +++ b/echopop/tests/test_transect_functions.py @@ -135,7 +135,7 @@ def test_index_transect_age_sex_proportions( mock_survey ): 'NASC_all_ages': np.repeat( [ 1e1 , 1e2 , 1e2 , 1e3 ] , 2 ) , 'NASC_no_age1': np.repeat( [ 0.0 , 1e1 , 1e2 , 1e3 ] , 2 ) , 'fraction_hake': np.repeat( [ 1.0 , 0.5 ] , 4 ) , - 'species_id': np.repeat( 8675309 , 8 ).astype( np.int32 ) , + 'species_id': np.repeat( 8675309 , 8 ).astype( np.int64 ) , 'sigma_bs_mean': np.repeat( 1.630277e-8 , 8 ) , 'proportion_female': np.repeat( [ 0.592593 , 0.407407 ] , 4 ) , 'proportion_male': np.repeat( [ 0.407407 , 0.592593 ] , 4 ) , From b6ca6cb44b8ed4df8ee43f2f13fd734cfff1cf37 Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Wed, 10 Apr 2024 09:55:41 -0700 Subject: [PATCH 24/35] Further test fix for `test_group_merge` --- echopop/tests/test_operations.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/echopop/tests/test_operations.py b/echopop/tests/test_operations.py index 34851bd8..6c510ea2 100644 --- a/echopop/tests/test_operations.py +++ b/echopop/tests/test_operations.py @@ -442,5 +442,8 @@ def test_group_merge( ): assert np.all( eval_dataframe_monkey_dropna == expected_output_dropna ) assert np.all( eval_dataframe_function_dropna == expected_output_dropna ) # ++++ NaN kept - assert eval_dataframe_function_keepna.equals( expected_output_keepna ) - assert eval_dataframe_function_keepna.equals( expected_output_keepna ) \ No newline at end of file + eval_nan_value_mask = pd.isnull( eval_dataframe_monkey_keepna.animal ) + expected_nan_value_mask = pd.isnull( expected_output_keepna.animal ) + assert len( expected_output_keepna[ expected_nan_value_mask ] ) == 2 + assert eval_dataframe_monkey_keepna[ ~ eval_nan_value_mask ].equals( expected_output_keepna[ ~ expected_nan_value_mask ] ) + assert eval_dataframe_function_keepna[ ~ eval_nan_value_mask ].equals( expected_output_keepna[ ~ expected_nan_value_mask ] ) \ No newline at end of file From 5305b11dbb6652fe215508069c873c4db0a8c932 Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Wed, 10 Apr 2024 10:56:56 -0700 Subject: [PATCH 25/35] Add more complete `diff` to `conftest` --- echopop/tests/conftest.py | 6 ++++++ echopop/tests/test_data_transect_analysis.py | 16 +++++++++++----- echopop/tests/test_transect_functions.py | 2 +- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/echopop/tests/conftest.py b/echopop/tests/conftest.py index bfe9ed1f..4a411a65 100644 --- a/echopop/tests/conftest.py +++ b/echopop/tests/conftest.py @@ -1,7 +1,13 @@ import pytest from pathlib import Path from echopop import Survey +from _pytest.assertion.util import assertrepr_compare +def pytest_assertrepr_compare( config , op , left , right ): + # hack the verbosity so we always show full diffs on assertion failures, + # even if we're otherwise not fully verbose + config.option.verbose = 2 + return assertrepr_compare( config , op , left , right) # Set up path to test_data folder HERE = Path(__file__).parent.absolute() diff --git a/echopop/tests/test_data_transect_analysis.py b/echopop/tests/test_data_transect_analysis.py index 47c5dfe5..e47f1671 100644 --- a/echopop/tests/test_data_transect_analysis.py +++ b/echopop/tests/test_data_transect_analysis.py @@ -569,30 +569,36 @@ def test_nasc_to_biomass_conversion( mock_survey ): #---------------------------------- # ---- Set evaluated dataframes from 
function eval_number_density_df = objS.biology[ 'population' ][ 'areal_density' ][ 'number_density_df' ] + eval_number_density_df.reset_index( drop = True , inplace = True ) eval_biomass_density_df = objS.biology[ 'population' ][ 'areal_density' ][ 'biomass_density_df' ] + eval_biomass_density_df.reset_index( drop = True , inplace = True ) eval_abundance_df = objS.biology[ 'population' ][ 'abundance' ][ 'abundance_df' ] + eval_abundance_df.reset_index( drop = True , inplace = True ) eval_biomass_df = objS.biology[ 'population' ][ 'biomass' ][ 'biomass_df' ] + eval_biomass_df.reset_index( drop = True , inplace = True ) eval_biomass_age_df = objS.biology[ 'population' ][ 'biomass' ][ 'biomass_age_df' ] + eval_biomass_age_df.reset_index( drop = True , inplace = True ) + # ---- Extract expected dataframes to appropriately set indices expected_number_density_df = ( expected_output[ 'areal_density' ][ 'number_density' ] - .set_index( pd.Index( list( expected_output[ 'areal_density' ][ 'number_density' ].index ) ) ) + # .set_index( pd.Index( list( expected_output[ 'areal_density' ][ 'number_density' ].index ) ) ) ) expected_biomass_density_df = ( expected_output[ 'areal_density' ][ 'biomass_density' ] - .set_index( pd.Index( list( expected_output[ 'areal_density' ][ 'biomass_density' ].index ) ) ) + # .set_index( pd.Index( list( expected_output[ 'areal_density' ][ 'biomass_density' ].index ) ) ) ) expected_abundance_df = ( expected_output[ 'abundance' ][ 'abundance' ] - .set_index( pd.Index( list( expected_output[ 'abundance' ][ 'abundance' ].index ) ) ) + # .set_index( pd.Index( list( expected_output[ 'abundance' ][ 'abundance' ].index ) ) ) ) expected_biomass_df = ( expected_output[ 'biomass' ][ 'biomass' ] - .set_index( pd.Index( list( expected_output[ 'biomass' ][ 'biomass' ].index ) ) ) + # .set_index( pd.Index( list( expected_output[ 'biomass' ][ 'biomass' ].index ) ) ) ) expected_biomass_age_df = ( expected_output[ 'biomass' ][ 'biomass_age' ] - .set_index( pd.Index( list( expected_output[ 'biomass' ][ 'biomass_age' ].index ) ) ) + # .set_index( pd.Index( list( expected_output[ 'biomass' ][ 'biomass_age' ].index ) ) ) ) ### Check shape assert eval_number_density_df.shape == expected_dimensions[ 'areal_density' ][ 'number_density' ] diff --git a/echopop/tests/test_transect_functions.py b/echopop/tests/test_transect_functions.py index 15be99ff..9c07162e 100644 --- a/echopop/tests/test_transect_functions.py +++ b/echopop/tests/test_transect_functions.py @@ -135,7 +135,7 @@ def test_index_transect_age_sex_proportions( mock_survey ): 'NASC_all_ages': np.repeat( [ 1e1 , 1e2 , 1e2 , 1e3 ] , 2 ) , 'NASC_no_age1': np.repeat( [ 0.0 , 1e1 , 1e2 , 1e3 ] , 2 ) , 'fraction_hake': np.repeat( [ 1.0 , 0.5 ] , 4 ) , - 'species_id': np.repeat( 8675309 , 8 ).astype( np.int64 ) , + 'species_id': np.repeat( 8675309 , 8 ).astype( np.int32 ) , 'sigma_bs_mean': np.repeat( 1.630277e-8 , 8 ) , 'proportion_female': np.repeat( [ 0.592593 , 0.407407 ] , 4 ) , 'proportion_male': np.repeat( [ 0.407407 , 0.592593 ] , 4 ) , From c3e8807a65470b551b75cb7b73278e6032991818 Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Wed, 10 Apr 2024 11:51:28 -0700 Subject: [PATCH 26/35] Updated test functions (checked package versions) --- echopop/tests/test_data_transect_analysis.py | 50 +++++++++----------- echopop/tests/test_operations.py | 31 ++++++------ 2 files changed, 39 insertions(+), 42 deletions(-) diff --git a/echopop/tests/test_data_transect_analysis.py b/echopop/tests/test_data_transect_analysis.py index e47f1671..c822bc64 
100644 --- a/echopop/tests/test_data_transect_analysis.py +++ b/echopop/tests/test_data_transect_analysis.py @@ -261,19 +261,19 @@ def test_strata_age_binned_weight_proportions( mock_survey ): # ---- Expected output expected_output = { 'age_proportions': pd.DataFrame( { - 'stratum_num': np.repeat( [ 0 , 1 ] , 2 ).astype( np.int64 ) , + 'stratum_num': np.repeat( [ 0 , 1 ] , 2 ).astype( np.int32 ) , 'age': np.tile( [ 1 , 2 ] , 2 ).astype( np.int64 ) , 'count_age_proportion_all': np.repeat( 0.5 , 4 ) , 'count_age_proportion_adult': [ 0.0 , 1.0 , 0.0 , 1.0 ] } ) , 'age_weight_proportions': pd.DataFrame( { - 'stratum_num': np.repeat( [ 0 , 1 ] , 2 ).astype( np.int64 ) , + 'stratum_num': np.repeat( [ 0 , 1 ] , 2 ).astype( np.int32 ) , 'age': np.tile( [ 1 , 2 ] , 2 ).astype( np.int64 ) , 'weight_age_proportion_all': [ 0.50 , 0.50 , 0.50 , 0.50 ] , 'weight_age_proportion_adult': [ 0.0 , 1.0 , 0.0 , 1.0 ] } ) , 'sex_age_weight_proportions': pd.DataFrame( { - 'stratum_num': np.repeat( [ 0 , 1 ] , 6 ).astype( np.int64 ) , + 'stratum_num': np.repeat( [ 0 , 1 ] , 6 ).astype( np.int32 ) , 'age': np.tile( [ 1 , 1 , 1 , 2 , 2 , 2 ] , 2 ).astype( np.int64 ) , 'sex': np.tile( [ 'all' , 'female' , 'male' ] , 4 ) , 'weight_sex_proportion_all': [ 0.5 , 0.6 , 0.4 , 0.5 , 0.4 , 0.6 , @@ -281,7 +281,7 @@ def test_strata_age_binned_weight_proportions( mock_survey ): 'weight_sex_proportion_adult': np.tile( [ 0.0 , 0.0 , 0.0 , 1.0 , 1.0 , 1.0 ] , 2 ) } ) , 'length_sex_age_weight_proportions': pd.DataFrame( { - 'stratum_num': np.repeat( [ 0 , 1 ] , 12 ).astype( np.int64 ) , + 'stratum_num': np.repeat( [ 0 , 1 ] , 12 ).astype( np.int32 ) , 'age': np.tile( [ 1 , 1 , 1 , 1 , 1 , 1 , 2 , 2 , 2 , 2 , 2 , 2 ] , 2 ).astype( np.int64 ) , 'length_bin': pd.cut( np.tile( [ 12.0 , 12.0 , 12.0 , 18.0 , 18.0 , 18.0 ] , 4 ) , @@ -503,13 +503,14 @@ def test_nasc_to_biomass_conversion( mock_survey ): 'stratum_num': np.repeat( [ 0 , 1 ] , 16 ).astype( np.int64 ) , 'sex': np.tile( [ 'all' , 'all' , 'male' , 'male' , 'female' , 'female' , 'unsexed' , 'unsexed' ] , 4 ) , + 'NASC_all_ages': np.concatenate( [ np.repeat( 1e1 , 8 ) , + np.repeat( 1e2 , 16 ) , + np.repeat( 1e3 , 8 ) ] ) , 'NASC_no_age1': np.concatenate( [ np.repeat( 0 , 8 ) , np.repeat( 1e1 , 8 ) , np.repeat( 1e2 , 8 ) , np.repeat( 1e3 , 8 ) ] ) , - 'NASC_all_ages': np.concatenate( [ np.repeat( 1e1 , 8 ) , - np.repeat( 1e2 , 16 ) , - np.repeat( 1e3 , 8 ) ] ) , + 'N': np.concatenate( [ np.repeat( 0.0 , 8 ) , [ 4.88e8 , 4.88e8 , 1.99e8 , 1.99e8 , 2.90e8 , 2.90e8 , 0.0 , 0.0 , 2.44e9 , 2.44e9 , 1.45e9 , 1.45e9 , 9.94e8 , 9.94e8 , 0.0 , 0.0 , @@ -544,22 +545,17 @@ def test_nasc_to_biomass_conversion( mock_survey ): 6.29e10 , 0.0 , 9.02e10 , 0.0 , 2.67e10 , 0.0 , 0.0] ] ) , } ) , 'biomass_age': pd.DataFrame( { - 'transect_num': np.tile( [ 1 , 2 ] , 12 ).astype( np.int64 ) , - 'latitude': np.concatenate( [ np.tile( [ 20.0 , 30.0 ] , 6 ) , - np.tile( [ 40.0 , 50.0 ] , 6 ) ] ) , - 'longitude': np.concatenate( [ np.tile( [ -180.0 , -120.0 ] , 6 ) , - np.tile( [ -170.0 , -110.0 ] , 6 ) ] ) , + 'transect_num': np.repeat( [ 1 , 2 , 3 , 4 ] , 6 ).astype( np.int64 ) , + 'latitude': np.repeat( [ 20.0 , 30.0 , 40.0 , 50.0 ] , 6 ) , + 'longitude': np.repeat( [ -180.0 , -120.0 , -170.0 , -110.0 ] , 6 ) , 'stratum_num': np.repeat( [ 0 , 1 ] , 12 ).astype( np.int64 ) , - 'age': np.tile( [ 1 , 1 , 2 , 2 ] , 6 ).astype( np.int64 ) , - 'sex': np.concatenate( [ np.repeat( [ 'all' , 'male' , 'female' ] , 4 ) , - np.repeat( [ 'all' , 'male' , 'female' ] , 4 ) ] ) , - 'age_proportion': np.tile( [ 0.0 , 
0.0 , 1.0 , 1.0 ] , 6 ) , - 'B_age': np.concatenate( [ np.repeat( 0.0 , 3 ) , [ 1.50e9 ] , - np.repeat( 0.0 , 3 ) , [ 1.32e9 ] , - np.repeat( 0.0 , 3 ) , [ 1.37e9 ] , - np.repeat( 0.0 , 2 ) , [ 6.35e9 ] , - [ 6.29e10 , 0.00 , 0.00 , 9.11e9 , 9.02e10 , - 0.00 , 0.00 , 2.69e9 , 2.67e10 ] ] ) , + 'age': np.tile( [ 1 , 2 ] , 12 ).astype( np.int64 ) , + 'sex': np.tile( [ 'all' , 'all' , 'male' , 'male' , 'female' , 'female' ] , 4 ) , + 'age_proportion': np.tile( [ 0.0 , 1.0 ] , 12 ) , + 'B_age': np.concatenate( [ np.repeat( 0.0 , 7 ) , + [ 1.497e9 , 0.000 , 1.321e9 , 0.000 , 1.365e9 , 0.000 , + 6.354e9 , 0.000 , 9.111e9 , 0.000 , 2.693e9 , 0.000 , + 6.291e10 , 0.000 , 9.020e10 , 0.000 , 2.666e10 ] ] ) , } ) , } } @@ -569,15 +565,15 @@ def test_nasc_to_biomass_conversion( mock_survey ): #---------------------------------- # ---- Set evaluated dataframes from function eval_number_density_df = objS.biology[ 'population' ][ 'areal_density' ][ 'number_density_df' ] - eval_number_density_df.reset_index( drop = True , inplace = True ) + # eval_number_density_df.reset_index( drop = True , inplace = True ) eval_biomass_density_df = objS.biology[ 'population' ][ 'areal_density' ][ 'biomass_density_df' ] - eval_biomass_density_df.reset_index( drop = True , inplace = True ) + # eval_biomass_density_df.reset_index( drop = True , inplace = True ) eval_abundance_df = objS.biology[ 'population' ][ 'abundance' ][ 'abundance_df' ] - eval_abundance_df.reset_index( drop = True , inplace = True ) + # eval_abundance_df.reset_index( drop = True , inplace = True ) eval_biomass_df = objS.biology[ 'population' ][ 'biomass' ][ 'biomass_df' ] - eval_biomass_df.reset_index( drop = True , inplace = True ) + # eval_biomass_df.reset_index( drop = True , inplace = True ) eval_biomass_age_df = objS.biology[ 'population' ][ 'biomass' ][ 'biomass_age_df' ] - eval_biomass_age_df.reset_index( drop = True , inplace = True ) + # eval_biomass_age_df.reset_index( drop = True , inplace = True ) # ---- Extract expected dataframes to appropriately set indices expected_number_density_df = ( diff --git a/echopop/tests/test_operations.py b/echopop/tests/test_operations.py index 6c510ea2..35385d12 100644 --- a/echopop/tests/test_operations.py +++ b/echopop/tests/test_operations.py @@ -410,20 +410,19 @@ def test_group_merge( ): ) expected_output_keepna = pd.DataFrame( { - 'stratum_num': np.concatenate( [ np.repeat( [ 1 , 2 ] , 6 ) , [ 1 , 2 ] ] ) , - 'animal': np.concatenate( [ np.tile( [ 'big blue bass' , 'gnarly green grouse' , 'magenta mad manatee' , - 'pretty pink pony' , 'roudy red rabbit' , 'silly silver silkworm' ] , 2 ) , - np.repeat( np.nan , 2 ).astype( 'object' ) ] ) , + 'stratum_num': np.concatenate( [ np.repeat( [ 1 , 2 ] , 7 ) ] ) , + 'animal': np.tile( [ 'big blue bass' , 'gnarly green grouse' , 'magenta mad manatee' , + 'pretty pink pony' , 'roudy red rabbit' , 'silly silver silkworm' , None ] , 2 ).astype( object ) , 'insert_metric_here': [ 1.00 , 1.00 , 1.00 , 0.75 , 0.75 , 0.75 , + np.nan , 0.50 , 0.50 , 0.50 , 0.75 , 0.75 , 1.00 , - np.nan , np.nan ] , - 'group': np.concatenate( [ np.repeat( [ 'sleepy' , 'alert' ] , 6 ) , - [ 'alert' , 'sleepy' ] ] ) , + np.nan ] , + 'group': np.repeat( [ 'sleepy' , 'alert' ] , 7 ) , 'new_metric_here': [ 0.1 , 0.1 , 0.2 , 0.2 , 0.3 , 0.3 , + np.nan , 0.5 , 0.2 , 0.2 , 0.4 , 0.4 , 0.5 , - np.nan , np.nan ] , - 'categorical_metric': np.concatenate( [ np.repeat( [ 'zippity' , 'doo' ] , 6 ) , - [ 'doo' , 'zippity' ] ] ) , + np.nan ] , + 'categorical_metric': np.repeat( [ 'zippity' , 'doo' ] , 7 
) , } , ) @@ -442,8 +441,10 @@ def test_group_merge( ): assert np.all( eval_dataframe_monkey_dropna == expected_output_dropna ) assert np.all( eval_dataframe_function_dropna == expected_output_dropna ) # ++++ NaN kept - eval_nan_value_mask = pd.isnull( eval_dataframe_monkey_keepna.animal ) - expected_nan_value_mask = pd.isnull( expected_output_keepna.animal ) - assert len( expected_output_keepna[ expected_nan_value_mask ] ) == 2 - assert eval_dataframe_monkey_keepna[ ~ eval_nan_value_mask ].equals( expected_output_keepna[ ~ expected_nan_value_mask ] ) - assert eval_dataframe_function_keepna[ ~ eval_nan_value_mask ].equals( expected_output_keepna[ ~ expected_nan_value_mask ] ) \ No newline at end of file + eval_nan_value_mask_monkey = pd.isnull( eval_dataframe_monkey_keepna.insert_metric_here ) + eval_nan_value_mask_function = pd.isnull( eval_dataframe_monkey_keepna.insert_metric_here ) + expected_nan_value_mask = pd.isnull( expected_output_keepna.insert_metric_here ) + assert len( eval_dataframe_monkey_keepna[ eval_nan_value_mask_monkey ] ) == 2 + assert len( eval_dataframe_function_keepna[ eval_nan_value_mask_function ] ) == 2 + assert eval_dataframe_monkey_keepna[ ~ eval_nan_value_mask_monkey ].equals( expected_output_keepna[ ~ expected_nan_value_mask ] ) + assert eval_dataframe_function_keepna[ ~ eval_nan_value_mask_function ].equals( expected_output_keepna[ ~ expected_nan_value_mask ] ) \ No newline at end of file From ab96c1b759e63390d787f5424cf3ff6fd1fdf199 Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Wed, 10 Apr 2024 11:58:18 -0700 Subject: [PATCH 27/35] Adjustments to `int32` vs `int64` --- echopop/tests/test_data_transect_analysis.py | 2 +- echopop/tests/test_transect_functions.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/echopop/tests/test_data_transect_analysis.py b/echopop/tests/test_data_transect_analysis.py index c822bc64..4d73df6f 100644 --- a/echopop/tests/test_data_transect_analysis.py +++ b/echopop/tests/test_data_transect_analysis.py @@ -261,7 +261,7 @@ def test_strata_age_binned_weight_proportions( mock_survey ): # ---- Expected output expected_output = { 'age_proportions': pd.DataFrame( { - 'stratum_num': np.repeat( [ 0 , 1 ] , 2 ).astype( np.int32 ) , + 'stratum_num': np.repeat( [ 0 , 1 ] , 2 ).astype( np.int64 ) , 'age': np.tile( [ 1 , 2 ] , 2 ).astype( np.int64 ) , 'count_age_proportion_all': np.repeat( 0.5 , 4 ) , 'count_age_proportion_adult': [ 0.0 , 1.0 , 0.0 , 1.0 ] diff --git a/echopop/tests/test_transect_functions.py b/echopop/tests/test_transect_functions.py index 9c07162e..15be99ff 100644 --- a/echopop/tests/test_transect_functions.py +++ b/echopop/tests/test_transect_functions.py @@ -135,7 +135,7 @@ def test_index_transect_age_sex_proportions( mock_survey ): 'NASC_all_ages': np.repeat( [ 1e1 , 1e2 , 1e2 , 1e3 ] , 2 ) , 'NASC_no_age1': np.repeat( [ 0.0 , 1e1 , 1e2 , 1e3 ] , 2 ) , 'fraction_hake': np.repeat( [ 1.0 , 0.5 ] , 4 ) , - 'species_id': np.repeat( 8675309 , 8 ).astype( np.int32 ) , + 'species_id': np.repeat( 8675309 , 8 ).astype( np.int64 ) , 'sigma_bs_mean': np.repeat( 1.630277e-8 , 8 ) , 'proportion_female': np.repeat( [ 0.592593 , 0.407407 ] , 4 ) , 'proportion_male': np.repeat( [ 0.407407 , 0.592593 ] , 4 ) , From 3bf4f399be7707b96f0749a312bba3f9d459017f Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Wed, 10 Apr 2024 12:01:25 -0700 Subject: [PATCH 28/35] Additional data type adjustments --- echopop/tests/test_data_transect_analysis.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/echopop/tests/test_data_transect_analysis.py b/echopop/tests/test_data_transect_analysis.py index 4d73df6f..faf2ccd4 100644 --- a/echopop/tests/test_data_transect_analysis.py +++ b/echopop/tests/test_data_transect_analysis.py @@ -267,13 +267,13 @@ def test_strata_age_binned_weight_proportions( mock_survey ): 'count_age_proportion_adult': [ 0.0 , 1.0 , 0.0 , 1.0 ] } ) , 'age_weight_proportions': pd.DataFrame( { - 'stratum_num': np.repeat( [ 0 , 1 ] , 2 ).astype( np.int32 ) , + 'stratum_num': np.repeat( [ 0 , 1 ] , 2 ).astype( np.int64 ) , 'age': np.tile( [ 1 , 2 ] , 2 ).astype( np.int64 ) , 'weight_age_proportion_all': [ 0.50 , 0.50 , 0.50 , 0.50 ] , 'weight_age_proportion_adult': [ 0.0 , 1.0 , 0.0 , 1.0 ] } ) , 'sex_age_weight_proportions': pd.DataFrame( { - 'stratum_num': np.repeat( [ 0 , 1 ] , 6 ).astype( np.int32 ) , + 'stratum_num': np.repeat( [ 0 , 1 ] , 6 ).astype( np.int64 ) , 'age': np.tile( [ 1 , 1 , 1 , 2 , 2 , 2 ] , 2 ).astype( np.int64 ) , 'sex': np.tile( [ 'all' , 'female' , 'male' ] , 4 ) , 'weight_sex_proportion_all': [ 0.5 , 0.6 , 0.4 , 0.5 , 0.4 , 0.6 , @@ -281,7 +281,7 @@ def test_strata_age_binned_weight_proportions( mock_survey ): 'weight_sex_proportion_adult': np.tile( [ 0.0 , 0.0 , 0.0 , 1.0 , 1.0 , 1.0 ] , 2 ) } ) , 'length_sex_age_weight_proportions': pd.DataFrame( { - 'stratum_num': np.repeat( [ 0 , 1 ] , 12 ).astype( np.int32 ) , + 'stratum_num': np.repeat( [ 0 , 1 ] , 12 ).astype( np.int64 ) , 'age': np.tile( [ 1 , 1 , 1 , 1 , 1 , 1 , 2 , 2 , 2 , 2 , 2 , 2 ] , 2 ).astype( np.int64 ) , 'length_bin': pd.cut( np.tile( [ 12.0 , 12.0 , 12.0 , 18.0 , 18.0 , 18.0 ] , 4 ) , From 35b6746d6ea90c3e0a8fb50f9868963997bf5fa4 Mon Sep 17 00:00:00 2001 From: Wu-Jung Lee Date: Fri, 12 Apr 2024 08:43:58 -0700 Subject: [PATCH 29/35] remove spurious files under docs --- docs/index copy.md | 18 ----------------- docs/theory_implementation.ipynb | 33 -------------------------------- 2 files changed, 51 deletions(-) delete mode 100644 docs/index copy.md delete mode 100644 docs/theory_implementation.ipynb diff --git a/docs/index copy.md b/docs/index copy.md deleted file mode 100644 index b738f562..00000000 --- a/docs/index copy.md +++ /dev/null @@ -1,18 +0,0 @@ -# EchoPro - -This site currently hosts example Jupyter notebooks for the new Python EchoPro package (https://github.com/uw-echospace/EchoPro/). Over time, the documentation for this package will be added here too. - -The Jupyter notebooks are shown in a "rendered", executed form. - -```{admonition} Glitches with some interactive graphical elements -While the notebooks in this site are rendered, there are some glitches in the display we're still working out. In particular, an [ipywidgets](https://ipywidgets.readthedocs.io/en/stable/) interactive graphical element in the semivariogram widget doesn't display correctly. The notebooks do run correctly when executed with Jupyter Notebook ("classic", not JupyterLab). -``` - -Go to the individual example notebooks below or in the table of content on the left. - -```{tableofcontents} -``` - -## Installation - -See the [README.md](https://github.com/uw-echospace/EchoPro/blob/master/README.md) in the EchoPro repository for installation and execution instructions. 
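
The `int32`/`int64` churn in PATCH 27/35 and 28/35 above most likely reflects NumPy's platform-dependent default integer width (32-bit on Windows builds, 64-bit on most Linux/macOS builds), so expected arrays built without an explicit cast can disagree with the evaluated output depending on where the tests run. Below is a minimal sketch, not taken from any patch in this series, of the width-agnostic check that the `dataframe_dtypes_equal` helper added to `conftest.py` later in the series (PATCH 31/35) is built around: `np.issubdtype` compares dtype kinds rather than exact widths. The `frame` and `expected_kinds` names here are illustrative only.

import numpy as np
import pandas as pd

# Expected dataframe built the same way as in the tests above; without the
# explicit cast the integer column could come out as int32 on some platforms.
frame = pd.DataFrame(
    {
        'stratum_num': np.repeat([0, 1], 2).astype(np.int32),
        'count_age_proportion_all': np.repeat(0.5, 4),
    }
)

# Reference maps each column to a dtype *kind*, not a concrete width
# (the same idea as the `expected_dtypes` dictionaries used in the reworked tests).
expected_kinds = {
    'stratum_num': np.integer,
    'count_age_proportion_all': np.floating,
}

for column, dtype in frame.dtypes.items():
    # np.issubdtype(np.int32, np.integer) and np.issubdtype(np.int64, np.integer)
    # are both True, so the assertion no longer pins a specific integer width.
    assert np.issubdtype(dtype, expected_kinds[column]), f"dtype mismatch for '{column}'"

PATCH 27/35 and 28/35 resolve the mismatch the other way, by pinning the expected arrays back to `np.int64`; the kind-based comparison appears to be the approach the later `conftest.py` utilities settle on.
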
diff --git a/docs/theory_implementation.ipynb b/docs/theory_implementation.ipynb deleted file mode 100644 index 76375149..00000000 --- a/docs/theory_implementation.ipynb +++ /dev/null @@ -1,33 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "(theory:theory_base)=\n", - "# Theory" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From e45ae29d27c7aaf39f18cacacce12fe1d23f41fd Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Mon, 15 Apr 2024 12:42:24 -0700 Subject: [PATCH 30/35] Edits to `conftest` and utility functions location --- echopop/tests/conftest.py | 102 +++++++++++---------- echopop/tests/utility_testing_functions.py | 34 ------- 2 files changed, 55 insertions(+), 81 deletions(-) delete mode 100644 echopop/tests/utility_testing_functions.py diff --git a/echopop/tests/conftest.py b/echopop/tests/conftest.py index 4a411a65..1e95bcc8 100644 --- a/echopop/tests/conftest.py +++ b/echopop/tests/conftest.py @@ -1,71 +1,79 @@ import pytest +import numpy as np from pathlib import Path from echopop import Survey from _pytest.assertion.util import assertrepr_compare -def pytest_assertrepr_compare( config , op , left , right ): - # hack the verbosity so we always show full diffs on assertion failures, - # even if we're otherwise not fully verbose - config.option.verbose = 2 - return assertrepr_compare( config , op , left , right) - -# Set up path to test_data folder +### Set up path to the `test_data` folder HERE = Path(__file__).parent.absolute() TEST_DATA_ROOT = HERE.parent / "test_data" -@pytest.fixture(scope="session") -def test_path(): +### Fixtures +# ---- Test root/config/input file paths +@pytest.fixture( scope = "session" ) +def test_path( ) : + return { - "ROOT": TEST_DATA_ROOT, - "CONFIG": TEST_DATA_ROOT / "config_files", - "INPUT": TEST_DATA_ROOT / "input_files", # this doesn't exist yet + "ROOT" : TEST_DATA_ROOT , + "CONFIG" : TEST_DATA_ROOT / "config_files" , + "INPUT" : TEST_DATA_ROOT / "input_files" , } +# ---- Mock `Survey` class object +@pytest.fixture( scope = "session") +def mock_survey( test_path ) -> Survey : -@pytest.fixture(scope="session") -def mock_survey(test_path) -> Survey: return Survey( - init_config_path=Path(test_path["CONFIG"] / "config_init.yml"), - survey_year_config_path=Path(test_path["CONFIG"] / "config_survey.yml"), + init_config_path =Path( test_path[ "CONFIG" ] / "config_init.yml" ) , + survey_year_config_path =Path( test_path[ "CONFIG" ] / "config_survey.yml" ) , ) - -# ============ below from previous version, remove after revamping is complete ============ -@pytest.fixture(scope="session") -def config_base_path() -> Path: +# ---- Dictionary shape comparison utility function +@pytest.fixture( scope = "session") +def dictionary_shape( dictionary: dict ) : """ - Defines the base directory path for the - configuration files. 
- - Returns - ------- - pathlib.Path - The base directory path for the configuration files + A utility test function that extracts the shape of a nested dictionary """ - return HERE / "../config_files" + if isinstance( dictionary , dict ) : + return( { i: dictionary_shape( dictionary[ i ] ) for i in dictionary } ) + else: + return None -@pytest.fixture(scope="session") -def reports_base_path() -> Path: +# ---- Test for comparing Dictionary shapes/dimensions +@pytest.fixture( scope = "session") +def dictionary_shape_equal( dictionary1: dict , + dictionary2: dict ): """ - Defines the base directory path were all reports - generated should be saved. - Returns - ------- - pathlib.Path - The base directory path for the reports + Tests equality between the shapes of two nested dictionaries """ - return HERE / "tests/reports/echopop_python_output" - + result = dictionary_shape( dictionary1 ) == dictionary_shape( dictionary2 ) + + if result : + return result + else: + if set( dictionary_shape( dictionary1 ) ) <= set( dictionary_shape( dictionary2 ) ) : + tracked_true = [ ] + + for j in dictionary2.keys( ) : + test = set( dictionary1[ j ].keys( ) ) <= ( dictionary2[ j ].keys( ) ) + tracked_true.append( test ) + + if np.all( tracked_true ) : + return True + else : + return result + else : + return result -@pytest.fixture(scope="session") -def matlab_output_base_path() -> Path: +### Hook functions +def pytest_assertrepr_compare( config , op , left , right ): """ - Defines the base directory path for the - Matlab output files. - Returns - ------- - pathlib.Path - The base directory path for the Matlab output files + Hook function that always shows the full `diff` on assertion + failures by increasing the verbosity (`config.option.verbose`) """ - return Path("") + + ### Adjust configuration `diff` verbosity + config.option.verbose = 2 + + return assertrepr_compare( config , op , left , right) \ No newline at end of file diff --git a/echopop/tests/utility_testing_functions.py b/echopop/tests/utility_testing_functions.py deleted file mode 100644 index 98a370e7..00000000 --- a/echopop/tests/utility_testing_functions.py +++ /dev/null @@ -1,34 +0,0 @@ -import numpy as np - -def dictionary_shape( dictionary: dict ): - """ - A utility test function that extracts the shape of a nested dictionary - """ - if isinstance( dictionary , dict ): - return( { i: dictionary_shape( dictionary[ i ] ) for i in dictionary } ) - else: - return None - -def dictionary_shape_equal( dictionary1: dict , - dictionary2: dict ): - """ - Tests equality between the shapes of two nested dictionaries - """ - result = dictionary_shape( dictionary1 ) == dictionary_shape( dictionary2 ) - - if result: - return result - else: - if set( dictionary_shape( dictionary1 ) ) <= set( dictionary_shape( dictionary2 ) ): - tracked_true = [ ] - - for j in dictionary2.keys( ): - test = set( dictionary1[ j ].keys( ) ) <= ( dictionary2[ j ].keys( ) ) - tracked_true.append( test ) - - if np.all( tracked_true ): - return True - else: - return result - else: - return result \ No newline at end of file From 604ea7cd257e3022e063c6eeb0aef328ed86137c Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Mon, 15 Apr 2024 19:42:45 -0700 Subject: [PATCH 31/35] Updated utility test functions --- echopop/computation/operations.py | 7 +- echopop/survey.py | 45 +- echopop/tests/conftest.py | 126 ++++- echopop/tests/test_data_transect_analysis.py | 563 ++++++++++--------- echopop/tests/test_transect_functions.py | 159 ++++-- 5 files changed, 532 insertions(+), 368 
deletions(-) diff --git a/echopop/computation/operations.py b/echopop/computation/operations.py index a8b75284..5e4a9f78 100644 --- a/echopop/computation/operations.py +++ b/echopop/computation/operations.py @@ -120,7 +120,7 @@ def count_variable( dataframe: pd.DataFrame , return ( dataframe # input dataframe .reset_index( drop=True ) - .groupby( contrasts ) + .groupby( contrasts , observed = False ) .agg({variable: [('count' , fun)]}) .replace(np.nan, 0 ) .droplevel( level = 0 , axis = 1 ) @@ -146,8 +146,9 @@ def meld( specimen_dataframe: pd.DataFrame , specimen_stacked = ( specimen_dataframe .copy() - .groupby(['stratum_num' , 'species_id' , 'sex' , 'group' , 'station' , 'length' , 'length_bin' ]) - .apply(lambda x: len(x['length'])) + .groupby( ['stratum_num' , 'species_id' , 'sex' , 'group' , 'station' , 'length' , 'length_bin' ] , + observed = False )[ [ 'length' ] ] + .apply(lambda x: len( x ) , include_groups = True ) .reset_index(name='length_count') ) diff --git a/echopop/survey.py b/echopop/survey.py index 5180917b..64155d5a 100644 --- a/echopop/survey.py +++ b/echopop/survey.py @@ -669,7 +669,7 @@ def strata_sex_weight_proportions( self , station_length_aggregate = ( station_sex_length # calculate the within-sample sum and proportions (necessary for the downstream dot product calculation) - .pipe( lambda x: x.assign( within_station_n = x.groupby( [ 'sex' , 'station' , 'stratum_num' ] )[ 'count' ].transform( sum ) , + .pipe( lambda x: x.assign( within_station_n = x.groupby( [ 'sex' , 'station' , 'stratum_num' ] )[ 'count' ].transform( 'sum' ) , within_station_p = lambda x: x[ 'count' ] / x[ 'within_station_n' ] ) ) .replace( np.nan, 0 ) # remove erroneous NaN (divide by 0 or invalid values) .merge( total_n , on = 'stratum_num' ) # merge station_sex_length with total_n @@ -685,8 +685,9 @@ def strata_sex_weight_proportions( self , .loc[ station_length_aggregate.sex.isin( [ 'male' , 'female' ] ) ] # only parse 'male' and 'female' # create a pivot that will reorient data to the desired shape .pivot_table( index = [ 'sex' , 'station' ] , - columns = [ 'stratum_num' ] , - values = [ 'overall_station_p' ] ) + columns = [ 'stratum_num' ] , + values = [ 'overall_station_p' ] , + observed = False ) .groupby( 'sex' ) .sum( ) ) @@ -698,7 +699,8 @@ def strata_sex_weight_proportions( self , # create a pivot that will reorient data to the desired shape .pivot_table( index = [ 'sex' , 'station' ] , columns = 'stratum_num' , - values = 'overall_station_p' ) + values = 'overall_station_p' , + observed = False ) .groupby( 'station' ) .sum() ) @@ -710,7 +712,8 @@ def strata_sex_weight_proportions( self , # create a pivot that will reorient data to the desired shape .pivot_table( index = [ 'sex' , 'station' ] , columns = 'stratum_num' , - values = 'overall_station_p' ) + values = 'overall_station_p' , + observed = False ) .groupby( [ 'sex' , 'station' ] ) .sum() ) @@ -725,7 +728,8 @@ def strata_sex_weight_proportions( self , .reset_index( name = 'stn_p' ) , on = [ 'stratum_num' , 'station' ] ) .pivot_table( columns = 'stratum_num' , index = [ 'station' , 'sex' ] , - values = [ 'stn_p' , 'sex_stn_p' ] ) + values = [ 'stn_p' , 'sex_stn_p' ] , + observed = False ) ) ### Format the length bin proportions so they resemble a similar table/matrix shape as the above metrics @@ -734,7 +738,8 @@ def strata_sex_weight_proportions( self , station_length_aggregate .pivot_table( columns = [ 'sex' , 'station' , 'stratum_num' ] , index = [ 'length_bin' ] , - values = [ 'within_station_p' ] )[ 'within_station_p' ] + 
values = [ 'within_station_p' ] , + observed = False )[ 'within_station_p' ] ) ### Calculate combined station fraction means @@ -834,13 +839,13 @@ def strata_age_binned_weight_proportions( self , .count_variable( variable = 'length' , contrasts = [ 'stratum_num' , 'age' ] , fun = 'size' ) - .pipe( lambda x: x.assign( stratum_count_all = x.groupby( [ 'stratum_num' ] )[ 'count' ].transform( sum ) , - stratum_count_total = x.loc[ x.age > 1 ].groupby( [ 'stratum_num' ] )[ 'count' ].transform( sum ) ) ) - .groupby( [ 'stratum_num' , 'age' ] ) + .pipe( lambda x: x.assign( stratum_count_all = x.groupby( [ 'stratum_num' ] )[ 'count' ].transform( 'sum' ) , + stratum_count_total = x.loc[ x.age > 1 ].groupby( [ 'stratum_num' ] )[ 'count' ].transform( 'sum' ) ) ) + .groupby( [ 'stratum_num' , 'age' ] , observed = False )[ [ 'age' , 'count' , 'stratum_count_all' , 'stratum_count_total' ] ] .apply( lambda df: pd.Series( { 'count_age_proportion_all': ( df[ 'count' ] / df.stratum_count_all ).sum() , 'count_age_proportion_adult': ( df.loc[ df.age > 1 ][ 'count' ] / df.stratum_count_total ).sum( ) - } ) ) + } ) , include_groups = True ) .reset_index( ) ) @@ -861,17 +866,17 @@ def strata_age_binned_weight_proportions( self , .dropna( how = 'any' ) .pipe( lambda df: df.assign( weight_stratum_all = df .groupby( [ 'stratum_num' ] )[ 'weight' ] - .transform( sum ) , + .transform( 'sum' ) , weight_stratum_adult = df .loc[ lambda x: x.age > 1 ] .groupby( [ 'stratum_num' ] )[ 'weight' ] - .transform( sum ) ) ) + .transform( 'sum' ) ) ) .groupby( [ 'stratum_num' , 'age' ] ) .apply( lambda df: pd.Series( { 'weight_age_proportion_all': ( df.weight / df.weight_stratum_all ).sum( ) , 'weight_age_proportion_adult': ( df.weight / df.weight_stratum_adult ).sum( ) - } ) ) - .reset_index() + } ) , include_groups = False ) + .reset_index( ) ) # Calculate adult proportions/contributions (in terms of summed weight) for each stratum @@ -885,13 +890,13 @@ def strata_age_binned_weight_proportions( self , .count_variable( contrasts = [ 'stratum_num' , 'age' , 'length_bin' , 'sex' ] , variable = 'weight' , fun = 'sum' ) - .pipe( lambda df: df.assign( weight_total_all = df.groupby( [ 'stratum_num' , 'sex' ] )[ 'count' ].transform( sum ) , - weight_total_adult = df.loc[ df.age > 1 ].groupby( [ 'stratum_num' , 'sex' ] )[ 'count' ].transform( sum ) ) ) + .pipe( lambda df: df.assign( weight_total_all = df.groupby( [ 'stratum_num' , 'sex' ] )[ 'count' ].transform( 'sum' ) , + weight_total_adult = df.loc[ df.age > 1 ].groupby( [ 'stratum_num' , 'sex' ] )[ 'count' ].transform( 'sum' ) ) ) .groupby( [ 'stratum_num' , 'age' , 'sex' ] ) .apply( lambda x: pd.Series( { 'weight_sex_proportion_all': ( x[ 'count' ] / x.weight_total_all ).sum() , 'weight_sex_proportion_adult': ( x[ 'count' ] / x.weight_total_adult ).sum() - } ) ) + } ) , include_groups = False ) .reset_index( ) .fillna( 0 ) ) @@ -906,8 +911,8 @@ def strata_age_binned_weight_proportions( self , .count_variable( contrasts = [ 'stratum_num' , 'age' , 'length_bin' , 'sex' ] , variable = 'weight' , fun = 'sum' ) - .pipe( lambda df: df.assign( weight_total_all = df.groupby( [ 'stratum_num' , 'sex' ] )[ 'count' ].transform( sum ) , - weight_total_adult = df.loc[ df.age > 1 ].groupby( [ 'stratum_num' , 'sex' ] )[ 'count' ].transform( sum ) ) ) + .pipe( lambda df: df.assign( weight_total_all = df.groupby( [ 'stratum_num' , 'sex' ] )[ 'count' ].transform( 'sum' ) , + weight_total_adult = df.loc[ df.age > 1 ].groupby( [ 'stratum_num' , 'sex' ] )[ 'count' ].transform( 'sum' ) ) ) .assign( 
weight_length_sex_proportion_all = lambda x: x[ 'count' ] / x.weight_total_all , weight_length_sex_proportion_adult = lambda x: x[ 'count' ] / x.weight_total_adult ) .replace( np.nan , 0 ) diff --git a/echopop/tests/conftest.py b/echopop/tests/conftest.py index 1e95bcc8..90dacb8d 100644 --- a/echopop/tests/conftest.py +++ b/echopop/tests/conftest.py @@ -1,5 +1,7 @@ import pytest +from typing import Union import numpy as np +import pandas as pd from pathlib import Path from echopop import Survey from _pytest.assertion.util import assertrepr_compare @@ -28,8 +30,20 @@ def mock_survey( test_path ) -> Survey : survey_year_config_path =Path( test_path[ "CONFIG" ] / "config_survey.yml" ) , ) +### Hook functions +def pytest_assertrepr_compare( config , op , left , right ): + """ + Hook function that always shows the full `diff` on assertion + failures by increasing the verbosity (`config.option.verbose`) + """ + + ### Adjust configuration `diff` verbosity + config.option.verbose = 2 + + return assertrepr_compare( config , op , left , right) + +### Utility functions # ---- Dictionary shape comparison utility function -@pytest.fixture( scope = "session") def dictionary_shape( dictionary: dict ) : """ A utility test function that extracts the shape of a nested dictionary @@ -41,7 +55,6 @@ def dictionary_shape( dictionary: dict ) : return None # ---- Test for comparing Dictionary shapes/dimensions -@pytest.fixture( scope = "session") def dictionary_shape_equal( dictionary1: dict , dictionary2: dict ): """ @@ -66,14 +79,105 @@ def dictionary_shape_equal( dictionary1: dict , else : return result -### Hook functions -def pytest_assertrepr_compare( config , op , left , right ): - """ - Hook function that always shows the full `diff` on assertion - failures by increasing the verbosity (`config.option.verbose`) - """ +# ---- Test for dataframe shape equality +def dataframe_shape_equal( input: Union[ pd.DataFrame , dict ] , + reference: Union[ tuple , dict ] ): + + ### DataFrame + if ( isinstance( input , pd.DataFrame ) ) & ( isinstance( reference , tuple ) ) : + assert input.shape == reference + + ### Dictionary + elif ( isinstance( input , dict ) ) & ( isinstance( reference , dict ) ): + assert extract_dataframe_shape( input ) == extract_dataframe_shape( reference ) - ### Adjust configuration `diff` verbosity - config.option.verbose = 2 - return assertrepr_compare( config , op , left , right) \ No newline at end of file +# ---- Test for comparing Dictionary equality (including nested DataFrames) +def dictionary_equality( dictionary1: dict , + dictionary2: dict ): + + ### Iterate through nested DataFrames within each dictionary + for key , expected_df in dictionary2.items( ) : + + if isinstance( dictionary1[ key ] , pd.DataFrame ) : + dataframe_equality( dictionary1[ key ] , expected_df ) + + else : + for sub_key , _ in dictionary2[ key ].items( ): + dataframe_equality( dictionary1[ key ][ sub_key ] , + expected_df[ sub_key ] ) + +# ---- Extract dataframe shape +def extract_dataframe_shape( input: Union[ pd.DataFrame , dict ] ): + + ### DataFrame + if isinstance( input , pd.DataFrame ) : + + return input.shape + + ### Dictionary + elif isinstance( input , dict ) : + dataframe_shapes = { } + + for key , value in input.items( ): + if isinstance( value , pd.DataFrame ) : + dataframe_shapes[ key ] = value.shape + elif isinstance( value , dict ) : + dataframe_shapes[ key ] = extract_dataframe_shape( value ) + + return dataframe_shapes + +# ---- Extract dataframe dtypes +def dataframe_dtypes_equal( dataframe: 
pd.DataFrame , + reference_dictionary: dict ): + + ### Separate evaluation for categorical-type + # ---- Parse expected categorical variables + categorical_columns = [ k for k , v in reference_dictionary.items( ) if isinstance( v , pd.CategoricalDtype ) ] + + # ---- Assert that all categorical columns in the reference dictionary match the categorical + # ----- columns in the tested dataframe + assert np.all( dataframe.select_dtypes( include = [ 'category' ] ).columns.isin( categorical_columns ) ) + + # ---- Remove categorical columns from the dataframe + dataframe = dataframe.copy( ).drop( categorical_columns , axis = 1 ) + + ### Loop through columns to assert that dtypes from the tested dataframe + ### match those expected in a reference dictionary + for column , dtype in dataframe.dtypes.items( ): + assert np.issubdtype( dtype , reference_dictionary.get( column , object ) ) , \ + f"Data type mismatch for column '{ column }'" + + +# ---- Test for evaluating differences in DataFrame `dtypes` +def dataframe_dtypes_equality( input: Union[ pd.DataFrame , dict ] , + reference: dict ): + + ### DataFrame + if isinstance( input , pd.DataFrame ) : + dataframe_dtypes_equal( input , reference ) + + ### Dictionary + elif isinstance( input , dict ) : + for category , data in reference.items( ) : + + # ---- Single Dictionary layer + if isinstance( input[ category ] , pd.DataFrame ): + dataframe_dtypes_equal( input[ category ] , + reference[ category ] ) + + # ---- Nested Dictionary layers + else: + for df_name , _ in data.items( ): + dataframe_dtypes_equal( input[ category ][ df_name ] , reference[ category ][ df_name ] ) + +# ---- Test for evaluating equality between two dataframes +def dataframe_equality( dataframe1: pd.DataFrame , + dataframe2: pd.DataFrame ): + + ### Evaluate equality between numerical values + assert np.allclose( dataframe1.select_dtypes( include = [ 'number' ] ) , + dataframe2.select_dtypes( include = [ 'number' ] ) ) + + ### Evaluate equality between non-numerical values + assert np.all( dataframe1.select_dtypes( exclude = [ 'number' ] ) == dataframe2.select_dtypes( exclude = [ 'number' ] ) ) diff --git a/echopop/tests/test_data_transect_analysis.py b/echopop/tests/test_data_transect_analysis.py index faf2ccd4..93d0935d 100644 --- a/echopop/tests/test_data_transect_analysis.py +++ b/echopop/tests/test_data_transect_analysis.py @@ -1,17 +1,15 @@ import pandas as pd import numpy as np -from echopop.survey import Survey +from echopop.tests.conftest import dictionary_equality ,dataframe_equality +from echopop.tests.conftest import dataframe_shape_equal , dataframe_dtypes_equality def test_fit_binned_length_weight_relationship( mock_survey ): - #### Pull in mock Survey object - objS = mock_survey - - ### Initialize objS for `length_weight` - objS.statistics[ 'length_weight' ] = { } + ### Initialize mock_survey for `length_weight` + mock_survey.statistics[ 'length_weight' ] = { } ### Re-parameterize `specimen_df` with dummy data - objS.biology[ 'specimen_df' ] = pd.DataFrame( + mock_survey.biology[ 'specimen_df' ] = pd.DataFrame( { 'stratum_num': [ 0 , 0 , 1 , 1 , 2 , 2 , 3 , 3 ] , 'haul_num': [ 1 , 1 , 2 , 2 , 3 , 3 , 4 , 4 ] , @@ -24,98 +22,93 @@ def test_fit_binned_length_weight_relationship( mock_survey ): ) ### Re-parameterize `length_bins` with dummy data - objS.biology[ 'distributions' ][ 'length' ][ 'length_bins_arr' ] = ( + mock_survey.biology[ 'distributions' ][ 'length' ][ 'length_bins_arr' ] = ( [ 2.0 , 5.0 , 8.0 , 11.0 ] ) ### Re-parameterize `length_interval` with dummy 
data - objS.biology[ 'distributions' ][ 'length' ][ 'length_interval_arr' ] = ( + mock_survey.biology[ 'distributions' ][ 'length' ][ 'length_interval_arr' ] = ( [ 0.5 , 3.5 , 6.5 , 9.5 , 12.5 ] ) ### Evaluate object for later comparison - objS.fit_binned_length_weight_relationship( species_id = 8675309 ) + mock_survey.fit_binned_length_weight_relationship( species_id = 8675309 ) ###-------------------------------- ### Expected outcomes ###-------------------------------- - # ---- `objS.statistics[ 'length_weight' ][ 'regression_parameters' ]` - # ---- Expected dimensions - expected_dimensions_regression_parameters = tuple( [ 3 , 3 ] ) - # ---- Expected output - expected_output_regression_parameters = pd.DataFrame( - { - 'sex': [ 'all' , 'female' , 'male' ] , - 'rate': [ 2.0 , 2.0 , 2.0 ] , - 'initial': [ 4.7e-16 , -2.2e-16 , 1.1e-15 ] - } - ) - # ---- `objS.statistics[ 'length_weight' ][ 'length_weight_df' ]` - # ---- Expected dimensions - expected_dimensions_length_weight_df = tuple( [ 12 , 10 ] ) + # ---- Expected data types + expected_dtypes = { + 'regression_parameters': { + 'sex': object , + 'rate': np.floating , + 'initial': np.floating , + } , + 'length_weight_df': { + 'length_bin': pd.CategoricalDtype( ) , + 'sex': object , + 'mean_length': np.floating , + 'n_length': np.integer , + 'mean_weight': np.floating , + 'n_weight': np.integer , + 'rate': np.floating , + 'initial': np.floating , + 'weight_fitted': np.floating , + 'weight_modeled': np.floating , + } , + } # ---- Expected output - expected_output_length_weight_df = pd.DataFrame( - { - 'length_bin': pd.cut( np.repeat( [ 1 , 4 , 7 , 10 ] , 3 ) , - np.array( [ 0.5 , 3.5 , 6.5 , 9.5 , 12.5 ] ) ) , - 'sex': np.tile( [ 'all' , 'female' , 'male' ] , 4 ) , - 'mean_length': [ 2.5 , 3.0 , 2.0 , 5.0 , 5.0 , 5.0 , - 8.0 , 8.0 , 8.0 , 0.0 , 0.0 , 0.0 ] , - 'n_length': [ 2 , 1 , 1 , 3 , 1 , 2 , - 3 , 2 , 1 , 0 , 0 , 0 ] , - 'mean_weight': [ 6.50 , 9.00 , 4.00 , 25.67 , 25.00 , 26.00 , - 64.67 , 65.00 , 64.00 , 0.00 , 0.00 , 0.00 ] , - 'n_weight': [ 2 , 1 , 1 , 3 , 1 , 2 , - 3 , 2 , 1 , 0 , 0 , 0 ] , - 'rate': np.repeat( 2.0 , 12 ) , - 'initial': np.tile( [ 4.7e-16 , -2.2e-16 , 1.1e-15 ] , 4 ) , - 'weight_fitted': [ 4.0 , 4.0 , 4.0 , 25.0 , 25.0 , 25.0 , - 64.0 , 64.0 , 64.0 , 121.0 , 121.0 , 121.0 ] , - 'weight_modeled': [ 4.0 , 4.0 , 4.0 , 25.0 , 25.0 , 25.0 , - 64.0 , 64.0 , 64.0 , 121.0 , 121.0 , 121.0 ] - } - ) - expected_output_length_weight_df[ 'length_bin' ] = pd.IntervalIndex( expected_output_length_weight_df[ 'length_bin' ] ) - expected_output_length_weight_df[ 'length_bin' ] = pd.Categorical( expected_output_length_weight_df[ 'length_bin' ] , - categories = expected_output_length_weight_df[ 'length_bin' ].unique( ) , - ordered = True ) + expected_output = { + 'regression_parameters': pd.DataFrame( + { + 'sex': [ 'all' , 'female' , 'male' ] , + 'rate': [ 2.0 , 2.0 , 2.0 ] , + 'initial': [ 4.710277e-16 , -2.220446e-16 , 1.110223e-15 ] , + } , + ) , + 'length_weight_df': pd.DataFrame( + { + 'length_bin': pd.cut( np.repeat( [ 1 , 4 , 7 , 10 ] , 3 ) , + np.array( [ 0.5 , 3.5 , 6.5 , 9.5 , 12.5 ] ) ) , + 'sex': np.tile( [ 'all' , 'female' , 'male' ] , 4 ) , + 'mean_length': [ 2.5 , 3.0 , 2.0 , 5.0 , 5.0 , 5.0 , + 8.0 , 8.0 , 8.0 , 0.0 , 0.0 , 0.0 ] , + 'n_length': [ 2 , 1 , 1 , 3 , 1 , 2 , + 3 , 2 , 1 , 0 , 0 , 0 ] , + 'mean_weight': [ 6.50 , 9.00 , 4.00 , 25.6666667 , 25.00 , 26.00 , + 64.6666667 , 65.00 , 64.00 , 0.00 , 0.00 , 0.00 ] , + 'n_weight': [ 2 , 1 , 1 , 3 , 1 , 2 , + 3 , 2 , 1 , 0 , 0 , 0 ] , + 'rate': np.repeat( 2.0 , 12 
) , + 'initial': np.tile( [ 4.710277e-16 , -2.220446e-16 , 1.110223e-15 ] , 4 ) , + 'weight_fitted': [ 4.0 , 4.0 , 4.0 , 25.0 , 25.0 , 25.0 , + 64.0 , 64.0 , 64.0 , 121.0 , 121.0 , 121.0 ] , + 'weight_modeled': [ 4.0 , 4.0 , 4.0 , 25.0 , 25.0 , 25.0 , + 64.0 , 64.0 , 64.0 , 121.0 , 121.0 , 121.0 ] , + } , + ) , + } #---------------------------------- ### Run tests: `fit_binned_length_weight_relationship` #---------------------------------- - eval_regression_parameters = objS.statistics[ 'length_weight' ][ 'regression_parameters' ] - eval_length_weight_df = objS.statistics[ 'length_weight' ][ 'length_weight_df' ] + eval_dictionary = mock_survey.statistics[ 'length_weight' ] ### Check shape - assert eval_regression_parameters.shape == expected_dimensions_regression_parameters - assert eval_length_weight_df.shape == expected_dimensions_length_weight_df + dataframe_shape_equal( eval_dictionary , expected_output ) ### Check datatypes - assert np.all( eval_regression_parameters.dtypes == expected_output_regression_parameters.dtypes ) - assert np.all( eval_length_weight_df.dtypes == expected_output_length_weight_df.dtypes ) + dataframe_dtypes_equality( eval_dictionary , expected_dtypes ) ### Dataframe equality - assert np.allclose( eval_regression_parameters[ [ 'rate' , 'initial' ] ] , - expected_output_regression_parameters[ [ 'rate' , 'initial' ] ] , - rtol = 1e-1 ) - # ---- Non-float/high-precision - assert eval_length_weight_df[ [ 'length_bin' , 'sex' , 'mean_length' , 'n_length' , 'n_weight' ] ].equals( - expected_output_length_weight_df[ [ 'length_bin' , 'sex' , 'mean_length' , 'n_length' , 'n_weight' ] ] - ) - # ---- Float/high-precision - assert np.allclose( eval_length_weight_df[ [ 'mean_weight' , 'rate' , 'initial' ,'weight_fitted' , 'weight_modeled' ] ] , - expected_output_length_weight_df[ [ 'mean_weight' , 'rate' , 'initial' ,'weight_fitted' , 'weight_modeled' ] ] , - rtol = 1e-1 ) + dictionary_equality( eval_dictionary , expected_output ) def test_strata_sex_weight_proportions( mock_survey ): - #### Pull in mock Survey object - objS = mock_survey - - ### Initialize objS for `weight` - objS.biology[ 'weight' ] = { } + ### Initialize mock_survey for `weight` + mock_survey.biology[ 'weight' ] = { } - ### Initialize objS for `length_weight` - objS.statistics[ 'length_weight' ] = { } + ### Initialize mock_survey for `length_weight` + mock_survey.statistics[ 'length_weight' ] = { } ### Re-parameterize `specimen_df` with dummy data - objS.biology[ 'specimen_df' ] = pd.DataFrame( + mock_survey.biology[ 'specimen_df' ] = pd.DataFrame( { 'stratum_num': np.repeat( [ 0 , 1 ] , 4 ).astype( np.int64 ) , 'sex': np.tile( [ 'male' , 'female' ] , 4 ) , @@ -129,7 +122,7 @@ def test_strata_sex_weight_proportions( mock_survey ): ) ### Re-parameterize `length_df` with dummy data - objS.biology[ 'length_df' ] = pd.DataFrame( + mock_survey.biology[ 'length_df' ] = pd.DataFrame( { 'stratum_num': np.repeat( [ 0 , 1 ] , 4 ).astype( np.int64 ) , 'haul_num': [ 1 , 1 , 2 , 2 , 3 , 3 , 4 , 4 ] , @@ -142,7 +135,7 @@ def test_strata_sex_weight_proportions( mock_survey ): ) ### Re-parameterize `fitted_weight` with dummy data - objS.statistics[ 'length_weight' ][ 'length_weight_df' ] = pd.DataFrame( + mock_survey.statistics[ 'length_weight' ][ 'length_weight_df' ] = pd.DataFrame( { 'length_bin': pd.cut( np.repeat( [ 12 , 18 ] , 3 ) , np.linspace( 9 , 21 , 3 ) ) , @@ -159,7 +152,7 @@ def test_strata_sex_weight_proportions( mock_survey ): ### Re-parameterize `length_df` with dummy data - objS.biology[ 'length_df' ] = 
pd.DataFrame( + mock_survey.biology[ 'length_df' ] = pd.DataFrame( { 'stratum_num': np.repeat( [ 0 , 1 ] , 4 ).astype( np.int64 ) , 'sex': np.tile( [ 'male' , 'female' ] , 4 ) , @@ -171,7 +164,7 @@ def test_strata_sex_weight_proportions( mock_survey ): ) ### Re-parameterize `fitted_weight` with dummy data - objS.statistics[ 'length_weight' ][ 'length_weight_df' ] = pd.DataFrame( + mock_survey.statistics[ 'length_weight' ][ 'length_weight_df' ] = pd.DataFrame( { 'length_bin': pd.cut( np.repeat( [ 12 , 18 ] , 3 ) , np.linspace( 9 , 21 , 3 ) ) , @@ -187,50 +180,57 @@ def test_strata_sex_weight_proportions( mock_survey ): ) ### Re-parameterize `length_bins` with dummy data - objS.biology[ 'distributions' ][ 'length' ][ 'length_interval_arr' ] = np.linspace( 9 , 21 , 3 ) + mock_survey.biology[ 'distributions' ][ 'length' ][ 'length_interval_arr' ] = np.linspace( 9 , 21 , 3 ) ### Evaluate object for later comparison - objS.strata_sex_weight_proportions( species_id = 8675309 ) + mock_survey.strata_sex_weight_proportions( species_id = 8675309 ) ###-------------------------------- ### Expected outcomes ###-------------------------------- - # ---- Expected dimensions - expected_dimensions = tuple( [ 2 , 8 ] ) + # ---- Expected data types + expected_dtypes = { + 'stratum_num': np.integer , + 'proportion_female': np.floating , + 'proportion_male': np.floating , + 'proportion_station_1': np.floating , + 'proportion_station_2': np.floating , + 'average_weight_female': np.floating , + 'average_weight_male': np.floating , + 'average_weight_total': np.floating , + } # ---- Expected output expected_output = pd.DataFrame( { 'stratum_num': np.array( [ 0 , 1 ] ).astype( int ) , - 'proportion_female': [ 0.59 , 0.41 ] , - 'proportion_male': [ 0.41 , 0.59 ] , - 'proportion_station_1': [ 0.93 , 0.93 ] , - 'proportion_station_2': [ 0.07 , 0.07 ] , - 'average_weight_female': [ 4.72 , 2.71 ] , - 'average_weight_male': [ 6.64 , 6.30 ] , - 'average_weight_total': [ 3.07 , 2.60 ] - } + 'proportion_female': [ 0.592593 , 0.407407 ] , + 'proportion_male': [ 0.407407 , 0.592593 ] , + 'proportion_station_1': [ 0.925926 , 0.925926 ] , + 'proportion_station_2': [ 0.074074 , 0.074074 ] , + 'average_weight_female': [ 4.719110 , 2.707892 ] , + 'average_weight_male': [ 6.640487 , 6.299942 ] , + 'average_weight_total': [ 3.066481 , 2.603519 ] , + } , ) + #---------------------------------- ### Run tests: `strata_sex_weight_proportions` #---------------------------------- - eval_weight_strata_df = objS.biology[ 'weight' ][ 'weight_strata_df' ] + eval_dataframe = mock_survey.biology[ 'weight' ][ 'weight_strata_df' ] ### Check shape - assert eval_weight_strata_df.shape == expected_dimensions + dataframe_shape_equal( eval_dataframe , expected_dtypes ) ### Check datatypes - assert np.all( eval_weight_strata_df.dtypes == expected_output.dtypes ) + dataframe_dtypes_equality( eval_dataframe , expected_dtypes ) ### Dataframe equality - assert np.allclose( eval_weight_strata_df , expected_output , rtol = 1e-1 ) + dataframe_equality( eval_dataframe , expected_output ) def test_strata_age_binned_weight_proportions( mock_survey ): - #### Pull in mock Survey object - objS = mock_survey - - ### Initialize objS for `weight` - objS.biology[ 'weight' ] = { } + ### Initialize mock_survey for `weight` + mock_survey.biology[ 'weight' ] = { } ### Re-parameterize `specimen_df` with dummy data - objS.biology[ 'specimen_df' ] = pd.DataFrame( + mock_survey.biology[ 'specimen_df' ] = pd.DataFrame( { 'stratum_num': np.repeat( [ 0 , 1 ] , 4 ) , 'sex': np.tile( 
[ 'male' , 'female' ] , 4 ) , @@ -239,40 +239,67 @@ def test_strata_age_binned_weight_proportions( mock_survey ): 'species_id': np.repeat( [ 8675309 ] , 8 ) , 'length': [ 12.0 , 12.0 , 19.0 , 19.0 , 12.0 , 12.0 , 19.0 , 19.0 ] , 'weight': [ 2.0 , 3.0 , 3.0 , 2.0 , 2.0 , 3.0 , 2.0 , 3.0 ] , - 'age': [ 1 , 1 , 2 , 2 , 1 , 1 , 2 , 2 ] - } + 'age': [ 1 , 1 , 2 , 2 , 1 , 1 , 2 , 2 ] , + } , ) ### Re-parameterize `length_bins` with dummy data - objS.biology[ 'distributions' ][ 'length' ][ 'length_interval_arr' ] = np.linspace( 9 , 21 , 3 ) + mock_survey.biology[ 'distributions' ][ 'length' ][ 'length_interval_arr' ] = np.linspace( 9 , 21 , 3 ) ### Evaluate object for later comparison - objS.strata_age_binned_weight_proportions( species_id = 8675309 ) + mock_survey.strata_age_binned_weight_proportions( species_id = 8675309 ) ###-------------------------------- ### Expected outcomes ###-------------------------------- - # ---- Expected dimensions - expected_dimensions = { 'age_proportions': tuple( [ 4 , 4 ] ) , - 'age_weight_proportions': tuple( [ 4 , 4 ] ) , - 'sex_age_weight_proportions': tuple( [ 12 , 5 ] ) , - 'length_sex_age_weight_proportions': tuple( [ 24 , 9 ] ) } - + # ---- Expected dtypes + expected_dtypes = { + 'age_proportions_df': { + 'stratum_num': np.integer , + 'age': np.integer , + 'count_age_proportion_all': np.floating , + 'count_age_proportion_adult': np.floating , + } , + 'age_weight_proportions_df': { + 'stratum_num': np.integer , + 'age': np.integer , + 'weight_age_proportion_all': np.floating , + 'weight_age_proportion_adult': np.floating , + } , + 'sex_age_weight_proportions_df': { + 'stratum_num': np.integer , + 'age': np.integer , + 'sex': object , + 'weight_sex_proportion_all': np.floating , + 'weight_sex_proportion_adult': np.floating , + } , + 'length_sex_age_weight_proportions_df': { + 'stratum_num': np.integer , + 'age': np.integer , + 'length_bin': pd.CategoricalDtype( ) , + 'sex': object , + 'count': np.floating , + 'weight_total_all': np.floating , + 'weight_total_adult': np.floating , + 'weight_length_sex_proportion_all': np.floating , + 'weight_length_sex_proportion_adult': np.floating , + } , + } # ---- Expected output expected_output = { - 'age_proportions': pd.DataFrame( { + 'age_proportions_df': pd.DataFrame( { 'stratum_num': np.repeat( [ 0 , 1 ] , 2 ).astype( np.int64 ) , 'age': np.tile( [ 1 , 2 ] , 2 ).astype( np.int64 ) , 'count_age_proportion_all': np.repeat( 0.5 , 4 ) , 'count_age_proportion_adult': [ 0.0 , 1.0 , 0.0 , 1.0 ] } ) , - 'age_weight_proportions': pd.DataFrame( { + 'age_weight_proportions_df': pd.DataFrame( { 'stratum_num': np.repeat( [ 0 , 1 ] , 2 ).astype( np.int64 ) , 'age': np.tile( [ 1 , 2 ] , 2 ).astype( np.int64 ) , 'weight_age_proportion_all': [ 0.50 , 0.50 , 0.50 , 0.50 ] , 'weight_age_proportion_adult': [ 0.0 , 1.0 , 0.0 , 1.0 ] } ) , - 'sex_age_weight_proportions': pd.DataFrame( { + 'sex_age_weight_proportions_df': pd.DataFrame( { 'stratum_num': np.repeat( [ 0 , 1 ] , 6 ).astype( np.int64 ) , 'age': np.tile( [ 1 , 1 , 1 , 2 , 2 , 2 ] , 2 ).astype( np.int64 ) , 'sex': np.tile( [ 'all' , 'female' , 'male' ] , 4 ) , @@ -280,7 +307,7 @@ def test_strata_age_binned_weight_proportions( mock_survey ): 0.5 , 0.5 , 0.5 , 0.5 , 0.5 , 0.5 ] , 'weight_sex_proportion_adult': np.tile( [ 0.0 , 0.0 , 0.0 , 1.0 , 1.0 , 1.0 ] , 2 ) } ) , - 'length_sex_age_weight_proportions': pd.DataFrame( { + 'length_sex_age_weight_proportions_df': pd.DataFrame( { 'stratum_num': np.repeat( [ 0 , 1 ] , 12 ).astype( np.int64 ) , 'age': np.tile( [ 1 , 1 , 1 , 1 , 1 , 
1 , 2 , 2 , 2 , 2 , 2 , 2 ] , 2 ).astype( np.int64 ) , @@ -304,73 +331,58 @@ def test_strata_age_binned_weight_proportions( mock_survey ): 0.5 , 0.5 , 0.5 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.5 , 0.5 , 0.5 ] , 'weight_length_sex_proportion_adult': np.tile( [ 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , - 0.0 , 0.0 , 0.0 , 1.0 , 1.0 , 1.0 ] , 2 ) - } ) + 0.0 , 0.0 , 0.0 , 1.0 , 1.0 , 1.0 ] , 2 ) , + } , ) , } #---------------------------------- ### Run tests: `strata_age_binned_weight_proportions` #---------------------------------- - eval_age_proportions_df = objS.biology[ 'weight' ][ 'proportions' ][ 'age_proportions_df' ] - eval_age_weight_proportions_df = objS.biology[ 'weight' ][ 'proportions' ][ 'age_weight_proportions_df' ] - eval_sex_age_weight_proportions_df = objS.biology[ 'weight' ][ 'proportions' ][ 'sex_age_weight_proportions_df' ] - eval_length_sex_age_weight_proportions_df = objS.biology[ 'weight' ][ 'proportions' ][ 'length_sex_age_weight_proportions_df' ] + eval_dictionary = mock_survey.biology[ 'weight' ][ 'proportions' ] ### Check shape - assert eval_age_proportions_df.shape == expected_dimensions[ 'age_proportions' ] - assert eval_age_weight_proportions_df.shape == expected_dimensions[ 'age_weight_proportions' ] - assert eval_sex_age_weight_proportions_df.shape == expected_dimensions[ 'sex_age_weight_proportions' ] - assert eval_length_sex_age_weight_proportions_df.shape == expected_dimensions[ 'length_sex_age_weight_proportions' ] + dataframe_shape_equal( eval_dictionary , expected_output ) ### Check datatypes - assert np.all( eval_age_proportions_df.dtypes == expected_output[ 'age_proportions' ].dtypes ) - assert np.all( eval_age_weight_proportions_df.dtypes == expected_output[ 'age_weight_proportions' ].dtypes ) - assert np.all( eval_sex_age_weight_proportions_df.dtypes == expected_output[ 'sex_age_weight_proportions' ].dtypes ) - assert np.all( eval_length_sex_age_weight_proportions_df.dtypes == expected_output[ 'length_sex_age_weight_proportions' ].dtypes ) + dataframe_dtypes_equality( eval_dictionary , expected_dtypes ) ### Dataframe equality - assert eval_age_proportions_df.equals( expected_output[ 'age_proportions' ] ) - assert eval_age_weight_proportions_df.equals( expected_output[ 'age_weight_proportions' ] ) - assert eval_sex_age_weight_proportions_df.equals( expected_output[ 'sex_age_weight_proportions' ] ) - assert eval_length_sex_age_weight_proportions_df.equals( expected_output[ 'length_sex_age_weight_proportions' ] ) + dictionary_equality( eval_dictionary , expected_output ) def test_nasc_to_biomass_conversion( mock_survey ): - #### Pull in mock Survey object - objS = mock_survey - ### Initialize various attributes - objS.acoustics[ 'sigma_bs' ] = { } - objS.statistics[ 'length_weight' ] = { } - objS.biology[ 'weight' ] = { } - objS.biology[ 'population' ] = { } + mock_survey.acoustics[ 'sigma_bs' ] = { } + mock_survey.statistics[ 'length_weight' ] = { } + mock_survey.biology[ 'weight' ] = { } + mock_survey.biology[ 'population' ] = { } ### Create mock data for `age_proportions_df` - objS.biology[ 'weight' ][ 'proportions' ] = { } - objS.biology[ 'weight' ][ 'proportions' ][ 'age_proportions_df' ] = pd.DataFrame( { + mock_survey.biology[ 'weight' ][ 'proportions' ] = { } + mock_survey.biology[ 'weight' ][ 'proportions' ][ 'age_proportions_df' ] = pd.DataFrame( { 'stratum_num': np.repeat( [ 0 , 1 ] , 2 ).astype( np.int64 ) , 'age': np.tile( [ 1 , 2 ] , 2 ).astype( np.int64 ) , 'count_age_proportion_all': np.repeat( 0.5 , 4 ) , 'count_age_proportion_adult': [ 0.0 
, 1.0 , 0.0 , 1.0 ] - } ) + } , ) ### Create mock data for `age_weight_proportions_df` - objS.biology[ 'weight' ][ 'proportions' ][ 'age_weight_proportions_df' ] = pd.DataFrame( { + mock_survey.biology[ 'weight' ][ 'proportions' ][ 'age_weight_proportions_df' ] = pd.DataFrame( { 'stratum_num': np.repeat( [ 0 , 1 ] , 2 ).astype( np.int64 ) , 'age': np.tile( [ 1 , 2 ] , 2 ).astype( np.int64 ) , 'weight_age_proportion_all': [ 0.50 , 0.50 , 0.50 , 0.50 ] , - 'weight_age_proportion_adult': [ 0.0 , 1.0 , 0.0 , 1.0 ] - } ) + 'weight_age_proportion_adult': [ 0.0 , 1.0 , 0.0 , 1.0 ] , + } , ) ### Create mock data for `sex_age_weight_proportions_df` - objS.biology[ 'weight' ][ 'proportions' ][ 'sex_age_weight_proportions_df' ] = pd.DataFrame( { + mock_survey.biology[ 'weight' ][ 'proportions' ][ 'sex_age_weight_proportions_df' ] = pd.DataFrame( { 'stratum_num': np.repeat( [ 0 , 1 ] , 6 ).astype( np.int64 ) , 'age': np.tile( [ 1 , 1 , 1 , 2 , 2 , 2 ] , 2 ).astype( np.int64 ) , 'sex': np.tile( [ 'all' , 'female' , 'male' ] , 4 ) , 'weight_sex_proportion_all': [ 0.5 , 0.6 , 0.4 , 0.5 , 0.4 , 0.6 , 0.5 , 0.5 , 0.5 , 0.5 , 0.5 , 0.5 ] , - 'weight_sex_proportion_adult': np.tile( [ 0.0 , 0.0 , 0.0 , 1.0 , 1.0 , 1.0 ] , 2 ) - } ) + 'weight_sex_proportion_adult': np.tile( [ 0.0 , 0.0 , 0.0 , 1.0 , 1.0 , 1.0 ] , 2 ) , + } , ) ### Create mock data for 'length_weight_df' - objS.statistics[ 'length_weight' ][ 'length_weight_df' ] = pd.DataFrame( + mock_survey.statistics[ 'length_weight' ][ 'length_weight_df' ] = pd.DataFrame( { 'length_bin': pd.cut( np.repeat( [ 12 , 18 ] , 3 ) , np.linspace( 9 , 21 , 3 ) ) , @@ -381,12 +393,12 @@ def test_nasc_to_biomass_conversion( mock_survey ): 'rate': [ 2.63 , 1.36 , 3.90 , 2.63 , 1.36 , 3.90 ] , 'initial': [ -2.49 , -0.93 , -4.06 , -2.49 , -0.93 , -4.06 ] , 'weight_fitted': [ 2.21 , 3.46 , 1.41 , 6.43 , 6.02 , 6.87 ] , - 'weight_modeled': [ 2.21 , 3.46 , 1.41 , 6.43 , 6.02 , 6.87 ] - } + 'weight_modeled': [ 2.21 , 3.46 , 1.41 , 6.43 , 6.02 , 6.87 ] , + } , ) ### Create mock data for `weight_strata_df` - objS.biology[ 'weight' ][ 'weight_strata_df' ] = pd.DataFrame( + mock_survey.biology[ 'weight' ][ 'weight_strata_df' ] = pd.DataFrame( { 'stratum_num': [ 0 , 1 ] , 'proportion_female': [ 0.592593 , 0.407407 ] , @@ -396,20 +408,20 @@ def test_nasc_to_biomass_conversion( mock_survey ): 'average_weight_female': [ 4.719110 , 2.707892 ] , 'average_weight_male': [ 6.640487 , 6.299942 ] , 'average_weight_total': [ 3.066481 , 2.603519 ] , - } + } , ) ### Create mock data for `strata_mean` (sigma_bs) - objS.acoustics[ 'sigma_bs' ][ 'strata_mean' ] = pd.DataFrame( + mock_survey.acoustics[ 'sigma_bs' ][ 'strata_mean' ] = pd.DataFrame( { 'stratum_num': [ 0 , 1 ] , 'species_id': np.repeat( 8675309 , 2 ) , - 'sigma_bs_mean': 1.630277e-8 - } + 'sigma_bs_mean': 1.630277e-8 , + } , ) ### Create mock data for `nasc_df` - objS.acoustics[ 'nasc' ][ 'nasc_df' ] = pd.DataFrame( + mock_survey.acoustics[ 'nasc' ][ 'nasc_df' ] = pd.DataFrame( { 'transect_num': [ 1 , 2 , 3 , 4] , 'stratum_num': [ 0 , 0 , 1 , 1 ] , @@ -420,131 +432,187 @@ def test_nasc_to_biomass_conversion( mock_survey ): 'transect_spacing': np.repeat( 1.0 , 4 ) , 'NASC_no_age1': [ 0.0 , 1e1 , 1e2 , 1e3 ] , 'haul_num': [ 1 , 1 , 2 , 2 ] , - 'NASC_all_ages': [ 1e1 , 1e2 , 1e2 , 1e3 ] - } + 'NASC_all_ages': [ 1e1 , 1e2 , 1e2 , 1e3 ] , + } , ) ### Create mock data for `strata_df` - objS.spatial[ 'strata_df' ] = pd.DataFrame( + mock_survey.spatial[ 'strata_df' ] = pd.DataFrame( { 'stratum_num': [ 0 , 1 ] , 'haul_num': [ 1 , 2 ] , - 
'fraction_hake': [ 1.000 , 0.500 ] - } + 'fraction_hake': [ 1.000 , 0.500 ] , + } , ) ### Evaluate object for later comparison - objS.nasc_to_biomass_conversion( species_id = 8675309 ) + mock_survey.nasc_to_biomass_conversion( species_id = 8675309 ) ###-------------------------------- ### Expected outcomes ###-------------------------------- - # ---- Expected dimensions - expected_dimensions = { + # ---- Expected dtypes + expected_dtypes = { 'areal_density': { - 'number_density': tuple( [ 32 , 10 ] ) , - 'biomass_density': tuple( [ 32 , 10 ] ) + 'number_density_df': { + 'transect_num': np.integer , + 'latitude': np.floating , + 'longitude': np.floating , + 'stratum_num': np.integer , + 'sex': object , + 'rho_a': np.floating , + 'age': np.integer , + 'count_age_proportion_all': np.floating , + 'count_age_proportion_adult': np.floating , + 'rho_a_adult': np.floating , + } , + 'biomass_density_df': { + 'transect_num': np.integer , + 'latitude': np.floating , + 'longitude': np.floating , + 'stratum_num': np.integer , + 'sex': object , + 'B_a': np.floating , + 'age': np.integer , + 'count_age_proportion_all': np.floating , + 'count_age_proportion_adult': np.floating , + 'B_a_adult': np.floating , + } , } , 'abundance': { - 'abundance': tuple( [ 32 , 12 ] ) + 'abundance_df': { + 'transect_num': np.integer , + 'latitude': np.floating , + 'longitude': np.floating , + 'stratum_num': np.integer , + 'sex': object , + 'NASC_all_ages': np.floating , + 'NASC_no_age1': np.floating , + + 'N': np.floating , + 'age': np.integer , + 'count_age_proportion_all': np.floating , + 'count_age_proportion_adult': np.floating , + 'N_adult': np.floating , + } , } , 'biomass': { - 'biomass': tuple( [ 32 , 10 ] ) , - 'biomass_age': tuple( [ 24 , 8 ] ) - } + 'biomass_df': { + 'transect_num': np.integer , + 'latitude': np.floating , + 'longitude': np.floating , + 'stratum_num': np.integer , + 'sex': object , + 'B': np.floating , + 'age': np.integer , + 'count_age_proportion_all': np.floating , + 'count_age_proportion_adult': np.floating , + 'B_adult': np.floating , + } , + 'biomass_age_df': { + 'transect_num': np.integer , + 'latitude': np.floating , + 'longitude': np.floating , + 'stratum_num': np.integer , + 'age': np.integer , + 'sex': object , + 'age_proportion': np.floating , + 'B_age': np.floating , + } , + } , } # ----- Expected output expected_output = { 'areal_density': { - 'number_density': pd.DataFrame( { + 'number_density_df': pd.DataFrame( { 'transect_num': np.repeat( [ 1 , 2 , 3 , 4 ] , 8 ).astype( np.int64 ) , 'latitude': np.repeat( [ 20.0 , 30.0 , 40.0 , 50.0 ] , 8 ) , 'longitude': np.repeat( [ -180.0 , -120.0 , -170.0 , -110.0 ] , 8 ) , 'stratum_num': np.repeat( [ 0 , 1 ] , 16 ).astype( np.int64 ) , 'sex': np.tile( [ 'all' , 'all' , 'male' , 'male' , - 'female' , 'female' , 'unsexed' , 'unsexed' ] , 4 ) , + 'female' , 'female' , 'unsexed' , 'unsexed' ] , 4 ) , 'rho_a': np.concatenate( [ np.repeat( 0.0 , 8 ) , - [ 4.88e7 , 4.88e7 , 1.99e7 , 1.99e7 , 2.90e7 , 2.90e7 , 0.0 , 0.0 , - 2.44e8 , 2.44e8 , 1.45e8 , 1.45e8 , 9.94e7 , 9.94e7 , 0.0 , 0.0 , - 2.44e9 , 2.44e9 , 1.45e9 , 1.45e9 , 9.94e8 , 9.94e8 , 0.0 , 0.0 ] ] ) , + [ 4.881224e7 , 4.881224e7 , 1.988645e7 , 1.988645e7 , 2.892579e7 , 2.892579e7 , 0.0 , 0.0 , + 2.440612e8 , 2.440612e8 , 1.446290e8 , 1.446290e8 , 9.943224e7 , 9.943224e7 , 0.0 , 0.0 , + 2.440612e9 , 2.440612e9 , 1.446290e9 , 1.446290e9 , 9.943224e8 , 9.943224e8 , 0.0 , 0.0 ] ] ) , 'age': np.tile( [ 1 , 2 ] , 16 ).astype( np.int64 ) , 'count_age_proportion_all': np.repeat( 0.5 , 32 ) , 
'count_age_proportion_adult': np.tile( [ 0.0 , 1.0 ] , 16 ) , 'rho_a_adult': np.concatenate( [ np.repeat( 0.0 , 9 ) , - [ 4.88e7 , 0.0 , 1.99e7 , 0.0 , 2.89e7 , 0.0 , 0.0 , 0.0 , - 2.44e8 , 0.0 , 1.45e8 , 0.0 , 9.94e7 , 0.0 , 0.0 , 0.0 , - 2.44e9 , 0.0 , 1.45e9 , 0.0 , 9.94e8 , 0.0 , 0.0] ] ) , + [ 4.881224e7 , 0.0 , 1.988645e7 , 0.0 , 2.892579e7 , 0.0 , 0.0 , 0.0 , + 2.440612e8 , 0.0 , 1.446290e8 , 0.0 , 9.943224e7 , 0.0 , 0.0 , 0.0 , + 2.440612e9 , 0.0 , 1.446290e9 , 0.0 , 9.943224e8 , 0.0 , 0.0] ] ) , } ) , - 'biomass_density': pd.DataFrame( { + 'biomass_density_df': pd.DataFrame( { 'transect_num': np.repeat( [ 1 , 2 , 3 , 4 ] , 8 ).astype( np.int64 ) , 'latitude': np.repeat( [ 20.0 , 30.0 , 40.0 , 50.0 ] , 8 ) , 'longitude': np.repeat( [ -180.0 , -120.0 , -170.0 , -110.0 ] , 8 ) , 'stratum_num': np.repeat( [ 0 , 1 ] , 16 ).astype( np.int64 ) , 'sex': np.tile( [ 'all' , 'all' , 'male' , 'male' , - 'female' , 'female' , 'unsexed' , 'unsexed' ] , 4 ) , + 'female' , 'female' , 'unsexed' , 'unsexed' ] , 4 ) , 'B_a': np.concatenate( [ np.repeat( 0.0 , 8 ) , - [ 1.50e8 , 1.50e8 , 1.32e8 , 1.32e8 , 1.37e8 , 1.37e8 , 0.0 , 0.0 , - 6.35e8 , 6.35e8 , 9.11e8 , 9.11e8 , 2.69e8 , 2.69e8 , 0.0 , 0.0 , - 6.35e9 , 6.35e9 , 9.11e9 , 9.11e9 , 2.69e9 , 2.69e9 , 0.0 , 0.0 ] ] ) , + [ 1.496818e8 , 1.496818e8 , 1.320557e8 , 1.320557e8 , 1.365040e8 , 1.365040e8 , 0.0 , 0.0 , + 6.354180e8 , 6.354180e8 , 9.111540e8 , 9.111540e8 , 2.692518e8 , 2.692518e8 , 0.0 , 0.0 , + 6.354180e9 , 6.354180e9 , 9.111540e9 , 9.111540e9 , 2.692518e9 , 2.692518e9 , 0.0 , 0.0 ] ] ) , 'age': np.tile( [ 1 , 2 ] , 16 ).astype( np.int64 ) , 'count_age_proportion_all': np.repeat( 0.5 , 32 ) , 'count_age_proportion_adult': np.tile( [ 0.0 , 1.0 ] , 16 ) , 'B_a_adult': np.concatenate( [ np.repeat( 0.0 , 9 ) , - [ 1.5e8 , 0.0 , 1.32e8 , 0.0 , 1.36e8 , 0.0 , 0.0 , 0.0 , - 6.35e8 , 0.0 , 9.11e8 , 0.0 , 2.69e8 , 0.0 , 0.0 , 0.0 , - 6.35e9 , 0.0 , 9.11e9 , 0.0 , 2.69e9 , 0.0 , 0.0] ] ) , + [ 1.496818e8 , 0.0 , 1.320557e8 , 0.0 , 1.365040e8 , 0.0 , 0.0 , 0.0 , + 6.354180e8 , 0.0 , 9.111540e8 , 0.0 , 2.692518e8 , 0.0 , 0.0 , 0.0 , + 6.354180e9 , 0.0 , 9.111540e9 , 0.0 , 2.692518e9 , 0.0 , 0.0] ] ) , } ) , } , 'abundance': { - 'abundance': pd.DataFrame( { + 'abundance_df': pd.DataFrame( { 'transect_num': np.repeat( [ 1 , 2 , 3 , 4 ] , 8 ).astype( np.int64 ) , 'latitude': np.repeat( [ 20.0 , 30.0 , 40.0 , 50.0 ] , 8 ) , 'longitude': np.repeat( [ -180.0 , -120.0 , -170.0 , -110.0 ] , 8 ) , 'stratum_num': np.repeat( [ 0 , 1 ] , 16 ).astype( np.int64 ) , 'sex': np.tile( [ 'all' , 'all' , 'male' , 'male' , - 'female' , 'female' , 'unsexed' , 'unsexed' ] , 4 ) , + 'female' , 'female' , 'unsexed' , 'unsexed' ] , 4 ) , 'NASC_all_ages': np.concatenate( [ np.repeat( 1e1 , 8 ) , - np.repeat( 1e2 , 16 ) , - np.repeat( 1e3 , 8 ) ] ) , + np.repeat( 1e2 , 16 ) , + np.repeat( 1e3 , 8 ) ] ) , 'NASC_no_age1': np.concatenate( [ np.repeat( 0 , 8 ) , - np.repeat( 1e1 , 8 ) , - np.repeat( 1e2 , 8 ) , - np.repeat( 1e3 , 8 ) ] ) , + np.repeat( 1e1 , 8 ) , + np.repeat( 1e2 , 8 ) , + np.repeat( 1e3 , 8 ) ] ) , 'N': np.concatenate( [ np.repeat( 0.0 , 8 ) , - [ 4.88e8 , 4.88e8 , 1.99e8 , 1.99e8 , 2.90e8 , 2.90e8 , 0.0 , 0.0 , - 2.44e9 , 2.44e9 , 1.45e9 , 1.45e9 , 9.94e8 , 9.94e8 , 0.0 , 0.0 , - 2.42e10 , 2.42e10 , 1.43e10 , 1.43e10 , 9.84e9 , 9.84e9 , 0.0 , 0.0 ] ] ) , + [ 4.881224e8 , 4.881224e8 , 1.988645e8 , 1.988645e8 , 2.892579e8 , 2.892579e8 , 0.0 , 0.0 , + 2.440612e9 , 2.440612e9 , 1.44629e9 , 1.44629e9 , 9.943224e8 , 9.943224e8 , 0.0 , 0.0 , + 2.416206e10 , 
2.416206e10 , 1.431827e10 , 1.431827e10 , 9.843792e9 , 9.843792e9 , 0.0 , 0.0 ] ] ) , 'age': np.tile( [ 1 , 2 ] , 16 ).astype( np.int64 ) , 'count_age_proportion_all': np.repeat( 0.5 , 32 ) , 'count_age_proportion_adult': np.tile( [ 0.0 , 1.0 ] , 16 ) , 'N_adult': np.concatenate( [ np.repeat( 0.0 , 9 ) , - [ 4.88e8 , 0.0 , 1.99e8 , 0.0 , 2.90e8, 0.0 , 0.0 , 0.0 , - 2.44e9 , 0.0 , 1.45e9 , 0.0 , 9.94e8 , 0.0 , 0.0 , 0.0 , - 2.42e10 , 0.0 , 1.43e10 , 0.0 , 9.84e9 , 0.0 , 0.0] ] ) , + [ 4.881224e8 , 0.0 , 1.988645e8 , 0.0 , 2.892579e8, 0.0 , 0.0 , 0.0 , + 2.440612e9 , 0.0 , 1.44629e9 , 0.0 , 9.943224e8 , 0.0 , 0.0 , 0.0 , + 2.416206e10 , 0.0 , 1.431827e10 , 0.0 , 9.843792e9 , 0.0 , 0.0] ] ) , } ) , } , 'biomass': { - 'biomass': pd.DataFrame( { + 'biomass_df': pd.DataFrame( { 'transect_num': np.repeat( [ 1 , 2 , 3 , 4 ] , 8 ).astype( np.int64 ) , 'latitude': np.repeat( [ 20.0 , 30.0 , 40.0 , 50.0 ] , 8 ) , 'longitude': np.repeat( [ -180.0 , -120.0 , -170.0 , -110.0 ] , 8 ) , 'stratum_num': np.repeat( [ 0 , 1 ] , 16 ).astype( np.int64 ) , 'sex': np.tile( [ 'all' , 'all' , 'male' , 'male' , - 'female' , 'female' , 'unsexed' , 'unsexed' ] , 4 ) , + 'female' , 'female' , 'unsexed' , 'unsexed' ] , 4 ) , 'B': np.concatenate( [ np.repeat( 0.0 , 8 ) , - [ 1.50e9 , 1.50e9 , 1.32e9 , 1.32e9 , 1.37e9 , 1.37e9 , 0.0 , 0.0 , - 6.35e9 , 6.35e9 , 9.11e9 , 9.11e9 , 2.69e9 , 2.69e9 , 0.0 , 0.0 , - 6.29e10 , 6.29e10 , 9.02e10 , 9.02e10 , 2.67e10 , 2.67e10 , 0.0 , 0.0 ] ] ) , + [ 1.496818e9 , 1.496818e9 , 1.320557e9 , 1.320557e9 , 1.365040e9 , 1.365040e9 , 0.0 , 0.0 , + 6.354180e9 , 6.354180e9 , 9.111540e9 , 9.111540e9 , 2.692518e9 , 2.692518e9 , 0.0 , 0.0 , + 6.290638e10 , 6.290638e10 , 9.020425e10 , 9.020425e10 , 2.665593e10 , 2.665593e10 , 0.0 , 0.0 ] ] ) , 'age': np.tile( [ 1 , 2 ] , 16 ).astype( np.int64 ) , 'count_age_proportion_all': np.repeat( 0.5 , 32 ) , 'count_age_proportion_adult': np.tile( [ 0.0 , 1.0 ] , 16 ) , 'B_adult': np.concatenate( [ np.repeat( 0.0 , 9 ) , - [ 1.50e9 , 0.0 , 1.32e9 , 0.0 , 1.37e9 , 0.0 , 0.0 , 0.0 , - 6.35e9 , 0.0 , 9.11e9 , 0.0 , 2.69e9 , 0.0 , 0.0 , 0.0 , - 6.29e10 , 0.0 , 9.02e10 , 0.0 , 2.67e10 , 0.0 , 0.0] ] ) , + [ 1.496818e9 , 0.0 , 1.320557e9 , 0.0 , 1.365040e9 , 0.0 , 0.0 , 0.0 , + 6.354180e9 , 0.0 , 9.111540e9 , 0.0 , 2.692518e9 , 0.0 , 0.0 , 0.0 , + 6.290638e10 , 0.0 , 9.020425e10 , 0.0 , 2.665593e10 , 0.0 , 0.0] ] ) , } ) , - 'biomass_age': pd.DataFrame( { + 'biomass_age_df': pd.DataFrame( { 'transect_num': np.repeat( [ 1 , 2 , 3 , 4 ] , 6 ).astype( np.int64 ) , 'latitude': np.repeat( [ 20.0 , 30.0 , 40.0 , 50.0 ] , 6 ) , 'longitude': np.repeat( [ -180.0 , -120.0 , -170.0 , -110.0 ] , 6 ) , @@ -553,81 +621,22 @@ def test_nasc_to_biomass_conversion( mock_survey ): 'sex': np.tile( [ 'all' , 'all' , 'male' , 'male' , 'female' , 'female' ] , 4 ) , 'age_proportion': np.tile( [ 0.0 , 1.0 ] , 12 ) , 'B_age': np.concatenate( [ np.repeat( 0.0 , 7 ) , - [ 1.497e9 , 0.000 , 1.321e9 , 0.000 , 1.365e9 , 0.000 , - 6.354e9 , 0.000 , 9.111e9 , 0.000 , 2.693e9 , 0.000 , - 6.291e10 , 0.000 , 9.020e10 , 0.000 , 2.666e10 ] ] ) , + [ 1.496818e9 , 0.000 , 1.320557e9 , 0.000 , 1.365040e9 , 0.000 , + 6.354180e9 , 0.000 , 9.111540e9 , 0.000 , 2.692518e9 , 0.000 , + 6.290638e10 , 0.000 , 9.020425e10 , 0.000 , 2.665593e10 ] ] ) , } ) , - } + } , } #---------------------------------- ### Run tests: `test_nasc_to_biomass_conversion` #---------------------------------- - # ---- Set evaluated dataframes from function - eval_number_density_df = objS.biology[ 'population' ][ 'areal_density' ][ 
'number_density_df' ] - # eval_number_density_df.reset_index( drop = True , inplace = True ) - eval_biomass_density_df = objS.biology[ 'population' ][ 'areal_density' ][ 'biomass_density_df' ] - # eval_biomass_density_df.reset_index( drop = True , inplace = True ) - eval_abundance_df = objS.biology[ 'population' ][ 'abundance' ][ 'abundance_df' ] - # eval_abundance_df.reset_index( drop = True , inplace = True ) - eval_biomass_df = objS.biology[ 'population' ][ 'biomass' ][ 'biomass_df' ] - # eval_biomass_df.reset_index( drop = True , inplace = True ) - eval_biomass_age_df = objS.biology[ 'population' ][ 'biomass' ][ 'biomass_age_df' ] - # eval_biomass_age_df.reset_index( drop = True , inplace = True ) - - # ---- Extract expected dataframes to appropriately set indices - expected_number_density_df = ( - expected_output[ 'areal_density' ][ 'number_density' ] - # .set_index( pd.Index( list( expected_output[ 'areal_density' ][ 'number_density' ].index ) ) ) - ) - expected_biomass_density_df = ( - expected_output[ 'areal_density' ][ 'biomass_density' ] - # .set_index( pd.Index( list( expected_output[ 'areal_density' ][ 'biomass_density' ].index ) ) ) - ) - expected_abundance_df = ( - expected_output[ 'abundance' ][ 'abundance' ] - # .set_index( pd.Index( list( expected_output[ 'abundance' ][ 'abundance' ].index ) ) ) - ) - expected_biomass_df = ( - expected_output[ 'biomass' ][ 'biomass' ] - # .set_index( pd.Index( list( expected_output[ 'biomass' ][ 'biomass' ].index ) ) ) - ) - expected_biomass_age_df = ( - expected_output[ 'biomass' ][ 'biomass_age' ] - # .set_index( pd.Index( list( expected_output[ 'biomass' ][ 'biomass_age' ].index ) ) ) - ) + eval_dictionary = mock_survey.biology[ 'population' ] ### Check shape - assert eval_number_density_df.shape == expected_dimensions[ 'areal_density' ][ 'number_density' ] - assert eval_biomass_density_df.shape == expected_dimensions[ 'areal_density' ][ 'biomass_density' ] - assert eval_abundance_df.shape == expected_dimensions[ 'abundance' ][ 'abundance' ] - assert eval_biomass_df.shape == expected_dimensions[ 'biomass' ][ 'biomass' ] - assert eval_biomass_age_df.shape == expected_dimensions[ 'biomass' ][ 'biomass_age' ] + dataframe_shape_equal( eval_dictionary , expected_output ) ### Check datatypes - assert np.all( eval_number_density_df.dtypes == expected_number_density_df.dtypes ) - assert np.all( eval_biomass_density_df.dtypes == expected_biomass_density_df.dtypes ) - assert np.all( eval_abundance_df.dtypes == expected_abundance_df.dtypes ) - assert np.all( eval_biomass_df.dtypes == expected_biomass_df.dtypes ) - assert np.all( eval_biomass_age_df.dtypes == expected_biomass_age_df.dtypes ) - ### Check dataframe equality - assert np.all( eval_number_density_df.sex == expected_number_density_df.sex ) - assert np.allclose( eval_number_density_df[ [ 'rho_a' , 'rho_a_adult' ] ] , - expected_number_density_df[ [ 'rho_a' , 'rho_a_adult' ] ] , - rtol = 1e-1 ) - assert np.all( eval_biomass_density_df.sex == expected_biomass_density_df.sex ) - assert np.allclose( eval_biomass_density_df[ [ 'B_a' , 'B_a_adult' ] ] , - expected_biomass_density_df[ [ 'B_a' , 'B_a_adult' ] ] , - rtol = 1e-1 ) - assert np.all( eval_abundance_df.sex == expected_abundance_df.sex ) - assert np.allclose( eval_abundance_df[ [ 'N' , 'N_adult' ] ] , - expected_abundance_df[ [ 'N' , 'N_adult' ] ] , - rtol = 1e-1 ) - assert np.all( eval_biomass_df.sex == expected_biomass_df.sex ) - assert np.allclose( eval_biomass_df[ [ 'B' , 'B_adult' ] ] , - expected_biomass_df[ [ 'B' , 'B_adult' ] ] 
, - rtol = 1e-1 ) - assert np.all( eval_biomass_age_df.sex == expected_biomass_age_df.sex ) - assert np.allclose( eval_biomass_age_df[ [ 'B_age' ] ] , - expected_biomass_age_df[ [ 'B_age' ] ] , - rtol = 1e-1 ) + dataframe_dtypes_equality( eval_dictionary , expected_dtypes ) + ### Dataframe equality + dictionary_equality( eval_dictionary , expected_output ) \ No newline at end of file diff --git a/echopop/tests/test_transect_functions.py b/echopop/tests/test_transect_functions.py index 15be99ff..97458ecd 100644 --- a/echopop/tests/test_transect_functions.py +++ b/echopop/tests/test_transect_functions.py @@ -1,24 +1,22 @@ import pandas as pd import numpy as np import copy -from echopop.survey import Survey +from echopop.tests.conftest import dictionary_equality , dataframe_equality +from echopop.tests.conftest import dataframe_shape_equal , dataframe_dtypes_equality from echopop.computation.biology import index_transect_age_sex_proportions from echopop.computation.spatial import correct_transect_intervals , calculate_start_end_coordinates , calculate_transect_distance def test_index_transect_age_sex_proportions( mock_survey ): - #### Pull in mock Survey object - objS = mock_survey - ### Initialize various attributes - objS.acoustics[ 'sigma_bs' ] = { } - objS.statistics[ 'length_weight' ] = { } - objS.biology[ 'weight' ] = { } - objS.biology[ 'population' ] = { } + mock_survey.acoustics[ 'sigma_bs' ] = { } + mock_survey.statistics[ 'length_weight' ] = { } + mock_survey.biology[ 'weight' ] = { } + mock_survey.biology[ 'population' ] = { } ### Create mock data for `age_proportions_df` - objS.biology[ 'weight' ][ 'proportions' ] = { } - objS.biology[ 'weight' ][ 'proportions' ][ 'age_proportions_df' ] = pd.DataFrame( { + mock_survey.biology[ 'weight' ][ 'proportions' ] = { } + mock_survey.biology[ 'weight' ][ 'proportions' ][ 'age_proportions_df' ] = pd.DataFrame( { 'stratum_num': np.repeat( [ 0 , 1 ] , 2 ).astype( np.int64 ) , 'age': np.tile( [ 1 , 2 ] , 2 ).astype( np.int64 ) , 'count_age_proportion_all': np.repeat( 0.5 , 4 ) , @@ -26,7 +24,7 @@ def test_index_transect_age_sex_proportions( mock_survey ): } ) ### Create mock data for `age_weight_proportions_df` - objS.biology[ 'weight' ][ 'proportions' ][ 'age_weight_proportions_df' ] = pd.DataFrame( { + mock_survey.biology[ 'weight' ][ 'proportions' ][ 'age_weight_proportions_df' ] = pd.DataFrame( { 'stratum_num': np.repeat( [ 0 , 1 ] , 2 ).astype( np.int64 ) , 'age': np.tile( [ 1 , 2 ] , 2 ).astype( np.int64 ) , 'weight_age_proportion_all': [ 0.50 , 0.50 , 0.50 , 0.50 ] , @@ -34,7 +32,7 @@ def test_index_transect_age_sex_proportions( mock_survey ): } ) ### Create mock data for `sex_age_weight_proportions_df` - objS.biology[ 'weight' ][ 'proportions' ][ 'sex_age_weight_proportions_df' ] = pd.DataFrame( { + mock_survey.biology[ 'weight' ][ 'proportions' ][ 'sex_age_weight_proportions_df' ] = pd.DataFrame( { 'stratum_num': np.repeat( [ 0 , 1 ] , 6 ).astype( np.int64 ) , 'age': np.tile( [ 1 , 1 , 1 , 2 , 2 , 2 ] , 2 ).astype( np.int64 ) , 'sex': np.tile( [ 'all' , 'female' , 'male' ] , 4 ) , @@ -44,7 +42,7 @@ def test_index_transect_age_sex_proportions( mock_survey ): } ) ### Create mock data for 'length_weight_df' - objS.statistics[ 'length_weight' ][ 'length_weight_df' ] = pd.DataFrame( + mock_survey.statistics[ 'length_weight' ][ 'length_weight_df' ] = pd.DataFrame( { 'length_bin': pd.cut( np.repeat( [ 12 , 18 ] , 3 ) , np.linspace( 9 , 21 , 3 ) ) , @@ -60,7 +58,7 @@ def test_index_transect_age_sex_proportions( mock_survey ): ) ### Create 
mock data for `weight_strata_df` - objS.biology[ 'weight' ][ 'weight_strata_df' ] = pd.DataFrame( + mock_survey.biology[ 'weight' ][ 'weight_strata_df' ] = pd.DataFrame( { 'stratum_num': [ 0 , 1 ] , 'proportion_female': [ 0.592593 , 0.407407 ] , @@ -74,7 +72,7 @@ def test_index_transect_age_sex_proportions( mock_survey ): ) ### Create mock data for `strata_mean` (sigma_bs) - objS.acoustics[ 'sigma_bs' ][ 'strata_mean' ] = pd.DataFrame( + mock_survey.acoustics[ 'sigma_bs' ][ 'strata_mean' ] = pd.DataFrame( { 'stratum_num': [ 0 , 1 ] , 'species_id': np.repeat( 8675309 , 2 ) , @@ -83,7 +81,7 @@ def test_index_transect_age_sex_proportions( mock_survey ): ) ### Create mock data for `nasc_df` - objS.acoustics[ 'nasc' ][ 'nasc_df' ] = pd.DataFrame( + mock_survey.acoustics[ 'nasc' ][ 'nasc_df' ] = pd.DataFrame( { 'transect_num': [ 1 , 2 , 3 , 4] , 'stratum_num': [ 0 , 0 , 1 , 1 ] , @@ -99,7 +97,7 @@ def test_index_transect_age_sex_proportions( mock_survey ): ) ### Create mock data for `strata_df` - objS.spatial[ 'strata_df' ] = pd.DataFrame( + mock_survey.spatial[ 'strata_df' ] = pd.DataFrame( { 'stratum_num': [ 0 , 1 ] , 'haul_num': [ 1 , 2 ] , @@ -108,9 +106,9 @@ def test_index_transect_age_sex_proportions( mock_survey ): ) ### Bundle the mocked data into their respective inputs for `index_transect_age_sex_proportions` - test_acoustics_dict = copy.deepcopy( objS.acoustics ) - test_biology_dict = copy.deepcopy( objS.biology ) - test_info_strata = objS.spatial[ 'strata_df' ].copy( ) + test_acoustics_dict = copy.deepcopy( mock_survey.acoustics ) + test_biology_dict = copy.deepcopy( mock_survey.biology ) + test_info_strata = mock_survey.spatial[ 'strata_df' ].copy( ) ### Evaluate object for later comparison eval_nasc_fraction_total_df = index_transect_age_sex_proportions( test_acoustics_dict , @@ -120,8 +118,33 @@ def test_index_transect_age_sex_proportions( mock_survey ): ###-------------------------------- ### Expected outcomes ###-------------------------------- - # ---- Expected dimensions - expected_dimensions = tuple( [ 8 , 24 ] ) + # ---- Expected dtypes + expected_dtypes = { + 'latitude': np.floating , + 'longitude': np.floating , + 'transect_num': np.integer , + 'stratum_num': np.integer , + 'haul_num': np.integer , + 'interval': np.floating , + 'interval_area': np.floating , + 'NASC_all_ages': np.floating , + 'NASC_no_age1': np.floating , + 'fraction_hake': np.floating , + 'species_id': np.integer , + 'sigma_bs_mean': np.floating , + 'proportion_female': np.floating , + 'proportion_male': np.floating , + 'proportion_station_1': np.floating , + 'proportion_station_2': np.floating , + 'average_weight_female': np.floating , + 'average_weight_male': np.floating , + 'average_weight_total': np.floating , + 'age': np.integer , + 'count_age_proportion_all': np.floating , + 'count_age_proportion_adult': np.floating , + 'weight_age_proportion_all': np.floating , + 'weight_age_proportion_adult': np.floating , + } # ---- Expected output expected_output = pd.DataFrame( { @@ -148,19 +171,19 @@ def test_index_transect_age_sex_proportions( mock_survey ): 'count_age_proportion_all': np.repeat( 0.5 , 8 ) , 'count_age_proportion_adult': np.tile( [ 0.0 , 1.0 ] , 4 ) , 'weight_age_proportion_all': np.repeat( 0.5 , 8 ) , - 'weight_age_proportion_adult': np.tile( [ 0.0 , 1.0 ] , 4 ) - } + 'weight_age_proportion_adult': np.tile( [ 0.0 , 1.0 ] , 4 ) , + } , ) #---------------------------------- ### Run tests: `index_transect_age_sex_proportions` #---------------------------------- ### Check shape - assert 
eval_nasc_fraction_total_df.shape == expected_dimensions + dataframe_shape_equal( eval_nasc_fraction_total_df , expected_output ) ### Check datatypes - assert np.all( eval_nasc_fraction_total_df.dtypes == expected_output.dtypes ) + dataframe_dtypes_equality( eval_nasc_fraction_total_df , expected_dtypes ) ### Dataframe equality - assert np.allclose( eval_nasc_fraction_total_df , expected_output , rtol = 1e-1 ) + dataframe_equality( eval_nasc_fraction_total_df , expected_output ) def test_correct_transect_intervals( ): @@ -176,8 +199,8 @@ def test_correct_transect_intervals( ): 'transect_spacing': np.repeat( 1.0 , 4 ) , 'NASC_no_age1': [ 0.0 , 1e1 , 1e2 , 1e3 ] , 'haul_num': [ 1 , 1 , 2 , 2 ] , - 'NASC_all_ages': [ 1e1 , 1e2 , 1e2 , 1e3 ] - } + 'NASC_all_ages': [ 1e1 , 1e2 , 1e2 , 1e3 ] , + } , ) ### Evaluate object for later comparison @@ -186,8 +209,18 @@ def test_correct_transect_intervals( ): ###-------------------------------- ### Expected outcomes ###-------------------------------- - # ---- Expected dimensions - expected_dimensions = tuple( [ 4 , 9 ] ) + # ---- Expected dtypes + expected_dtypes = { + 'latitude': np.floating , + 'longitude': np.floating , + 'transect_num': np.integer , + 'stratum_num': np.integer , + 'haul_num': np.integer , + 'interval': np.floating , + 'interval_area': np.floating , + 'NASC_all_ages': np.floating , + 'NASC_no_age1': np.floating , + } # ---- Expected output expected_output = pd.DataFrame( { @@ -200,23 +233,23 @@ def test_correct_transect_intervals( ): 'interval_area': [ 10.0 , 10.0 , 10.0 , 9.9 ] , 'NASC_all_ages': [ 1e1 , 1e2 , 1e2 , 1e3 ] , 'NASC_no_age1': [ 0.0 , 1e1 , 1e2 , 1e3 ] , - } + } , ) #---------------------------------- ### Run tests: `correct_transect_intervals` #---------------------------------- ### Check shape - assert eval_nasc_interval.shape == expected_dimensions + dataframe_shape_equal( eval_nasc_interval , expected_output ) ### Check datatypes - assert np.all( eval_nasc_interval.dtypes == expected_output.dtypes ) + dataframe_dtypes_equality( eval_nasc_interval , expected_dtypes ) ### Dataframe equality - assert np.allclose( eval_nasc_interval , expected_output ) + dataframe_equality( eval_nasc_interval , expected_output ) def test_calculate_start_end_coordinates( ): ### Create mock data for `nasc_df` - test_nasc_df = pd.DataFrame( + test_nasc_dataframe = pd.DataFrame( { 'transect_num': [ 1 , 1 , 2 , 2 ] , 'stratum_num': [ 0 , 0 , 1 , 1 ] , @@ -227,43 +260,48 @@ def test_calculate_start_end_coordinates( ): 'transect_spacing': np.repeat( 1.0 , 4 ) , 'NASC_no_age1': [ 0.0 , 1e1 , 1e2 , 1e3 ] , 'haul_num': [ 1 , 1 , 2 , 2 ] , - 'NASC_all_ages': [ 1e1 , 1e2 , 1e2 , 1e3 ] + 'NASC_all_ages': [ 1e1 , 1e2 , 1e2 , 1e3 ] , } ) ### Evaluate for later comparison - eval_test_nasc_df = calculate_start_end_coordinates( test_nasc_df , + eval_test_nasc_df = calculate_start_end_coordinates( test_nasc_dataframe , 'transect_num' ) ###-------------------------------- ### Expected outcomes ###-------------------------------- - # ---- Expected dimensions - expected_dimensions = tuple( [ 2 , 4 ] ) + # ---- Expected dtypes + expected_dtypes = { + 'transect_num': np.integer , + 'minimum_longitude': np.floating , + 'maximum_longitude': np.floating , + 'center_latitude': np.floating , + } # ---- Expected output expected_output = pd.DataFrame( { 'transect_num': [ 1 , 2 ] , 'minimum_longitude': [ -180.0 , -170.0 ] , 'maximum_longitude': [ -120.0 , -110.0 ] , - 'center_latitude': [ 25.0 , 45.0 ] - } + 'center_latitude': [ 25.0 , 45.0 ] , + } , ) 
#---------------------------------- ### Run tests: `calculate_start_end_coordinates` #---------------------------------- ### Check shape - assert eval_test_nasc_df.shape == expected_dimensions + dataframe_shape_equal( eval_test_nasc_df , expected_output ) ### Check datatypes - assert np.all( eval_test_nasc_df.dtypes == expected_output.dtypes ) + dataframe_dtypes_equality( eval_test_nasc_df , expected_dtypes ) ### Dataframe equality - assert eval_test_nasc_df.equals( expected_output ) + dataframe_equality( eval_test_nasc_df , expected_output ) def test_calculate_transect_distance( ): ### Create mock data for `nasc_df` - test_nasc_df = pd.DataFrame( + test_nasc_dataframe = pd.DataFrame( { 'transect_num': [ 1 , 1 , 2 , 2 ] , 'stratum_num': [ 0 , 0 , 1 , 1 ] , @@ -274,19 +312,26 @@ def test_calculate_transect_distance( ): 'transect_spacing': np.repeat( 2.0 , 4 ) , 'NASC_no_age1': [ 0.0 , 1e1 , 1e2 , 1e3 ] , 'haul_num': [ 1 , 1 , 2 , 2 ] , - 'NASC_all_ages': [ 1e1 , 1e2 , 1e2 , 1e3 ] - } + 'NASC_all_ages': [ 1e1 , 1e2 , 1e2 , 1e3 ] , + } , ) - ### Evaluate for later comparison - eval_test_nasc_df = calculate_transect_distance( test_nasc_df , + eval_test_nasc_df = calculate_transect_distance( test_nasc_dataframe , 'transect_num' ) ###-------------------------------- ### Expected outcomes ###-------------------------------- - # ---- Expected dimensions - expected_dimensions = tuple( [ 2 , 7 ] ) + # ---- Expected dtypes + expected_dtypes= { + 'transect_num': np.integer , + 'minimum_longitude': np.floating , + 'maximum_longitude': np.floating , + 'center_latitude': np.floating , + 'transect_distance': np.floating , + 'transect_spacing': np.floating , + 'transect_area': np.floating , + } # ---- Expected output expected_output = pd.DataFrame( { @@ -296,16 +341,16 @@ def test_calculate_transect_distance( ): 'center_latitude': [ 25.0 , 45.0 ] , 'transect_distance': [ 3241.273891 , 2493.203304 ] , 'transect_spacing': [ 2.0 , 2.0 ] , - 'transect_area': [ 6482.547781 , 4986.406609 ] - } + 'transect_area': [ 6482.547781 , 4986.406609 ] , + } , ) #---------------------------------- ### Run tests: `calculate_start_end_coordinates` #---------------------------------- ### Check shape - assert eval_test_nasc_df.shape == expected_dimensions + dataframe_shape_equal( eval_test_nasc_df , expected_output ) ### Check datatypes - assert np.all( eval_test_nasc_df.dtypes == expected_output.dtypes ) + dataframe_dtypes_equality( eval_test_nasc_df , expected_dtypes ) ### Dataframe equality - assert np.allclose( eval_test_nasc_df , expected_output ) \ No newline at end of file + dataframe_equality( eval_test_nasc_df , expected_output ) \ No newline at end of file From d5cd5b0253475fb13c6bd42637c7f81ac8334c69 Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Mon, 15 Apr 2024 19:44:51 -0700 Subject: [PATCH 32/35] Amended module call --- echopop/tests/test_data_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/echopop/tests/test_data_loader.py b/echopop/tests/test_data_loader.py index 53aa2f25..71c76237 100644 --- a/echopop/tests/test_data_loader.py +++ b/echopop/tests/test_data_loader.py @@ -5,7 +5,7 @@ from echopop import Survey from echopop.core import LAYER_NAME_MAP from echopop.utils.data_file_validation import load_configuration -from echopop.tests.utility_testing_functions import dictionary_shape_equal +from echopop.tests.conftest import dictionary_shape_equal def test_load_configuration(test_path, tmp_path): init_params = yaml.safe_load( From 6bf31d57940b1977caa745415b71924dcdcc53ae Mon Sep 
17 00:00:00 2001
From: Brandyn Lucca
Date: Mon, 15 Apr 2024 19:47:12 -0700
Subject: [PATCH 33/35] echopop.tests.conftest moved

---
 echopop/tests/test_stratified_summary.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/echopop/tests/test_stratified_summary.py b/echopop/tests/test_stratified_summary.py
index f9185128..7fd68c80 100644
--- a/echopop/tests/test_stratified_summary.py
+++ b/echopop/tests/test_stratified_summary.py
@@ -1,6 +1,6 @@
 import numpy as np
 import pandas as pd
-from echopop.tests.utility_testing_functions import dictionary_shape_equal
+from echopop.tests.conftest import dictionary_shape_equal
 from echopop.computation.statistics import stratified_transect_statistic , confidence_interval
 
 def test_stratified_transect_statistic( ):

From 40465ea060549bdc3538b30ff519d251f2119e8c Mon Sep 17 00:00:00 2001
From: Brandyn Lucca
Date: Tue, 16 Apr 2024 12:03:13 -0700
Subject: [PATCH 34/35] Updated utility test functions

---
 echopop/computation/operations.py | 3 +-
 echopop/tests/conftest.py | 207 +++++++---
 echopop/tests/test_data_loader.py | 104 +++--
 echopop/tests/test_data_transect_analysis.py | 33 +-
 echopop/tests/test_operations.py | 411 ++++++++++++-------
 echopop/tests/test_stratified_summary.py | 193 ++++-----
 echopop/tests/test_transect_functions.py | 35 +-
 7 files changed, 559 insertions(+), 427 deletions(-)

diff --git a/echopop/computation/operations.py b/echopop/computation/operations.py
index 5e4a9f78..8a1c4f4f 100644
--- a/echopop/computation/operations.py
+++ b/echopop/computation/operations.py
@@ -92,7 +92,8 @@ def bin_stats( dataframe: pd.DataFrame ,
     return ( dataframe # input dataframe
         .bin_variable( bin_values , bin_variable ) # discretize variable into bins )
-        .groupby( [f'{bin_variable}_bin'] + con_lst ) # group by these variables/contrasts
+        .groupby( [f'{bin_variable}_bin'] + con_lst ,
+                  observed = False ) # group by these variables/contrasts
         .agg( aggregation_dict ) # apply specified functions
         .replace( np.nan , 0 ) # replace NaN w/ 0's
         .droplevel( level = 0 , axis = 1 ) # drop the column indices
diff --git a/echopop/tests/conftest.py b/echopop/tests/conftest.py
index 90dacb8d..6a9fd9d7 100644
--- a/echopop/tests/conftest.py
+++ b/echopop/tests/conftest.py
@@ -43,7 +43,8 @@ def pytest_assertrepr_compare( config , op , left , right ):
         return assertrepr_compare( config , op , left , right)
 
 ### Utility functions
-# ---- Dictionary shape comparison utility function
+# ---- DICTIONARY
+# ++++ Shape and structure
 def dictionary_shape( dictionary: dict ) :
     """
     A utility test function that extracts the shape of a nested dictionary
@@ -53,17 +54,41 @@ def dictionary_shape( dictionary: dict ) :
         return( { i: dictionary_shape( dictionary[ i ] ) for i in dictionary } )
     else:
         return None
+
+# ---- DATAFRAME
+# ++++ Shape
+def dataframe_shape( input: Union[ pd.DataFrame , dict ] ):
+
+    ### DataFrame
+    if isinstance( input , pd.DataFrame ) :
+
+        return input.shape
+
+    ### Dictionary (bundled dataframes)
+    elif isinstance( input , dict ) :
+        dataframe_shapes = { }
+
+        for key , value in input.items( ):
+            if isinstance( value , pd.DataFrame ) :
+                dataframe_shapes[ key ] = value.shape
+            elif isinstance( value , dict ) :
+                # recurse with this helper itself (the old extract_dataframe_shape is removed below)
+                dataframe_shapes[ key ] = dataframe_shape( value )
+
+        return dataframe_shapes
 
 ### Assertion functions
+# ---- DICTIONARY
+# ---- Shape and dimensions
+def assert_dictionary_structure_equal( dictionary1:
dict , + dictionary2: dict ) : """ Tests equality between the shapes of two nested dictionaries """ + result = dictionary_shape( dictionary1 ) == dictionary_shape( dictionary2 ) if result : - return result + assert result else: if set( dictionary_shape( dictionary1 ) ) <= set( dictionary_shape( dictionary2 ) ) : tracked_true = [ ] @@ -73,15 +98,53 @@ def dictionary_shape_equal( dictionary1: dict , tracked_true.append( test ) if np.all( tracked_true ) : - return True + assert True else : - return result + assert result else : - return result + assert result +# ---- dtypes +def assert_dictionary_dtypes_equal( dictionary , + reference_dictionary ) : + + for key in reference_dictionary : + if isinstance( reference_dictionary[ key ] , dict ) : + assert isinstance( dictionary[ key ] , dict ) , \ + f"Key '{ key }' has different types in the dictionaries." + assert_dictionary_dtypes_equal( dictionary[ key ] , + reference_dictionary[ key ] ) + elif isinstance( dictionary[ key ] , type ) : + assert np.issubdtype( type( dictionary[ key ] ) , + reference_dictionary[ key ] ) , \ + f"Datatype for key '{ key }' is not a subdtype of the reference datatype." + elif isinstance( reference_dictionary[ key ] , np.ndarray ) : + assert isinstance( dictionary[ key ] , np.ndarray ) , \ + f"Datatype for key '{ key }' is not the same as in reference dictionary." + assert np.issubdtype( dictionary[ key ].dtype , + reference_dictionary[ key ].dtype ) , \ + f"Dtype for key '{ key }' is not a subdtype of the reference dtype." +# ---- Values +def assert_dictionary_values_equal( dictionary , + reference_dictionary ) : + for key in dictionary : + if isinstance( dictionary[ key ] , dict ) : + assert isinstance( reference_dictionary[ key ] , dict ) , \ + f"Key '{ key }' has different types in the dictionaries." + assert_dictionary_values_equal( dictionary[ key ] , + reference_dictionary[ key ] ) + elif isinstance( dictionary[ key ] , np.ndarray ) : + assert np.allclose( dictionary[ key ] , + reference_dictionary[ key ] ) , \ + f"Arrays for key '{key}' are not close." + else: + assert np.isclose( dictionary[ key ] , + reference_dictionary[ key ] ) , \ + f"Values for key '{key}' are not close." 
-# ---- Test for dataframe shape equality -def dataframe_shape_equal( input: Union[ pd.DataFrame , dict ] , - reference: Union[ tuple , dict ] ): +# ---- DATAFRAME +# ---- Shape and dimensions +def assert_dataframe_shape_equal( input: Union[ pd.DataFrame , dict ] , + reference: Union[ tuple , dict ] ): ### DataFrame if ( isinstance( input , pd.DataFrame ) ) & ( isinstance( reference , tuple ) ) : @@ -90,46 +153,10 @@ def dataframe_shape_equal( input: Union[ pd.DataFrame , dict ] , ### Dictionary elif ( isinstance( input , dict ) ) & ( isinstance( reference , dict ) ): assert extract_dataframe_shape( input ) == extract_dataframe_shape( reference ) - - -# ---- Test for comparing Dictionary equality (including nested DataFrames) -def dictionary_equality( dictionary1: dict , - dictionary2: dict ): - - ### Iterate through nested DataFrames within each dictionary - for key , expected_df in dictionary2.items( ) : - - if isinstance( dictionary1[ key ] , pd.DataFrame ) : - dataframe_equality( dictionary1[ key ] , expected_df ) - - else : - for sub_key , _ in dictionary2[ key ].items( ): - dataframe_equality( dictionary1[ key ][ sub_key ] , - expected_df[ sub_key ] ) - -# ---- Extract dataframe shape -def extract_dataframe_shape( input: Union[ pd.DataFrame , dict ] ): - - ### DataFrame - if isinstance( input , pd.DataFrame ) : - - return input.shape - - ### Dictionary - elif isinstance( input , dict ) : - dataframe_shapes = { } - - for key , value in input.items( ): - if isinstance( value , pd.DataFrame ) : - dataframe_shapes[ key ] = value.shape - elif isinstance( value , dict ) : - dataframe_shapes[ key ] = extract_dataframe_shape( value ) - - return dataframe_shapes - -# ---- Extract dataframe dtypes -def dataframe_dtypes_equal( dataframe: pd.DataFrame , - reference_dictionary: dict ): +# ---- dtypes +# ~~~~ !!!! ATTN: this is a nested function within `assert_dataframe_dtypes_equal`! 
+def _assert_dataframe_dtypes_equal( dataframe: pd.DataFrame , + reference_dictionary: dict ): ### Separate evaluation for categorical-type # ---- Parse expected categorical variables @@ -147,15 +174,13 @@ def dataframe_dtypes_equal( dataframe: pd.DataFrame , for column , dtype in dataframe.dtypes.items( ): assert np.issubdtype( dtype , reference_dictionary.get( column , object ) ) , \ f"Data type mismatch for column '{ column }'" - - -# ---- Test for evaluating differences in DataFrame `dtypes` -def dataframe_dtypes_equality( input: Union[ pd.DataFrame , dict ] , - reference: dict ): +# ~~~~ dtypes --> compatible with direct DataFrame or bundled DataFrames within a dictionary +def assert_dataframe_dtypes_equal( input: Union[ pd.DataFrame , dict ] , + reference: dict ): ### DataFrame if isinstance( input , pd.DataFrame ) : - dataframe_dtypes_equal( input , reference ) + _assert_dataframe_dtypes_equal( input , reference ) ### Dictionary elif isinstance( input , dict ) : @@ -163,21 +188,71 @@ def dataframe_dtypes_equality( input: Union[ pd.DataFrame , dict ] , # ---- Single Dictionary layer if isinstance( input[ category ] , pd.DataFrame ): - dataframe_dtypes_equal( input[ category ] , - reference[ category ] ) + _assert_dataframe_dtypes_equal( input[ category ] , + reference[ category ] ) # ---- Nested Dictionary layers else: for df_name , _ in data.items( ): - dataframe_dtypes_equal( input[ category ][ df_name ] , reference[ category ][ df_name ] ) - -# ---- Test for evaluating equality between two dataframes -def dataframe_equality( dataframe1: pd.DataFrame , - dataframe2: pd.DataFrame ): + _assert_dataframe_dtypes_equal( input[ category ][ df_name ] , reference[ category ][ df_name ] ) +# ---- Values +# ~~~~ !!!! ATTN: this is a nested function within `assert_dataframe_equal`! 
+def _assert_dataframe_values_equal( dataframe1: pd.DataFrame ,
+                                    dataframe2: pd.DataFrame ):
 
     ### Evaluate equality between numerical values
     assert np.allclose( dataframe1.select_dtypes( include = [ 'number' ] ) ,
-                        dataframe2.select_dtypes( include = [ 'number' ] ) )
+                        dataframe2.select_dtypes( include = [ 'number' ] ) ,
+                        equal_nan = True )
 
     ### Evaluate equality between non-numerical values
-    assert np.all( dataframe1.select_dtypes( exclude = [ 'number' ] ) == dataframe2.select_dtypes( exclude = [ 'number' ] ) )
+    # ---- Mask out "NaN"
+    dataframe1_nan_mask = dataframe1.isna( ).any( axis = 1 )
+    dataframe2_nan_mask = dataframe2.isna( ).any( axis = 1 )
+    # ---- Evaluate equality
+    assert np.all( dataframe1_nan_mask == dataframe2_nan_mask )
+    # ---- Evaluate equality among "real" values
+    dataframe1_masked = dataframe1[ ~ dataframe1_nan_mask ]
+    dataframe2_masked = dataframe2[ ~ dataframe2_nan_mask ]
+    assert np.all( dataframe1_masked.select_dtypes( exclude = [ 'number' ] ) == dataframe2_masked.select_dtypes( exclude = [ 'number' ] ) )
+# ~~~~ Values --> compatible with direct DataFrame or bundled DataFrames within a dictionary
+def assert_dataframe_values_equal( input: Union[ pd.DataFrame , dict ] ,
+                                   reference: Union[ pd.DataFrame , dict ]):
+
+    ### Direct DataFrame
+    if ( isinstance( input , pd.DataFrame ) & ( isinstance( reference , pd.DataFrame ) ) ) :
+        _assert_dataframe_values_equal( input , reference )
+
+    ### Iterate through nested DataFrames within each dictionary
+    else :
+        for key , expected_df in reference.items( ) :
+
+            if isinstance( input[ key ] , pd.DataFrame ) :
+                _assert_dataframe_values_equal( input[ key ] , expected_df )
+
+            else :
+                for sub_key , _ in reference[ key ].items( ):
+                    _assert_dataframe_values_equal( input[ key ][ sub_key ] ,
+                                                    expected_df[ sub_key ] )
+# ++++ DICTIONARY + DATAFRAME BUNDLING
+# ---> Dictionary
+def assert_dictionary_equal( input: dict ,
+                             reference_dtypes: dict ,
+                             reference_values: dict , ) :
+
+    ### Shape
+    assert_dictionary_structure_equal( input , reference_values )
+    ### dtypes
+    assert_dictionary_dtypes_equal( input , reference_dtypes )
+    ### Values
+    assert_dictionary_values_equal( input , reference_values )
+# ---> DataFrame
+def assert_dataframe_equal( input: Union[ pd.DataFrame , dict ] ,
+                            reference_dtypes: dict ,
+                            reference_values: Union[ pd.DataFrame , dict ] , ) :
+    ### Shape
+    assert_dataframe_shape_equal( input , reference_values )
+    ### dtypes
+    assert_dataframe_dtypes_equal( input , reference_dtypes )
+    ### Values
+    assert_dataframe_values_equal( input , reference_values )
\ No newline at end of file
diff --git a/echopop/tests/test_data_loader.py b/echopop/tests/test_data_loader.py
index 71c76237..9fe62504 100644
--- a/echopop/tests/test_data_loader.py
+++ b/echopop/tests/test_data_loader.py
@@ -5,7 +5,7 @@
 from echopop import Survey
 from echopop.core import LAYER_NAME_MAP
 from echopop.utils.data_file_validation import load_configuration
-from echopop.tests.conftest import dictionary_shape_equal
+from echopop.tests.conftest import assert_dictionary_structure_equal
 
 def test_load_configuration(test_path, tmp_path):
     init_params = yaml.safe_load(
@@ -43,22 +43,19 @@ def test_init( mock_survey ):
 
 def test_load_survey_data( mock_survey , test_path ):
-
-    ### Initialize Survey object (objS)
-    objS = mock_survey
-
+
     ### Pull in configuration values
-    objS.config = load_configuration( Path( test_path[ 'CONFIG' ] / 'config_init.yml' ) ,
-                                      Path( test_path[ 'CONFIG' ] / 'config_survey.yml' ) )
+    mock_survey.config = load_configuration( Path(
test_path[ 'CONFIG' ] / 'config_init.yml' ) , + Path( test_path[ 'CONFIG' ] / 'config_survey.yml' ) ) ### Initialize data attributes - objS.acoustics = copy.deepcopy( LAYER_NAME_MAP['NASC']['data_tree'] ) - objS.biology = copy.deepcopy( LAYER_NAME_MAP['biological']['data_tree'] ) - objS.spatial = copy.deepcopy( LAYER_NAME_MAP['stratification']['data_tree'] ) - objS.statistics = copy.deepcopy( LAYER_NAME_MAP['kriging']['data_tree'] ) + mock_survey.acoustics = copy.deepcopy( LAYER_NAME_MAP['NASC']['data_tree'] ) + mock_survey.biology = copy.deepcopy( LAYER_NAME_MAP['biological']['data_tree'] ) + mock_survey.spatial = copy.deepcopy( LAYER_NAME_MAP['stratification']['data_tree'] ) + mock_survey.statistics = copy.deepcopy( LAYER_NAME_MAP['kriging']['data_tree'] ) ### Load in data using the `load_survey_data` method - objS.load_survey_data( ) + mock_survey.load_survey_data( ) # ----------------- ### Evaluate results @@ -66,71 +63,68 @@ def test_load_survey_data( mock_survey , ### Dictionary structure # !!! TODO: based on the original data structure -- will need to be updated once the core data structure is also updated # ---- Check attributes - assert set( [ 'acoustics' , 'biology' , 'spatial' , 'statistics' ] ) <= set( dir( objS ) ) + assert set( [ 'acoustics' , 'biology' , 'spatial' , 'statistics' ] ) <= set( dir( mock_survey ) ) # ---- Check sub-directory keys - assert dictionary_shape_equal( objS.acoustics , LAYER_NAME_MAP['NASC']['data_tree'] ) - assert dictionary_shape_equal( objS.biology , LAYER_NAME_MAP['biological']['data_tree'] ) - assert dictionary_shape_equal( objS.spatial , LAYER_NAME_MAP['stratification']['data_tree'] ) - assert dictionary_shape_equal( objS.statistics , LAYER_NAME_MAP['kriging']['data_tree'] ) + assert_dictionary_structure_equal( mock_survey.acoustics , LAYER_NAME_MAP['NASC']['data_tree'] ) + assert_dictionary_structure_equal( mock_survey.biology , LAYER_NAME_MAP['biological']['data_tree'] ) + assert_dictionary_structure_equal( mock_survey.spatial , LAYER_NAME_MAP['stratification']['data_tree'] ) + assert_dictionary_structure_equal( mock_survey.statistics , LAYER_NAME_MAP['kriging']['data_tree'] ) ### Data structure # ++++ acoustics - assert objS.acoustics[ 'nasc' ][ 'nasc_df' ].shape == tuple( [ 1 , 10 ] ) + assert mock_survey.acoustics[ 'nasc' ][ 'nasc_df' ].shape == tuple( [ 1 , 10 ] ) # ++++ biology - assert objS.biology[ 'catch_df' ].shape == tuple( [ 2 , 7 ] ) - assert objS.biology[ 'distributions' ][ 'age_bins_arr' ].shape == tuple( [ 0 , ] ) - assert objS.biology[ 'distributions' ][ 'length_bins_arr' ].shape == tuple( [ 0 , ] ) - assert objS.biology[ 'haul_to_transect_df' ].shape == tuple( [ 2 , 5 ] ) - assert objS.biology[ 'length_df' ].shape == tuple( [ 2 , 10 ] ) - assert objS.biology[ 'specimen_df' ].shape == tuple( [ 2 , 11 ] ) + assert mock_survey.biology[ 'catch_df' ].shape == tuple( [ 2 , 7 ] ) + assert mock_survey.biology[ 'distributions' ][ 'age_bins_arr' ].shape == tuple( [ 0 , ] ) + assert mock_survey.biology[ 'distributions' ][ 'length_bins_arr' ].shape == tuple( [ 0 , ] ) + assert mock_survey.biology[ 'haul_to_transect_df' ].shape == tuple( [ 2 , 5 ] ) + assert mock_survey.biology[ 'length_df' ].shape == tuple( [ 2 , 10 ] ) + assert mock_survey.biology[ 'specimen_df' ].shape == tuple( [ 2 , 11 ] ) # ++++ spatial - assert objS.spatial[ 'strata_df' ].shape == tuple( [ 1 , 3 ] ) - assert objS.spatial[ 'geo_strata_df' ].shape == tuple( [ 1 , 2 ] ) - assert objS.spatial[ 'inpfc_strata_df' ].shape == tuple( [ 1 , 2 ] ) + assert mock_survey.spatial[ 
'strata_df' ].shape == tuple( [ 1 , 3 ] ) + assert mock_survey.spatial[ 'geo_strata_df' ].shape == tuple( [ 1 , 2 ] ) + assert mock_survey.spatial[ 'inpfc_strata_df' ].shape == tuple( [ 1 , 2 ] ) # ++++ statistics - assert objS.statistics[ 'kriging' ][ 'mesh_df' ].shape == tuple( [ 19843 , 3 ] ) - assert objS.statistics[ 'kriging' ][ 'isobath_200m_df' ].shape == tuple( [ 147 , 2 ] ) - assert len( objS.statistics[ 'kriging' ][ 'model_config' ] ) == 39 - assert len( objS.statistics[ 'variogram' ][ 'model_config' ] ) == 13 + assert mock_survey.statistics[ 'kriging' ][ 'mesh_df' ].shape == tuple( [ 19843 , 3 ] ) + assert mock_survey.statistics[ 'kriging' ][ 'isobath_200m_df' ].shape == tuple( [ 147 , 2 ] ) + assert len( mock_survey.statistics[ 'kriging' ][ 'model_config' ] ) == 39 + assert len( mock_survey.statistics[ 'variogram' ][ 'model_config' ] ) == 13 ### Test merged outputs - assert set( objS.biology[ 'haul_to_transect_df' ].columns ) <= set( objS.biology[ 'catch_df' ].columns ) - assert set( objS.biology[ 'haul_to_transect_df' ].columns ) <= set( objS.biology[ 'length_df' ].columns ) - assert set( objS.biology[ 'haul_to_transect_df' ].columns ) <= set( objS.biology[ 'specimen_df' ].columns ) + assert set( mock_survey.biology[ 'haul_to_transect_df' ].columns ) <= set( mock_survey.biology[ 'catch_df' ].columns ) + assert set( mock_survey.biology[ 'haul_to_transect_df' ].columns ) <= set( mock_survey.biology[ 'length_df' ].columns ) + assert set( mock_survey.biology[ 'haul_to_transect_df' ].columns ) <= set( mock_survey.biology[ 'specimen_df' ].columns ) ### Test biological data (sex definition) - assert np.all( ( objS.biology[ 'length_df' ].sex == 'female' ) & ( objS.biology[ 'length_df' ].group == 'sexed' ) ) - assert np.all( ( objS.biology[ 'specimen_df' ].sex == [ 'male' , 'female' ] ) & ( objS.biology[ 'specimen_df' ].group == 'sexed' ) ) + assert np.all( ( mock_survey.biology[ 'length_df' ].sex == 'female' ) & ( mock_survey.biology[ 'length_df' ].group == 'sexed' ) ) + assert np.all( ( mock_survey.biology[ 'specimen_df' ].sex == [ 'male' , 'female' ] ) & ( mock_survey.biology[ 'specimen_df' ].group == 'sexed' ) ) def test_biometric_distributions( mock_survey , test_path ): - ### Initialize Survey object (objS) - objS = mock_survey - ### Pull in configuration values - objS.config = load_configuration( Path( test_path[ 'CONFIG' ] / 'config_init.yml' ) , - Path( test_path[ 'CONFIG' ] / 'config_survey.yml' ) ) + mock_survey.config = load_configuration( Path( test_path[ 'CONFIG' ] / 'config_init.yml' ) , + Path( test_path[ 'CONFIG' ] / 'config_survey.yml' ) ) ### Initialize data attributes - objS.acoustics = copy.deepcopy( LAYER_NAME_MAP['NASC']['data_tree'] ) - objS.biology = copy.deepcopy( LAYER_NAME_MAP['biological']['data_tree'] ) - objS.spatial = copy.deepcopy( LAYER_NAME_MAP['stratification']['data_tree'] ) - objS.statistics = copy.deepcopy( LAYER_NAME_MAP['kriging']['data_tree'] ) + mock_survey.acoustics = copy.deepcopy( LAYER_NAME_MAP['NASC']['data_tree'] ) + mock_survey.biology = copy.deepcopy( LAYER_NAME_MAP['biological']['data_tree'] ) + mock_survey.spatial = copy.deepcopy( LAYER_NAME_MAP['stratification']['data_tree'] ) + mock_survey.statistics = copy.deepcopy( LAYER_NAME_MAP['kriging']['data_tree'] ) ### Load in data using the `load_survey_data` method - objS.load_survey_data( ) + mock_survey.load_survey_data( ) ### Generate length and age distributions - objS.biometric_distributions( ) + mock_survey.biometric_distributions( ) # ----------------- ### Evaluate results # 
----------------- ### Data structure - assert objS.biology[ 'distributions' ][ 'age' ][ 'age_interval_arr' ].shape == tuple( [ 23 , ] ) - assert objS.biology[ 'distributions' ][ 'age' ][ 'age_bins_arr' ].shape == tuple( [ 22 , ] ) - assert objS.biology[ 'distributions' ][ 'length' ][ 'length_interval_arr' ].shape == tuple( [ 41 , ] ) - assert objS.biology[ 'distributions' ][ 'length' ][ 'length_bins_arr' ].shape == tuple( [ 40 , ] ) + assert mock_survey.biology[ 'distributions' ][ 'age' ][ 'age_interval_arr' ].shape == tuple( [ 23 , ] ) + assert mock_survey.biology[ 'distributions' ][ 'age' ][ 'age_bins_arr' ].shape == tuple( [ 22 , ] ) + assert mock_survey.biology[ 'distributions' ][ 'length' ][ 'length_interval_arr' ].shape == tuple( [ 41 , ] ) + assert mock_survey.biology[ 'distributions' ][ 'length' ][ 'length_bins_arr' ].shape == tuple( [ 40 , ] ) ### Data equality - assert np.all( objS.biology[ 'distributions' ][ 'age' ][ 'age_interval_arr' ] == np.linspace( 0.5 , 22.5 , 23 ) ) - assert np.all( objS.biology[ 'distributions' ][ 'age' ][ 'age_bins_arr' ] == np.linspace( 1 , 22 , 22 ) ) - assert np.all( objS.biology[ 'distributions' ][ 'length' ][ 'length_interval_arr' ] == np.linspace( 1 , 81 , 41 ) ) - assert np.all( objS.biology[ 'distributions' ][ 'length' ][ 'length_bins_arr' ] == np.linspace( 2 , 80 , 40 ) ) \ No newline at end of file + assert np.all( mock_survey.biology[ 'distributions' ][ 'age' ][ 'age_interval_arr' ] == np.linspace( 0.5 , 22.5 , 23 ) ) + assert np.all( mock_survey.biology[ 'distributions' ][ 'age' ][ 'age_bins_arr' ] == np.linspace( 1 , 22 , 22 ) ) + assert np.all( mock_survey.biology[ 'distributions' ][ 'length' ][ 'length_interval_arr' ] == np.linspace( 1 , 81 , 41 ) ) + assert np.all( mock_survey.biology[ 'distributions' ][ 'length' ][ 'length_bins_arr' ] == np.linspace( 2 , 80 , 40 ) ) \ No newline at end of file diff --git a/echopop/tests/test_data_transect_analysis.py b/echopop/tests/test_data_transect_analysis.py index 93d0935d..2be90b21 100644 --- a/echopop/tests/test_data_transect_analysis.py +++ b/echopop/tests/test_data_transect_analysis.py @@ -1,7 +1,6 @@ import pandas as pd import numpy as np -from echopop.tests.conftest import dictionary_equality ,dataframe_equality -from echopop.tests.conftest import dataframe_shape_equal , dataframe_dtypes_equality +from echopop.tests.conftest import assert_dataframe_equal def test_fit_binned_length_weight_relationship( mock_survey ): @@ -92,13 +91,8 @@ def test_fit_binned_length_weight_relationship( mock_survey ): ### Run tests: `fit_binned_length_weight_relationship` #---------------------------------- eval_dictionary = mock_survey.statistics[ 'length_weight' ] - ### Check shape - dataframe_shape_equal( eval_dictionary , expected_output ) - ### Check datatypes - dataframe_dtypes_equality( eval_dictionary , expected_dtypes ) - ### Dataframe equality - dictionary_equality( eval_dictionary , expected_output ) - + assert_dataframe_equal( eval_dictionary , expected_dtypes , expected_output ) + def test_strata_sex_weight_proportions( mock_survey ): ### Initialize mock_survey for `weight` @@ -217,12 +211,7 @@ def test_strata_sex_weight_proportions( mock_survey ): ### Run tests: `strata_sex_weight_proportions` #---------------------------------- eval_dataframe = mock_survey.biology[ 'weight' ][ 'weight_strata_df' ] - ### Check shape - dataframe_shape_equal( eval_dataframe , expected_dtypes ) - ### Check datatypes - dataframe_dtypes_equality( eval_dataframe , expected_dtypes ) - ### Dataframe equality - 
dataframe_equality( eval_dataframe , expected_output ) + assert_dataframe_equal( eval_dataframe , expected_dtypes , expected_output ) def test_strata_age_binned_weight_proportions( mock_survey ): @@ -339,12 +328,7 @@ def test_strata_age_binned_weight_proportions( mock_survey ): ### Run tests: `strata_age_binned_weight_proportions` #---------------------------------- eval_dictionary = mock_survey.biology[ 'weight' ][ 'proportions' ] - ### Check shape - dataframe_shape_equal( eval_dictionary , expected_output ) - ### Check datatypes - dataframe_dtypes_equality( eval_dictionary , expected_dtypes ) - ### Dataframe equality - dictionary_equality( eval_dictionary , expected_output ) + assert_dataframe_equal( eval_dictionary , expected_dtypes , expected_output ) def test_nasc_to_biomass_conversion( mock_survey ): @@ -632,11 +616,6 @@ def test_nasc_to_biomass_conversion( mock_survey ): ### Run tests: `test_nasc_to_biomass_conversion` #---------------------------------- eval_dictionary = mock_survey.biology[ 'population' ] - ### Check shape - dataframe_shape_equal( eval_dictionary , expected_output ) - ### Check datatypes - dataframe_dtypes_equality( eval_dictionary , expected_dtypes ) - ### Dataframe equality - dictionary_equality( eval_dictionary , expected_output ) + assert_dataframe_equal( eval_dictionary , expected_dtypes , expected_output ) \ No newline at end of file diff --git a/echopop/tests/test_operations.py b/echopop/tests/test_operations.py index 35385d12..e5da960b 100644 --- a/echopop/tests/test_operations.py +++ b/echopop/tests/test_operations.py @@ -1,6 +1,7 @@ import numpy as np import pandas as pd from echopop.computation.operations import bin_variable , bin_stats , count_variable , meld , stretch , group_merge +from echopop.tests.conftest import assert_dataframe_equal def test_bin_variable( ): @@ -24,8 +25,12 @@ def test_bin_variable( ): ###-------------------------------- ### Expected outcomes ###-------------------------------- - # ---- Expected dimensions - expected_dimensions = tuple( [ 3 , 3 ] ) + # ---- Expected dtypes + expected_dtypes = { + 'animal': object , + 'length': np.floating , + 'length_bin': pd.CategoricalDtype( ) , + } # ---- Expected output expected_output = pd.DataFrame( { @@ -39,12 +44,8 @@ def test_bin_variable( ): #---------------------------------- ### Run tests: `bin_variable` #---------------------------------- - ### Check shape - assert eval_dataframe_monkey.shape == expected_dimensions - assert eval_dataframe_function.shape == expected_dimensions - ### Check output - assert eval_dataframe_monkey.equals( expected_output ) - assert eval_dataframe_function.equals( expected_output ) + assert_dataframe_equal( eval_dataframe_monkey , expected_dtypes , expected_output ) + assert_dataframe_equal( eval_dataframe_function , expected_dtypes , expected_output ) def test_bin_stats( ): @@ -81,72 +82,143 @@ def test_bin_stats( ): eval_dataframe_monkey_lwc = test_dataframe.bin_stats( 'length' , test_bin_values , contrasts = [ 'location' ] , variables = 'length' ) # ---- Normal function eval_dataframe_function_lwc = bin_stats( test_dataframe , 'length' , test_bin_values , contrasts = [ 'location' ] , variables = 'length' ) - + # ++++ Bundle together for evaluation + eval_dictionary = { + 'monkey_lwnc': eval_dataframe_monkey_lwnc , + 'function_lwnc': eval_dataframe_function_lwnc , + 'monkey_lnc': eval_dataframe_monkey_lnc , + 'function_lnc': eval_dataframe_function_lnc , + 'monkey_lncm': eval_dataframe_monkey_lncm , + 'function_lncm': eval_dataframe_function_lncm , + 
'monkey_lwc': eval_dataframe_monkey_lwc , + 'function_lwc': eval_dataframe_function_lwc , + } ###-------------------------------- ### Expected outcomes ###-------------------------------- - # ---- Expected dimensions - expected_dimensions_lwnc = tuple( [ 4 , 5 ] ) - expected_dimensions_lnc = tuple( [ 4 , 3 ] ) - expected_dimensions_lncm = tuple( [ 4 , 2 ] ) - expected_dimensions_lwc = tuple( [ 8 , 4 ] ) - # ---- Expected output - expected_output_lwnc = pd.DataFrame( - { - 'length_bin': pd.cut( [ 2.0 , 4.0 , 6.0 , 8.0 ] , - np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , - 'mean_length': [ 2.5 , 4.0 , 6.5 , 8.0 ] , - 'n_length': [ 2 , 1 , 2 , 1 ] , - 'mean_weight': [ 200.0 , 200.0 , 150.0 , 300.0 ] , - 'n_weight': [ 2 , 1 , 2 , 1 ] , + # ---- Expected dtypes + expected_dtypes = { + 'monkey_lwnc': { + 'length_bin': pd.CategoricalDtype( ) , + 'mean_length': np.floating , + 'n_length': np.integer , + 'mean_weight': np.floating , + 'n_weight': np.integer , } , - ) - expected_output_lnc = pd.DataFrame( - { - 'length_bin': pd.cut( [ 2.0 , 4.0 , 6.0 , 8.0 ] , - np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , - 'mean_length': [ 2.5 , 4.0 , 6.5 , 8.0 ] , - 'n_length': [ 2 , 1 , 2 , 1 ] , + 'function_lwnc': { + 'length_bin': pd.CategoricalDtype( ) , + 'mean_length': np.floating , + 'n_length': np.integer , + 'mean_weight': np.floating , + 'n_weight': np.integer , } , - ) - expected_output_lncm = pd.DataFrame( - { - 'length_bin': pd.cut( [ 2.0 , 4.0 , 6.0 , 8.0 ] , - np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , - 'mean_length': [ 2.5 , 4.0 , 6.5 , 8.0 ] + 'monkey_lnc': { + 'length_bin': pd.CategoricalDtype( ) , + 'mean_length': np.floating , + 'n_length': np.integer , } , - ) - expected_output_lwc = pd.DataFrame( - { - 'length_bin': pd.cut( np.repeat( [ 2.0 , 4.0 , 6.0 , 8.0 ] , 2 ) , - np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , - 'location': np.tile( [ 'lost city of z' , 'timbuktu' ] , 4 ) , - 'mean_length': [ 3.0 , 2.0 , 0.0 , 4.0 , 6.5 , 0.0 , 0.0 , 8.0 ] , - 'n_length': [ 1 , 1 , 0 , 1 , 2 , 0 , 0 , 1 ] , + 'function_lnc': { + 'length_bin': pd.CategoricalDtype( ) , + 'mean_length': np.floating , + 'n_length': np.integer , } , - ) - + 'monkey_lncm': { + 'length_bin': pd.CategoricalDtype( ) , + 'mean_length': np.floating , + 'n_length': np.integer , + } , + 'function_lncm': { + 'length_bin': pd.CategoricalDtype( ) , + 'mean_length': np.floating , + } , + 'monkey_lwc': { + 'length_bin': pd.CategoricalDtype( ) , + 'location': object , + 'mean_length': np.floating , + 'n_length': np.integer , + } , + 'function_lwc': { + 'length_bin': pd.CategoricalDtype( ) , + 'location': object , + 'mean_length': np.floating , + 'n_length': np.integer , + } , + } + # ---- Expected outputs + expected_output = { + 'monkey_lwnc': pd.DataFrame ( + { + 'length_bin': pd.cut( [ 2.0 , 4.0 , 6.0 , 8.0 ] , + np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , + 'mean_length': [ 2.5 , 4.0 , 6.5 , 8.0 ] , + 'n_length': [ 2 , 1 , 2 , 1 ] , + 'mean_weight': [ 200.0 , 200.0 , 150.0 , 300.0 ] , + 'n_weight': [ 2 , 1 , 2 , 1 ] , + } , + ) , + 'function_lwnc': pd.DataFrame ( + { + 'length_bin': pd.cut( [ 2.0 , 4.0 , 6.0 , 8.0 ] , + np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , + 'mean_length': [ 2.5 , 4.0 , 6.5 , 8.0 ] , + 'n_length': [ 2 , 1 , 2 , 1 ] , + 'mean_weight': [ 200.0 , 200.0 , 150.0 , 300.0 ] , + 'n_weight': [ 2 , 1 , 2 , 1 ] , + } , + ) , + 'monkey_lnc': pd.DataFrame ( + { + 'length_bin': pd.cut( [ 2.0 , 4.0 , 6.0 , 8.0 ] , + np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , + 'mean_length': [ 2.5 , 4.0 , 6.5 , 8.0 ] , 
+ 'n_length': [ 2 , 1 , 2 , 1 ] , + } , + ) , + 'function_lnc': pd.DataFrame( + { + 'length_bin': pd.cut( [ 2.0 , 4.0 , 6.0 , 8.0 ] , + np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , + 'mean_length': [ 2.5 , 4.0 , 6.5 , 8.0 ] , + 'n_length': [ 2 , 1 , 2 , 1 ] , + } , + ) , + 'monkey_lncm': pd.DataFrame ( + { + 'length_bin': pd.cut( [ 2.0 , 4.0 , 6.0 , 8.0 ] , + np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , + 'mean_length': [ 2.5 , 4.0 , 6.5 , 8.0 ] , + } , + ) , + 'function_lncm': pd.DataFrame ( + { + 'length_bin': pd.cut( [ 2.0 , 4.0 , 6.0 , 8.0 ] , + np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , + 'mean_length': [ 2.5 , 4.0 , 6.5 , 8.0 ] , + } , + ) , + 'monkey_lwc': pd.DataFrame( + { + 'length_bin': pd.cut( np.repeat( [ 2.0 , 4.0 , 6.0 , 8.0 ] , 2 ) , + np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , + 'location': np.tile( [ 'lost city of z' , 'timbuktu' ] , 4 ) , + 'mean_length': [ 3.0 , 2.0 , 0.0 , 4.0 , 6.5 , 0.0 , 0.0 , 8.0 ] , + 'n_length': [ 1 , 1 , 0 , 1 , 2 , 0 , 0 , 1 ] , + } , + ) , + 'function_lwc': pd.DataFrame( + { + 'length_bin': pd.cut( np.repeat( [ 2.0 , 4.0 , 6.0 , 8.0 ] , 2 ) , + np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , + 'location': np.tile( [ 'lost city of z' , 'timbuktu' ] , 4 ) , + 'mean_length': [ 3.0 , 2.0 , 0.0 , 4.0 , 6.5 , 0.0 , 0.0 , 8.0 ] , + 'n_length': [ 1 , 1 , 0 , 1 , 2 , 0 , 0 , 1 ] , + } , + ) , + } #---------------------------------- ### Run tests: `bin_stats` #---------------------------------- - ### Check shape - assert eval_dataframe_monkey_lwnc.shape == expected_dimensions_lwnc - assert eval_dataframe_function_lwnc.shape == expected_dimensions_lwnc - assert eval_dataframe_monkey_lnc.shape == expected_dimensions_lnc - assert eval_dataframe_function_lnc.shape == expected_dimensions_lnc - assert eval_dataframe_monkey_lncm.shape == expected_dimensions_lncm - assert eval_dataframe_function_lncm.shape == expected_dimensions_lncm - assert eval_dataframe_monkey_lwc.shape == expected_dimensions_lwc - assert eval_dataframe_function_lwc.shape == expected_dimensions_lwc - ### Check output - assert eval_dataframe_monkey_lwnc.equals( expected_output_lwnc ) - assert eval_dataframe_function_lwnc.equals( expected_output_lwnc ) - assert eval_dataframe_monkey_lnc.equals( expected_output_lnc ) - assert eval_dataframe_function_lnc.equals( expected_output_lnc ) - assert eval_dataframe_monkey_lncm.equals( expected_output_lncm ) - assert eval_dataframe_function_lncm.equals( expected_output_lncm ) - assert eval_dataframe_monkey_lwc.equals( expected_output_lwc ) - assert eval_dataframe_function_lwc.equals( expected_output_lwc ) + assert_dataframe_equal( eval_dictionary, expected_dtypes , expected_output ) def test_count_variable( ): @@ -177,8 +249,12 @@ def test_count_variable( ): ###-------------------------------- ### Expected outcomes ###-------------------------------- - # ---- Expected dimensions - expected_dimensions = tuple( [ 12 , 3 ] ) + # ---- Expected dtypes + expected_dtypes = { + 'location': object , + 'animal': object , + 'count': np.integer , + } # ---- Expected output expected_output = pd.DataFrame( { @@ -193,12 +269,8 @@ def test_count_variable( ): #---------------------------------- ### Run tests: `count_variable` #---------------------------------- - ### Check shape - assert eval_dataframe_monkey.shape == expected_dimensions - assert eval_dataframe_function.shape == expected_dimensions - ### Check dataframe equality - assert eval_dataframe_monkey.equals( expected_output ) - assert eval_dataframe_function.equals( expected_output ) + 
assert_dataframe_equal( eval_dataframe_monkey , expected_dtypes , expected_output ) + assert_dataframe_equal( eval_dataframe_function , expected_dtypes , expected_output ) def test_meld( ): @@ -242,8 +314,17 @@ def test_meld( ): ###-------------------------------- ### Expected outcomes ###-------------------------------- - # ---- Expected dimensions - expected_dimensions = tuple( [ 16 , 8 ] ) + # ---- Expected dtypes + expected_dtypes = { + 'stratum_num': np.integer , + 'species_id': object , + 'sex': object , + 'group': object , + 'station': object , + 'length': np.floating , + 'length_bin': pd.CategoricalDtype( ) , + 'length_count': np.integer , + } # ---- Expected output expected_output = pd.DataFrame( { @@ -271,12 +352,8 @@ def test_meld( ): #---------------------------------- ### Run tests: `count_variable` #---------------------------------- - ### Check shape - assert eval_dataframe_monkey.shape == expected_dimensions - assert eval_dataframe_function.shape == expected_dimensions - ### Check output - assert np.all( eval_dataframe_monkey == expected_output ) - assert np.all( eval_dataframe_function == expected_output ) + assert_dataframe_equal( eval_dataframe_monkey , expected_dtypes , expected_output ) + assert_dataframe_equal( eval_dataframe_function , expected_dtypes , expected_output ) def test_stretch( ): @@ -301,8 +378,15 @@ def test_stretch( ): ###-------------------------------- ### Expected outcomes ###-------------------------------- - # ---- Expected dimensions - expected_dimensions = tuple( [ 8 , 6 ] ) + # ---- Expected dtypes + expected_dtypes = { + 'transect_num': np.integer , + 'latitude': np.floating , + 'longitude': np.floating , + 'stratum_num': np.integer , + 'sex': object , + 'load_a': np.floating , + } # ---- Expected output expected_output = pd.DataFrame( { @@ -319,12 +403,8 @@ def test_stretch( ): #---------------------------------- ### Run tests: `count_variable` #---------------------------------- - ### Check shape - assert eval_dataframe_monkey.shape == expected_dimensions - assert eval_dataframe_function.shape == expected_dimensions - ### Check output - assert np.all( eval_dataframe_monkey == expected_output ) - assert np.all( eval_dataframe_function == expected_output ) + assert_dataframe_equal( eval_dataframe_monkey , expected_dtypes , expected_output ) + assert_dataframe_equal( eval_dataframe_function , expected_dtypes , expected_output ) def test_group_merge( ): @@ -387,64 +467,115 @@ def test_group_merge( ): inner_on = 'group' , outer_on = [ 'stratum_num' ] , drop_na = False ) - + # ++++ Bundle! 
+ eval_dictionary = { + 'monkey_dropna': eval_dataframe_monkey_dropna , + 'function_dropna': eval_dataframe_function_dropna , + 'monkey_keepna': eval_dataframe_monkey_keepna , + 'function_keepna': eval_dataframe_function_keepna , + } ###-------------------------------- ### Expected outcomes ###-------------------------------- - # ---- Expected dimensions - expected_dimensions_dropna = tuple( [ 12 , 6 ] ) - expected_dimensions_keepna = tuple( [ 14 , 6 ] ) - # ---- Expected output - expected_output_dropna = pd.DataFrame( - { - 'stratum_num': np.repeat( [ 1 , 2 ] , 6 ) , - 'animal': np.tile( [ 'big blue bass' , 'gnarly green grouse' , 'magenta mad manatee' , - 'pretty pink pony' , 'roudy red rabbit' , 'silly silver silkworm' ] , 2 ) , - 'insert_metric_here': [ 1.00 , 1.00 , 1.00 , 0.75 , 0.75 , 0.75 , - 0.50 , 0.50 , 0.50 , 0.75 , 0.75 , 1.00 ] , - 'group': np.repeat( [ 'sleepy' , 'alert' ] , 6 ) , - 'new_metric_here': [ 0.1 , 0.1 , 0.2 , 0.2 , 0.3 , 0.3 , - 0.5 , 0.2 , 0.2 , 0.4 , 0.4 , 0.5 ] , - 'categorical_metric': np.repeat( [ 'zippity' , 'doo' ] , 6 ) , + # ---- Expected dtypes + expected_dtypes = { + 'monkey_dropna': { + 'stratum_num': np.integer , + 'animal': object , + 'insert_metric_here': np.floating , + 'group': object , + 'new_metric_here': np.floating , + 'categorical_metric': object , } , - ) - expected_output_keepna = pd.DataFrame( - { - 'stratum_num': np.concatenate( [ np.repeat( [ 1 , 2 ] , 7 ) ] ) , - 'animal': np.tile( [ 'big blue bass' , 'gnarly green grouse' , 'magenta mad manatee' , - 'pretty pink pony' , 'roudy red rabbit' , 'silly silver silkworm' , None ] , 2 ).astype( object ) , - 'insert_metric_here': [ 1.00 , 1.00 , 1.00 , 0.75 , 0.75 , 0.75 , + 'function_dropna': { + 'stratum_num': np.integer , + 'animal': object , + 'insert_metric_here': np.floating , + 'group': object , + 'new_metric_here': np.floating , + 'categorical_metric': object , + } , + 'monkey_keepna': { + 'stratum_num': np.integer , + 'animal': object , + 'insert_metric_here': np.floating , + 'group': object , + 'new_metric_here': np.floating , + 'categorical_metric': object , + } , + 'function_keepna': { + 'stratum_num': np.integer , + 'animal': object , + 'insert_metric_here': np.floating , + 'group': object , + 'new_metric_here': np.floating , + 'categorical_metric': object , + } , + } + # ---- Expected output + expected_output = { + 'monkey_dropna': pd.DataFrame( + { + 'stratum_num': np.repeat( [ 1 , 2 ] , 6 ) , + 'animal': np.tile( [ 'big blue bass' , 'gnarly green grouse' , 'magenta mad manatee' , + 'pretty pink pony' , 'roudy red rabbit' , 'silly silver silkworm' ] , 2 ) , + 'insert_metric_here': [ 1.00 , 1.00 , 1.00 , 0.75 , 0.75 , 0.75 , + 0.50 , 0.50 , 0.50 , 0.75 , 0.75 , 1.00 ] , + 'group': np.repeat( [ 'sleepy' , 'alert' ] , 6 ) , + 'new_metric_here': [ 0.1 , 0.1 , 0.2 , 0.2 , 0.3 , 0.3 , + 0.5 , 0.2 , 0.2 , 0.4 , 0.4 , 0.5 ] , + 'categorical_metric': np.repeat( [ 'zippity' , 'doo' ] , 6 ) , + } , + ) , + 'function_dropna': pd.DataFrame( + { + 'stratum_num': np.repeat( [ 1 , 2 ] , 6 ) , + 'animal': np.tile( [ 'big blue bass' , 'gnarly green grouse' , 'magenta mad manatee' , + 'pretty pink pony' , 'roudy red rabbit' , 'silly silver silkworm' ] , 2 ) , + 'insert_metric_here': [ 1.00 , 1.00 , 1.00 , 0.75 , 0.75 , 0.75 , + 0.50 , 0.50 , 0.50 , 0.75 , 0.75 , 1.00 ] , + 'group': np.repeat( [ 'sleepy' , 'alert' ] , 6 ) , + 'new_metric_here': [ 0.1 , 0.1 , 0.2 , 0.2 , 0.3 , 0.3 , + 0.5 , 0.2 , 0.2 , 0.4 , 0.4 , 0.5 ] , + 'categorical_metric': np.repeat( [ 'zippity' , 'doo' ] , 6 ) , + } , + ) , + 
'monkey_keepna': pd.DataFrame( + { + 'stratum_num': np.concatenate( [ np.repeat( [ 1 , 2 ] , 7 ) ] ) , + 'animal': np.tile( [ 'big blue bass' , 'gnarly green grouse' , 'magenta mad manatee' , + 'pretty pink pony' , 'roudy red rabbit' , 'silly silver silkworm' , np.nan ] , 2 ).astype( object ) , + 'insert_metric_here': [ 1.00 , 1.00 , 1.00 , 0.75 , 0.75 , 0.75 , + np.nan , + 0.50 , 0.50 , 0.50 , 0.75 , 0.75 , 1.00 , + np.nan ] , + 'group': np.repeat( [ 'sleepy' , 'alert' ] , 7 ) , + 'new_metric_here': [ 0.1 , 0.1 , 0.2 , 0.2 , 0.3 , 0.3 , np.nan , - 0.50 , 0.50 , 0.50 , 0.75 , 0.75 , 1.00 , + 0.5 , 0.2 , 0.2 , 0.4 , 0.4 , 0.5 , np.nan ] , - 'group': np.repeat( [ 'sleepy' , 'alert' ] , 7 ) , - 'new_metric_here': [ 0.1 , 0.1 , 0.2 , 0.2 , 0.3 , 0.3 , - np.nan , - 0.5 , 0.2 , 0.2 , 0.4 , 0.4 , 0.5 , - np.nan ] , - 'categorical_metric': np.repeat( [ 'zippity' , 'doo' ] , 7 ) , - } , - ) - + 'categorical_metric': np.repeat( [ 'zippity' , 'doo' ] , 7 ) , + } , + ) , + 'function_keepna': pd.DataFrame( + { + 'stratum_num': np.concatenate( [ np.repeat( [ 1 , 2 ] , 7 ) ] ) , + 'animal': np.tile( [ 'big blue bass' , 'gnarly green grouse' , 'magenta mad manatee' , + 'pretty pink pony' , 'roudy red rabbit' , 'silly silver silkworm' , np.nan ] , 2 ).astype( object ) , + 'insert_metric_here': [ 1.00 , 1.00 , 1.00 , 0.75 , 0.75 , 0.75 , + np.nan , + 0.50 , 0.50 , 0.50 , 0.75 , 0.75 , 1.00 , + np.nan ] , + 'group': np.repeat( [ 'sleepy' , 'alert' ] , 7 ) , + 'new_metric_here': [ 0.1 , 0.1 , 0.2 , 0.2 , 0.3 , 0.3 , + np.nan , + 0.5 , 0.2 , 0.2 , 0.4 , 0.4 , 0.5 , + np.nan ] , + 'categorical_metric': np.repeat( [ 'zippity' , 'doo' ] , 7 ) , + } , + ) , + } #---------------------------------- ### Run tests: `count_variable` #---------------------------------- - ### Check shape - # ++++ NaN removed - assert eval_dataframe_monkey_dropna.shape == expected_dimensions_dropna - assert eval_dataframe_function_dropna.shape == expected_dimensions_dropna - # ++++ NaN kept - assert eval_dataframe_monkey_keepna.shape == expected_dimensions_keepna - assert eval_dataframe_function_keepna.shape == expected_dimensions_keepna - ### Check output - # ++++ NaN removed - assert np.all( eval_dataframe_monkey_dropna == expected_output_dropna ) - assert np.all( eval_dataframe_function_dropna == expected_output_dropna ) - # ++++ NaN kept - eval_nan_value_mask_monkey = pd.isnull( eval_dataframe_monkey_keepna.insert_metric_here ) - eval_nan_value_mask_function = pd.isnull( eval_dataframe_monkey_keepna.insert_metric_here ) - expected_nan_value_mask = pd.isnull( expected_output_keepna.insert_metric_here ) - assert len( eval_dataframe_monkey_keepna[ eval_nan_value_mask_monkey ] ) == 2 - assert len( eval_dataframe_function_keepna[ eval_nan_value_mask_function ] ) == 2 - assert eval_dataframe_monkey_keepna[ ~ eval_nan_value_mask_monkey ].equals( expected_output_keepna[ ~ expected_nan_value_mask ] ) - assert eval_dataframe_function_keepna[ ~ eval_nan_value_mask_function ].equals( expected_output_keepna[ ~ expected_nan_value_mask ] ) \ No newline at end of file + assert_dataframe_equal( eval_dictionary , expected_dtypes , expected_output ) \ No newline at end of file diff --git a/echopop/tests/test_stratified_summary.py b/echopop/tests/test_stratified_summary.py index 7fd68c80..cf244d86 100644 --- a/echopop/tests/test_stratified_summary.py +++ b/echopop/tests/test_stratified_summary.py @@ -1,6 +1,6 @@ import numpy as np import pandas as pd -from echopop.tests.conftest import dictionary_shape_equal +from echopop.tests.conftest import 
assert_dictionary_equal from echopop.computation.statistics import stratified_transect_statistic , confidence_interval def test_stratified_transect_statistic( ): @@ -16,8 +16,8 @@ def test_stratified_transect_statistic( ): 'transect_spacing': [ 2.0 , 2.0 , 2.0 , 2.0 ] , 'transect_area': [ 355.201900 , 944.140986 , 469.532550 , 701.473710 ] , 'B_adult': [ 1e2 , 1e3 , 1e5 , 1e4 ] , - 'stratum_inpfc': [ 1 , 1 , 2 , 2 ] - } + 'stratum_inpfc': [ 1 , 1 , 2 , 2 ] , + } , ) ### Create mock data for `strata_summary` @@ -26,7 +26,7 @@ def test_stratified_transect_statistic( ): 'stratum_inpfc': [ 1 , 2 ] , 'num_transects': [ 2 , 2 ] , 'total_transect_area': [ 1299.342886 , 1171.006260 ] , - } + } , ) ### Evaluate for later comparison @@ -71,121 +71,91 @@ def test_stratified_transect_statistic( ): test_transect_replicates , parameter = 'B_adult' ) + # ++++ Bundle! + eval_dictionary = { + 'single': eval_single_stratified_results , + 'single_rep': eval_single_rep_stratified_results , + 'single_sub': eval_single_sub_stratified_results , + 'single_rep_sub': eval_single_sub_rep_stratified_results , + } + ###-------------------------------- ### Expected outcomes ###-------------------------------- + # ---- Expected dtypes + + # ---- Expected output expected_output = { - 'biomass': { - 'mean': { - 'estimate': 1 , - 'confidence_interval': np.array( [ 1 , 1 ] ) , + 'single': { + 'biomass': { + 'mean': { + 'estimate': 54947653.27600001 , + 'confidence_interval': np.array( [ 54947653.27600001 , 54947653.27600001 ] ) , + } , + 'variance': { + 'estimate': 54846534.456292756 , + 'confidence_interval': np.array( [ 54846534.45629276 , 54846534.45629276 ] ) , + } , + 'CV': { + 'estimate': 0.9981597245072626 , + 'confidence_interval': np.array( [ 0.99815972 , 0.99815972 ] ) , + } , + } , + } , + 'single_rep': { + 'biomass': { + 'mean': { + 'estimate': 54947653.27600001 , + 'confidence_interval': np.array( [ 54947653.27600001 , 54947653.27600001 ] ) , + } , + 'variance': { + 'estimate': 54846534.45629276 , + 'confidence_interval': np.array( [ 54846534.45629275 , 54846534.45629278 ] ) , + } , + 'CV': { + 'estimate': 0.9981597245072626 , + 'confidence_interval': np.array( [ 0.99815972 , 0.99815972 ] ) , + } , + } , + } , + 'single_sub': { + 'biomass': { + 'mean': { + 'estimate': 117230560.28860001 , + 'confidence_interval': np.array( [ 1.1723056e08 , 1.1723056e8 ] ) , + } , + 'variance': { + 'estimate': 116601900.95605445 , + 'confidence_interval': np.array( [ 1.16601901e8 , 1.16601901e8 ] ) , + } , + 'CV': { + 'estimate': 0.994637410833848 , + 'confidence_interval': np.array( [ 0.99463741 , 0.99463741 ] ) , + } , } , - 'variance': { - 'estimate': 1 , - 'confidence_interval': np.array( [ 1 , 1 ] ) , + } , + 'single_rep_sub': { + 'biomass': { + 'mean': { + 'estimate': 54463985.68756001 , + 'confidence_interval': np.array( [ -4.69233576e7 , 1.55851329e8 ] ) , + } , + 'variance': { + 'estimate': 53662832.43264915 , + 'confidence_interval': np.array( [ -4.70645276e7 , 1.54390192e8 ] ) , + } , + 'CV': { + 'estimate': 0.9710233886235905 , + 'confidence_interval': np.array( [ 0.90408889 , 1.03795788 ] ) , + } , } , - 'CV': { - 'estimate': 1 , - 'confidence_interval': np.array( [ 1 , 1 ] ) , - } , - } + } , } #---------------------------------- ### Run tests: `stratified_transect_statistic` #---------------------------------- - ### Dictionary structure - # !!! 
TODO: based on the original data structure -- will need to be updated once the core data structure is also updated - # ---- Check attributes - assert set( eval_single_stratified_results[ 'biomass' ].keys( ) ) == ( set( [ 'mean' , 'variance' , 'CV' ] ) ) - assert set( eval_single_rep_stratified_results[ 'biomass' ].keys( ) ) == ( set( [ 'mean' , 'variance' , 'CV' ] ) ) - assert set( eval_single_sub_stratified_results[ 'biomass' ].keys( ) ) == ( set( [ 'mean' , 'variance' , 'CV' ] ) ) - assert set( eval_single_sub_rep_stratified_results[ 'biomass' ].keys( ) ) == ( set( [ 'mean' , 'variance' , 'CV' ] ) ) - # ---- Check sub-directory keys and structure - assert dictionary_shape_equal( eval_single_stratified_results , expected_output ) - assert dictionary_shape_equal( eval_single_rep_stratified_results , expected_output ) - assert dictionary_shape_equal( eval_single_sub_stratified_results , expected_output ) - assert dictionary_shape_equal( eval_single_sub_rep_stratified_results , expected_output ) - ### Data outputs - # ++++ mean - # ---- > estimate - assert np.isclose( eval_single_stratified_results[ 'biomass' ][ 'mean' ][ 'estimate' ] , - 54947653.0 , - rtol = 1e-2 ) - assert np.isclose( eval_single_rep_stratified_results[ 'biomass' ][ 'mean' ][ 'estimate' ] , - 54947653.0 , - rtol = 1e-2 ) - assert np.isclose( eval_single_sub_stratified_results[ 'biomass' ][ 'mean' ][ 'estimate' ] , - 117230560.0 , - rtol = 1e-2 ) - assert np.isclose( eval_single_sub_rep_stratified_results[ 'biomass' ][ 'mean' ][ 'estimate' ] , - 54463985.0 , - rtol = 1e-2 ) - # ---- > confidence interval - assert np.allclose( eval_single_stratified_results[ 'biomass' ][ 'mean' ][ 'confidence_interval' ] , - np.array( [ 54947653.28 , 54947653.28 ] ) , - rtol = 1e-2 ) - assert np.allclose( eval_single_rep_stratified_results[ 'biomass' ][ 'mean' ][ 'confidence_interval' ] , - np.array( [ 54947653.28 , 54947653.28 ] ) , - rtol = 1e-2 ) - assert np.allclose( eval_single_sub_stratified_results[ 'biomass' ][ 'mean' ][ 'confidence_interval' ] , - np.array( [ 1.17e8 , 1.172e8 ] ) , - rtol = 1e-2 ) - assert np.allclose( eval_single_sub_rep_stratified_results[ 'biomass' ][ 'mean' ][ 'confidence_interval' ] , - np.array( [ -4.69e7 , 1.57e8 ] ) , - rtol = 1e-2 ) - # ++++ variance - assert np.isclose( eval_single_stratified_results[ 'biomass' ][ 'variance' ][ 'estimate' ] , - 54846534.0 , - rtol = 1e-2 ) - assert np.isclose( eval_single_rep_stratified_results[ 'biomass' ][ 'variance' ][ 'estimate' ] , - 54846534.0 , - rtol = 1e-2 ) - assert np.isclose( eval_single_sub_stratified_results[ 'biomass' ][ 'variance' ][ 'estimate' ] , - 116601900.0 , - rtol = 1e-2 ) - assert np.isclose( eval_single_sub_rep_stratified_results[ 'biomass' ][ 'variance' ][ 'estimate' ] , - 53662832.0 , - rtol = 1e-2 ) - # ---- > confidence interval - assert np.allclose( eval_single_stratified_results[ 'biomass' ][ 'variance' ][ 'confidence_interval' ] , - np.array( [ 54846534.0 , 54846534.0 ] ) , - rtol = 1e-2 ) - assert np.allclose( eval_single_rep_stratified_results[ 'biomass' ][ 'variance' ][ 'confidence_interval' ] , - np.array( [ 54846534.0 , 54846534.0 ] ) , - rtol = 1e-2 ) - assert np.allclose( eval_single_sub_stratified_results[ 'biomass' ][ 'variance' ][ 'confidence_interval' ] , - np.array( [ 1.17e8 , 1.17e8] ) , - rtol = 1e-2 ) - assert np.allclose( eval_single_sub_rep_stratified_results[ 'biomass' ][ 'variance' ][ 'confidence_interval' ] , - np.array( [ -4.71e7 , 1.53e8 ] ) , - rtol = 1e-2 ) - # ++++ CV - assert np.isclose( 
eval_single_stratified_results[ 'biomass' ][ 'CV' ][ 'estimate' ] , - 0.998 , - rtol = 1e-2 ) - assert np.isclose( eval_single_rep_stratified_results[ 'biomass' ][ 'CV' ][ 'estimate' ] , - 0.998 , - rtol = 1e-2 ) - assert np.isclose( eval_single_sub_stratified_results[ 'biomass' ][ 'CV' ][ 'estimate' ] , - 0.995 , - rtol = 1e-2 ) - assert np.isclose( eval_single_sub_rep_stratified_results[ 'biomass' ][ 'CV' ][ 'estimate' ] , - 0.971 , - rtol = 1e-2 ) - # ---- > confidence interval - assert np.allclose( eval_single_stratified_results[ 'biomass' ][ 'CV' ][ 'confidence_interval' ] , - np.array( [ 0.998 , 0.998 ] ) , - rtol = 1e-2 ) - assert np.allclose( eval_single_rep_stratified_results[ 'biomass' ][ 'CV' ][ 'confidence_interval' ] , - np.array( [ 0.998 , 0.998 ] ) , - rtol = 1e-2 ) - assert np.allclose( eval_single_sub_stratified_results[ 'biomass' ][ 'CV' ][ 'confidence_interval' ] , - np.array( [ 0.995 , 0.995 ] ) , - rtol = 1e-2 ) - assert np.allclose( eval_single_sub_rep_stratified_results[ 'biomass' ][ 'CV' ][ 'confidence_interval' ] , - np.array( [ 0.904 , 1.038 ] ) , - rtol = 1e-2 ) + assert_dictionary_equal( eval_dictionary , expected_output ) def test_confidence_interval( ): @@ -200,14 +170,17 @@ def test_confidence_interval( ): ###-------------------------------- # ---- Expected dimensions expected_dimensions = tuple( [ 2 , ] ) + # --- Expected dtype # ---- Expected output - expected_output = np.array( [ 0.201 , 5.355 ] ) + expected_output = np.array( [ 0.20104371 , 5.35451185 ] ) #---------------------------------- ### Run tests: `confidence_interval` #---------------------------------- ### Check shape assert eval_ci_values.shape == expected_dimensions + ### Check dtype + assert np.issubdtype( eval_ci_values.dtype , np.floating ) ### Check output - assert np.allclose( eval_ci_values , expected_output , rtol = 1e-2 ) + assert np.allclose( eval_ci_values , expected_output ) diff --git a/echopop/tests/test_transect_functions.py b/echopop/tests/test_transect_functions.py index 97458ecd..c86b821e 100644 --- a/echopop/tests/test_transect_functions.py +++ b/echopop/tests/test_transect_functions.py @@ -1,8 +1,7 @@ import pandas as pd import numpy as np import copy -from echopop.tests.conftest import dictionary_equality , dataframe_equality -from echopop.tests.conftest import dataframe_shape_equal , dataframe_dtypes_equality +from echopop.tests.conftest import assert_dataframe_equal from echopop.computation.biology import index_transect_age_sex_proportions from echopop.computation.spatial import correct_transect_intervals , calculate_start_end_coordinates , calculate_transect_distance @@ -178,12 +177,7 @@ def test_index_transect_age_sex_proportions( mock_survey ): #---------------------------------- ### Run tests: `index_transect_age_sex_proportions` #---------------------------------- - ### Check shape - dataframe_shape_equal( eval_nasc_fraction_total_df , expected_output ) - ### Check datatypes - dataframe_dtypes_equality( eval_nasc_fraction_total_df , expected_dtypes ) - ### Dataframe equality - dataframe_equality( eval_nasc_fraction_total_df , expected_output ) + assert_dataframe_equal( eval_nasc_fraction_total_df , expected_dtypes , expected_output ) def test_correct_transect_intervals( ): @@ -239,12 +233,7 @@ def test_correct_transect_intervals( ): #---------------------------------- ### Run tests: `correct_transect_intervals` #---------------------------------- - ### Check shape - dataframe_shape_equal( eval_nasc_interval , expected_output ) - ### Check datatypes - 
dataframe_dtypes_equality( eval_nasc_interval , expected_dtypes ) - ### Dataframe equality - dataframe_equality( eval_nasc_interval , expected_output ) + assert_dataframe_equal( eval_nasc_interval , expected_dtypes , expected_output ) def test_calculate_start_end_coordinates( ): @@ -261,7 +250,7 @@ def test_calculate_start_end_coordinates( ): 'NASC_no_age1': [ 0.0 , 1e1 , 1e2 , 1e3 ] , 'haul_num': [ 1 , 1 , 2 , 2 ] , 'NASC_all_ages': [ 1e1 , 1e2 , 1e2 , 1e3 ] , - } + } , ) ### Evaluate for later comparison @@ -291,12 +280,7 @@ def test_calculate_start_end_coordinates( ): #---------------------------------- ### Run tests: `calculate_start_end_coordinates` #---------------------------------- - ### Check shape - dataframe_shape_equal( eval_test_nasc_df , expected_output ) - ### Check datatypes - dataframe_dtypes_equality( eval_test_nasc_df , expected_dtypes ) - ### Dataframe equality - dataframe_equality( eval_test_nasc_df , expected_output ) + assert_dataframe_equal( eval_test_nasc_df , expected_dtypes , expected_output ) def test_calculate_transect_distance( ): @@ -346,11 +330,6 @@ def test_calculate_transect_distance( ): ) #---------------------------------- - ### Run tests: `calculate_start_end_coordinates` + ### Run tests: `calculate_transect_distance` #---------------------------------- - ### Check shape - dataframe_shape_equal( eval_test_nasc_df , expected_output ) - ### Check datatypes - dataframe_dtypes_equality( eval_test_nasc_df , expected_dtypes ) - ### Dataframe equality - dataframe_equality( eval_test_nasc_df , expected_output ) \ No newline at end of file + assert_dataframe_equal( eval_test_nasc_df , expected_dtypes , expected_output ) \ No newline at end of file From 4378029c3f0e1bfb2ccb2808a58ce559832ec19e Mon Sep 17 00:00:00 2001 From: Brandyn Lucca Date: Tue, 16 Apr 2024 12:10:45 -0700 Subject: [PATCH 35/35] Further changes to assertion funcs --- echopop/tests/conftest.py | 4 +- echopop/tests/test_stratified_summary.py | 69 +++++++++++++++++++++++- 2 files changed, 69 insertions(+), 4 deletions(-) diff --git a/echopop/tests/conftest.py b/echopop/tests/conftest.py index 6a9fd9d7..e1c9415d 100644 --- a/echopop/tests/conftest.py +++ b/echopop/tests/conftest.py @@ -72,7 +72,7 @@ def dataframe_shape( input: Union[ pd.DataFrame , dict ] ): if isinstance( value , pd.DataFrame ) : dataframe_shapes[ key ] = value.shape elif isinstance( value , dict ) : - dataframe_shapes[ key ] = extract_dataframe_shape( value ) + dataframe_shapes[ key ] = dataframe_shape( value ) return dataframe_shapes @@ -152,7 +152,7 @@ def assert_dataframe_shape_equal( input: Union[ pd.DataFrame , dict ] , ### Dictionary elif ( isinstance( input , dict ) ) & ( isinstance( reference , dict ) ): - assert extract_dataframe_shape( input ) == extract_dataframe_shape( reference ) + assert dataframe_shape( input ) == dataframe_shape( reference ) # ---- dtypes # ~~~~ !!!! ATTN: this is a nested function within `assert_dataframe_dtypes_equal`! 
def _assert_dataframe_dtypes_equal( dataframe: pd.DataFrame , diff --git a/echopop/tests/test_stratified_summary.py b/echopop/tests/test_stratified_summary.py index cf244d86..98d649da 100644 --- a/echopop/tests/test_stratified_summary.py +++ b/echopop/tests/test_stratified_summary.py @@ -83,7 +83,72 @@ def test_stratified_transect_statistic( ): ### Expected outcomes ###-------------------------------- # ---- Expected dtypes - + expected_dtypes = { + 'single': { + 'biomass': { + 'mean': { + 'estimate': np.floating , + 'confidence_interval': np.array( [ 54947653.27600001 , 54947653.27600001 ] ) , + } , + 'variance': { + 'estimate': 54846534.456292756 , + 'confidence_interval': np.array( [ 54846534.45629276 , 54846534.45629276 ] ) , + } , + 'CV': { + 'estimate': 0.9981597245072626 , + 'confidence_interval': np.array( [ 0.99815972 , 0.99815972 ] ) , + } , + } , + } , + 'single_rep': { + 'biomass': { + 'mean': { + 'estimate': 54947653.27600001 , + 'confidence_interval': np.array( [ 54947653.27600001 , 54947653.27600001 ] ) , + } , + 'variance': { + 'estimate': 54846534.45629276 , + 'confidence_interval': np.array( [ 54846534.45629275 , 54846534.45629278 ] ) , + } , + 'CV': { + 'estimate': 0.9981597245072626 , + 'confidence_interval': np.array( [ 0.99815972 , 0.99815972 ] ) , + } , + } , + } , + 'single_sub': { + 'biomass': { + 'mean': { + 'estimate': 117230560.28860001 , + 'confidence_interval': np.array( [ 1.1723056e08 , 1.1723056e8 ] ) , + } , + 'variance': { + 'estimate': 116601900.95605445 , + 'confidence_interval': np.array( [ 1.16601901e8 , 1.16601901e8 ] ) , + } , + 'CV': { + 'estimate': 0.994637410833848 , + 'confidence_interval': np.array( [ 0.99463741 , 0.99463741 ] ) , + } , + } , + } , + 'single_rep_sub': { + 'biomass': { + 'mean': { + 'estimate': 54463985.68756001 , + 'confidence_interval': np.array( [ -4.69233576e7 , 1.55851329e8 ] ) , + } , + 'variance': { + 'estimate': 53662832.43264915 , + 'confidence_interval': np.array( [ -4.70645276e7 , 1.54390192e8 ] ) , + } , + 'CV': { + 'estimate': 0.9710233886235905 , + 'confidence_interval': np.array( [ 0.90408889 , 1.03795788 ] ) , + } , + } , + } , + } # ---- Expected output expected_output = { 'single': { @@ -155,7 +220,7 @@ def test_stratified_transect_statistic( ): #---------------------------------- ### Run tests: `stratified_transect_statistic` #---------------------------------- - assert_dictionary_equal( eval_dictionary , expected_output ) + assert_dictionary_equal( eval_dictionary , expected_dtypes , expected_output ) def test_confidence_interval( ):
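
For reference, the refactored tests in this series converge on two consolidated helpers from `echopop/tests/conftest.py` — `assert_dataframe_equal( input , expected_dtypes , expected_output )` and `assert_dictionary_equal( input , expected_dtypes , expected_output )` — whose full definitions are only partially visible in the conftest hunks above. The sketch below is a minimal approximation of how such helpers could compose the shape, dtype, and value checks that each test previously performed inline; the function bodies, the dtype-handling details, and the NaN/tolerance behaviour are assumptions, not the package's actual implementation.

# Minimal sketch, for illustration only: the real helpers live in echopop/tests/conftest.py
# and are only partially shown in the hunks above. Anything not shown there is assumed.
from typing import Union

import numpy as np
import pandas as pd


def assert_dataframe_equal(input: Union[pd.DataFrame, dict],
                           expected_dtypes: dict,
                           expected_output: Union[pd.DataFrame, dict]) -> None:
    """Assert shape, dtype, and value equality for a DataFrame or a dict of DataFrames."""
    if isinstance(input, dict):
        # Recurse over bundled evaluation dictionaries (e.g. 'monkey_dropna' / 'function_dropna').
        for key, value in input.items():
            assert_dataframe_equal(value, expected_dtypes[key], expected_output[key])
        return
    # ---- Shape
    assert input.shape == expected_output.shape
    # ---- Dtypes: expected entries are abstract NumPy types (np.integer, np.floating),
    #      the builtin `object`, or a pandas extension dtype such as pd.CategoricalDtype().
    for column, expected_dtype in expected_dtypes.items():
        if isinstance(expected_dtype, pd.CategoricalDtype):
            assert isinstance(input[column].dtype, pd.CategoricalDtype)
        elif expected_dtype is object:
            assert input[column].dtype == object
        else:
            assert np.issubdtype(input[column].dtype, expected_dtype)
    # ---- Values (delegating NaN handling and float tolerance to pandas)
    pd.testing.assert_frame_equal(input, expected_output, check_dtype=False)


def assert_dictionary_equal(input: dict,
                            expected_dtypes: dict,
                            expected_output: dict) -> None:
    """Assert matching keys and approximately equal leaf values for nested dictionaries."""
    assert set(input.keys()) == set(expected_output.keys())
    for key in input:
        if isinstance(expected_output[key], dict):
            assert_dictionary_equal(input[key], expected_dtypes.get(key, {}), expected_output[key])
        elif isinstance(expected_output[key], np.ndarray):
            # Leaf arrays (e.g. confidence intervals): compare element-wise with a tolerance.
            assert np.allclose(input[key], expected_output[key])
        else:
            # Leaf scalars (e.g. point estimates); the dtype walk is elided in this sketch.
            assert np.isclose(input[key], expected_output[key])

In use, a test then reduces to bundling its evaluated frames or nested results into a dictionary and issuing a single call, e.g. `assert_dataframe_equal( eval_dictionary , expected_dtypes , expected_output )`, mirroring the assertions that close each refactored test in the patches above.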