diff --git a/docs/glossary.md b/docs/glossary.md index 58004f75..62fb2516 100644 --- a/docs/glossary.md +++ b/docs/glossary.md @@ -33,4 +33,4 @@ \ No newline at end of file +![ text ](images/symbols.jpeg) --> diff --git a/echopop/computation/operations.py b/echopop/computation/operations.py index 0a62d51c..862a6756 100644 --- a/echopop/computation/operations.py +++ b/echopop/computation/operations.py @@ -104,7 +104,8 @@ def bin_stats( dataframe: pd.DataFrame , return ( dataframe # input dataframe .bin_variable( bin_values , bin_variable ) # discretize variable into bins ) - .groupby( [f'{bin_variable}_bin'] + con_lst ) # group by these variables/contrasts + .groupby( [f'{bin_variable}_bin'] + con_lst , + observed = False ) # group by these variables/contrasts .agg( aggregation_dict ) # apply specified functions .replace( np.nan , 0 ) # replace NaN w/ 0's .droplevel( level = 0 , axis = 1 ) # drop the column indices @@ -132,7 +133,7 @@ def count_variable( dataframe: pd.DataFrame , return ( dataframe # input dataframe .reset_index( drop=True ) - .groupby( contrasts ) + .groupby( contrasts , observed = False ) .agg({variable: [('count' , fun)]}) .replace(np.nan, 0 ) .droplevel( level = 0 , axis = 1 ) @@ -158,15 +159,16 @@ def meld( specimen_dataframe: pd.DataFrame , specimen_stacked = ( specimen_dataframe .copy() - .groupby(['stratum_num' , 'species_id' , 'sex' , 'group' , 'station' , 'length' , 'length_bin' ]) - .apply(lambda x: len(x['length'])) + .groupby( ['stratum_num' , 'species_id' , 'sex' , 'group' , 'station' , 'length' , 'length_bin' ] , + observed = False )[ [ 'length' ] ] + .apply(lambda x: len( x ) , include_groups = True ) .reset_index(name='length_count') ) # Concatenate the data frames and return return pd.concat( [ specimen_stacked , length_dataframe ] , - join = 'inner' ) + join = 'inner' ).reset_index( drop = True ) @patch_method_to_DataFrame( pd.DataFrame ) def stretch( dataframe , diff --git a/echopop/survey.py b/echopop/survey.py index 
812e128b..a62f8e98 100644 --- a/echopop/survey.py +++ b/echopop/survey.py @@ -672,7 +672,7 @@ def strata_sex_weight_proportions( self , station_length_aggregate = ( station_sex_length # calculate the within-sample sum and proportions (necessary for the downstream dot product calculation) - .pipe( lambda x: x.assign( within_station_n = x.groupby( [ 'sex' , 'station' , 'stratum_num' ] )[ 'count' ].transform( sum ) , + .pipe( lambda x: x.assign( within_station_n = x.groupby( [ 'sex' , 'station' , 'stratum_num' ] )[ 'count' ].transform( 'sum' ) , within_station_p = lambda x: x[ 'count' ] / x[ 'within_station_n' ] ) ) .replace( np.nan, 0 ) # remove erroneous NaN (divide by 0 or invalid values) .merge( total_n , on = 'stratum_num' ) # merge station_sex_length with total_n @@ -688,8 +688,9 @@ def strata_sex_weight_proportions( self , .loc[ station_length_aggregate.sex.isin( [ 'male' , 'female' ] ) ] # only parse 'male' and 'female' # create a pivot that will reorient data to the desired shape .pivot_table( index = [ 'sex' , 'station' ] , - columns = [ 'stratum_num' ] , - values = [ 'overall_station_p' ] ) + columns = [ 'stratum_num' ] , + values = [ 'overall_station_p' ] , + observed = False ) .groupby( 'sex' ) .sum( ) ) @@ -701,7 +702,8 @@ def strata_sex_weight_proportions( self , # create a pivot that will reorient data to the desired shape .pivot_table( index = [ 'sex' , 'station' ] , columns = 'stratum_num' , - values = 'overall_station_p' ) + values = 'overall_station_p' , + observed = False ) .groupby( 'station' ) .sum() ) @@ -713,7 +715,8 @@ def strata_sex_weight_proportions( self , # create a pivot that will reorient data to the desired shape .pivot_table( index = [ 'sex' , 'station' ] , columns = 'stratum_num' , - values = 'overall_station_p' ) + values = 'overall_station_p' , + observed = False ) .groupby( [ 'sex' , 'station' ] ) .sum() ) @@ -728,7 +731,8 @@ def strata_sex_weight_proportions( self , .reset_index( name = 'stn_p' ) , on = [ 'stratum_num' , 
'station' ] ) .pivot_table( columns = 'stratum_num' , index = [ 'station' , 'sex' ] , - values = [ 'stn_p' , 'sex_stn_p' ] ) + values = [ 'stn_p' , 'sex_stn_p' ] , + observed = False ) ) ### Format the length bin proportions so they resemble a similar table/matrix shape as the above metrics @@ -737,7 +741,8 @@ def strata_sex_weight_proportions( self , station_length_aggregate .pivot_table( columns = [ 'sex' , 'station' , 'stratum_num' ] , index = [ 'length_bin' ] , - values = [ 'within_station_p' ] )[ 'within_station_p' ] + values = [ 'within_station_p' ] , + observed = False )[ 'within_station_p' ] ) ### Calculate combined station fraction means @@ -837,13 +842,13 @@ def strata_age_binned_weight_proportions( self , .count_variable( variable = 'length' , contrasts = [ 'stratum_num' , 'age' ] , fun = 'size' ) - .pipe( lambda x: x.assign( stratum_count_all = x.groupby( [ 'stratum_num' ] )[ 'count' ].transform( sum ) , - stratum_count_total = x.loc[ x.age > 1 ].groupby( [ 'stratum_num' ] )[ 'count' ].transform( sum ) ) ) - .groupby( [ 'stratum_num' , 'age' ] ) + .pipe( lambda x: x.assign( stratum_count_all = x.groupby( [ 'stratum_num' ] )[ 'count' ].transform( 'sum' ) , + stratum_count_total = x.loc[ x.age > 1 ].groupby( [ 'stratum_num' ] )[ 'count' ].transform( 'sum' ) ) ) + .groupby( [ 'stratum_num' , 'age' ] , observed = False )[ [ 'age' , 'count' , 'stratum_count_all' , 'stratum_count_total' ] ] .apply( lambda df: pd.Series( { 'count_age_proportion_all': ( df[ 'count' ] / df.stratum_count_all ).sum() , 'count_age_proportion_adult': ( df.loc[ df.age > 1 ][ 'count' ] / df.stratum_count_total ).sum( ) - } ) ) + } ) , include_groups = True ) .reset_index( ) ) @@ -864,17 +869,17 @@ def strata_age_binned_weight_proportions( self , .dropna( how = 'any' ) .pipe( lambda df: df.assign( weight_stratum_all = df .groupby( [ 'stratum_num' ] )[ 'weight' ] - .transform( sum ) , + .transform( 'sum' ) , weight_stratum_adult = df .loc[ lambda x: x.age > 1 ] .groupby( [ 'stratum_num' 
] )[ 'weight' ] - .transform( sum ) ) ) + .transform( 'sum' ) ) ) .groupby( [ 'stratum_num' , 'age' ] ) .apply( lambda df: pd.Series( { 'weight_age_proportion_all': ( df.weight / df.weight_stratum_all ).sum( ) , 'weight_age_proportion_adult': ( df.weight / df.weight_stratum_adult ).sum( ) - } ) ) - .reset_index() + } ) , include_groups = False ) + .reset_index( ) ) # Calculate adult proportions/contributions (in terms of summed weight) for each stratum @@ -888,14 +893,15 @@ def strata_age_binned_weight_proportions( self , .count_variable( contrasts = [ 'stratum_num' , 'age' , 'length_bin' , 'sex' ] , variable = 'weight' , fun = 'sum' ) - .pipe( lambda df: df.assign( weight_total_all = df.groupby( [ 'stratum_num' , 'sex' ] )[ 'count' ].transform( sum ) , - weight_total_adult = df.loc[ df.age > 1 ].groupby( [ 'stratum_num' , 'sex' ] )[ 'count' ].transform( sum ) ) ) + .pipe( lambda df: df.assign( weight_total_all = df.groupby( [ 'stratum_num' , 'sex' ] )[ 'count' ].transform( 'sum' ) , + weight_total_adult = df.loc[ df.age > 1 ].groupby( [ 'stratum_num' , 'sex' ] )[ 'count' ].transform( 'sum' ) ) ) .groupby( [ 'stratum_num' , 'age' , 'sex' ] ) .apply( lambda x: pd.Series( { 'weight_sex_proportion_all': ( x[ 'count' ] / x.weight_total_all ).sum() , 'weight_sex_proportion_adult': ( x[ 'count' ] / x.weight_total_adult ).sum() - } ) ) + } ) , include_groups = False ) .reset_index( ) + .fillna( 0 ) ) length_sex_age_weight_proportions = ( @@ -908,10 +914,11 @@ def strata_age_binned_weight_proportions( self , .count_variable( contrasts = [ 'stratum_num' , 'age' , 'length_bin' , 'sex' ] , variable = 'weight' , fun = 'sum' ) - .pipe( lambda df: df.assign( weight_total_all = df.groupby( [ 'stratum_num' , 'sex' ] )[ 'count' ].transform( sum ) , - weight_total_adult = df.loc[ df.age > 1 ].groupby( [ 'stratum_num' , 'sex' ] )[ 'count' ].transform( sum ) ) ) + .pipe( lambda df: df.assign( weight_total_all = df.groupby( [ 'stratum_num' , 'sex' ] )[ 'count' ].transform( 'sum' ) , + 
weight_total_adult = df.loc[ df.age > 1 ].groupby( [ 'stratum_num' , 'sex' ] )[ 'count' ].transform( 'sum' ) ) ) .assign( weight_length_sex_proportion_all = lambda x: x[ 'count' ] / x.weight_total_all , weight_length_sex_proportion_adult = lambda x: x[ 'count' ] / x.weight_total_adult ) + .replace( np.nan , 0 ) ) ### Add these dataframes to the appropriate data attribute diff --git a/echopop/tests/conftest.py b/echopop/tests/conftest.py index bfe9ed1f..e1c9415d 100644 --- a/echopop/tests/conftest.py +++ b/echopop/tests/conftest.py @@ -1,65 +1,258 @@ import pytest +from typing import Union +import numpy as np +import pandas as pd from pathlib import Path from echopop import Survey +from _pytest.assertion.util import assertrepr_compare - -# Set up path to test_data folder +### Set up path to the `test_data` folder HERE = Path(__file__).parent.absolute() TEST_DATA_ROOT = HERE.parent / "test_data" -@pytest.fixture(scope="session") -def test_path(): +### Fixtures +# ---- Test root/config/input file paths +@pytest.fixture( scope = "session" ) +def test_path( ) : + return { - "ROOT": TEST_DATA_ROOT, - "CONFIG": TEST_DATA_ROOT / "config_files", - "INPUT": TEST_DATA_ROOT / "input_files", # this doesn't exist yet + "ROOT" : TEST_DATA_ROOT , + "CONFIG" : TEST_DATA_ROOT / "config_files" , + "INPUT" : TEST_DATA_ROOT / "input_files" , } +# ---- Mock `Survey` class object +@pytest.fixture( scope = "session") +def mock_survey( test_path ) -> Survey : -@pytest.fixture(scope="session") -def mock_survey(test_path) -> Survey: return Survey( - init_config_path=Path(test_path["CONFIG"] / "config_init.yml"), - survey_year_config_path=Path(test_path["CONFIG"] / "config_survey.yml"), + init_config_path =Path( test_path[ "CONFIG" ] / "config_init.yml" ) , + survey_year_config_path =Path( test_path[ "CONFIG" ] / "config_survey.yml" ) , ) - -# ============ below from previous version, remove after revamping is complete ============ -@pytest.fixture(scope="session") -def config_base_path() -> 
Path: +### Hook functions +def pytest_assertrepr_compare( config , op , left , right ): """ - Defines the base directory path for the - configuration files. - - Returns - ------- - pathlib.Path - The base directory path for the configuration files + Hook function that always shows the full `diff` on assertion + failures by increasing the verbosity (`config.option.verbose`) """ - return HERE / "../config_files" + ### Adjust configuration `diff` verbosity + config.option.verbose = 2 -@pytest.fixture(scope="session") -def reports_base_path() -> Path: + return assertrepr_compare( config , op , left , right) + +### Utility functions +# ---- DICTIONARY +# ++++ Shape and structure +def dictionary_shape( dictionary: dict ) : """ - Defines the base directory path were all reports - generated should be saved. - Returns - ------- - pathlib.Path - The base directory path for the reports + A utility test function that extracts the shape of a nested dictionary """ - return HERE / "tests/reports/echopop_python_output" + if isinstance( dictionary , dict ) : + return( { i: dictionary_shape( dictionary[ i ] ) for i in dictionary } ) + else: + return None + +# ---- DATAFRAME +# ++++ Shape +def dataframe_shape( input: Union[ pd.DataFrame , dict ] ): + + ### DataFrame + if isinstance( input , pd.DataFrame ) : -@pytest.fixture(scope="session") -def matlab_output_base_path() -> Path: + return input.shape + + ### Dictionary (bundled dataframes) + elif isinstance( input , dict ) : + dataframe_shapes = { } + + for key , value in input.items( ): + if isinstance( value , pd.DataFrame ) : + dataframe_shapes[ key ] = value.shape + elif isinstance( value , dict ) : + dataframe_shapes[ key ] = dataframe_shape( value ) + + return dataframe_shapes + +### Assertion functions +# ---- DICTIONARY +# ---- Shape and dimensions +def assert_dictionary_structure_equal( dictionary1: dict , + dictionary2: dict ) : """ - Defines the base directory path for the - Matlab output files. 
- Returns - ------- - pathlib.Path - The base directory path for the Matlab output files + Tests equality between the shapes of two nested dictionaries """ - return Path("") + + result = dictionary_shape( dictionary1 ) == dictionary_shape( dictionary2 ) + + if result : + assert result + else: + if set( dictionary_shape( dictionary1 ) ) <= set( dictionary_shape( dictionary2 ) ) : + tracked_true = [ ] + + for j in dictionary2.keys( ) : + test = set( dictionary1[ j ].keys( ) ) <= ( dictionary2[ j ].keys( ) ) + tracked_true.append( test ) + + if np.all( tracked_true ) : + assert True + else : + assert result + else : + assert result +# ---- dtypes +def assert_dictionary_dtypes_equal( dictionary , + reference_dictionary ) : + + for key in reference_dictionary : + if isinstance( reference_dictionary[ key ] , dict ) : + assert isinstance( dictionary[ key ] , dict ) , \ + f"Key '{ key }' has different types in the dictionaries." + assert_dictionary_dtypes_equal( dictionary[ key ] , + reference_dictionary[ key ] ) + elif isinstance( dictionary[ key ] , type ) : + assert np.issubdtype( type( dictionary[ key ] ) , + reference_dictionary[ key ] ) , \ + f"Datatype for key '{ key }' is not a subdtype of the reference datatype." + elif isinstance( reference_dictionary[ key ] , np.ndarray ) : + assert isinstance( dictionary[ key ] , np.ndarray ) , \ + f"Datatype for key '{ key }' is not the same as in reference dictionary." + assert np.issubdtype( dictionary[ key ].dtype , + reference_dictionary[ key ].dtype ) , \ + f"Dtype for key '{ key }' is not a subdtype of the reference dtype." +# ---- Values +def assert_dictionary_values_equal( dictionary , + reference_dictionary ) : + for key in dictionary : + if isinstance( dictionary[ key ] , dict ) : + assert isinstance( reference_dictionary[ key ] , dict ) , \ + f"Key '{ key }' has different types in the dictionaries." 
+ assert_dictionary_values_equal( dictionary[ key ] , + reference_dictionary[ key ] ) + elif isinstance( dictionary[ key ] , np.ndarray ) : + assert np.allclose( dictionary[ key ] , + reference_dictionary[ key ] ) , \ + f"Arrays for key '{key}' are not close." + else: + assert np.isclose( dictionary[ key ] , + reference_dictionary[ key ] ) , \ + f"Values for key '{key}' are not close." + +# ---- DATAFRAME +# ---- Shape and dimensions +def assert_dataframe_shape_equal( input: Union[ pd.DataFrame , dict ] , + reference: Union[ tuple , dict ] ): + + ### DataFrame + if ( isinstance( input , pd.DataFrame ) ) & ( isinstance( reference , tuple ) ) : + assert input.shape == reference + + ### Dictionary + elif ( isinstance( input , dict ) ) & ( isinstance( reference , dict ) ): + assert dataframe_shape( input ) == dataframe_shape( reference ) +# ---- dtypes +# ~~~~ !!!! ATTN: this is a nested function within `assert_dataframe_dtypes_equal`! +def _assert_dataframe_dtypes_equal( dataframe: pd.DataFrame , + reference_dictionary: dict ): + + ### Separate evaluation for categorical-type + # ---- Parse expected categorical variables + categorical_columns = [ k for k , v in reference_dictionary.items( ) if isinstance( v , pd.CategoricalDtype ) ] + + # ---- Assert that all categorical columns in the reference dictionary match the categorical + # ----- columns in the tested dataframe + assert np.all( dataframe.select_dtypes( include = [ 'category' ] ).columns.isin( categorical_columns ) ) + + # ---- Remove categorical columns from the dataframe + dataframe = dataframe.copy( ).drop( categorical_columns , axis = 1 ) + + ### Loop through columns to assert that dtypes from the tested dataframe + ### match those expected in a reference dictionary + for column , dtype in dataframe.dtypes.items( ): + assert np.issubdtype( dtype , reference_dictionary.get( column , object ) ) , \ + f"Data type mismatch for column '{ column }'" +# ~~~~ dtypes --> compatible with direct DataFrame or bundled 
DataFrames within a dictionary +def assert_dataframe_dtypes_equal( input: Union[ pd.DataFrame , dict ] , + reference: dict ): + + ### DataFrame + if isinstance( input , pd.DataFrame ) : + _assert_dataframe_dtypes_equal( input , reference ) + + ### Dictionary + elif isinstance( input , dict ) : + for category , data in reference.items( ) : + + # ---- Single Dictionary layer + if isinstance( input[ category ] , pd.DataFrame ): + _assert_dataframe_dtypes_equal( input[ category ] , + reference[ category ] ) + + # ---- Nested Dictionary layers + else: + for df_name , _ in data.items( ): + _assert_dataframe_dtypes_equal( input[ category ][ df_name ] , reference[ category ][ df_name ] ) +# ---- Values +# ~~~~ !!!! ATTN: this is a nested function within `assert_dataframe_equal`! +def _aassert_dataframe_values_equal( dataframe1: pd.DataFrame , + dataframe2: pd.DataFrame ): + + ### Evaluate equality between numerical values + assert np.allclose( dataframe1.select_dtypes( include = [ 'number' ] ) , + dataframe2.select_dtypes( include = [ 'number' ] ) , + equal_nan = True ) + + ### Evaluate equality between non-numerical values + # ---- Mask out "NaN" + dataframe1_nan_mask = dataframe1.isna( ).any( axis = 1 ) + dataframe2_nan_mask = dataframe2.isna( ).any( axis = 1 ) + # ---- Evaluate equality + dataframe1_nan_mask == dataframe2_nan_mask + # ---- Evaluate equality among "real" values + dataframe1_masked = dataframe1[ ~ dataframe1_nan_mask ] + dataframe2_masked = dataframe2[ ~ dataframe2_nan_mask ] + assert np.all( dataframe1_masked.select_dtypes( exclude = [ 'number' ] ) == dataframe2_masked.select_dtypes( exclude = [ 'number' ] ) ) +# ~~~~ Values --> compatible with direct DataFrame or bundled DataFrames within a dictionary +def assert_dataframe_values_equal( input: Union[ pd.DataFrame , dict ] , + reference: Union[ pd.DataFrame , dict ]): + + ### Direct DataFrame + if ( isinstance( input , pd.DataFrame ) & ( isinstance( reference , pd.DataFrame ) ) ) : + 
_aassert_dataframe_values_equal( input , reference ) + + ### Iterate through nested DataFrames within each dictionary + else : + for key , expected_df in reference.items( ) : + + if isinstance( input[ key ] , pd.DataFrame ) : + _aassert_dataframe_values_equal( input[ key ] , expected_df ) + + else : + for sub_key , _ in reference[ key ].items( ): + _aassert_dataframe_values_equal( input[ key ][ sub_key ] , + expected_df[ sub_key ] ) +# ++++ DICTIONARY + DATAFRAME BUNDLING +# ---> Dictionary +def assert_dictionary_equal( input: dict , + reference_dtypes: dict , + reference_values: dict , ) : + + ### Shape + assert_dictionary_structure_equal( input , reference_values ) + ### dtypes + assert_dictionary_dtypes_equal( input , reference_dtypes ) + ### Values + assert_dictionary_values_equal( input , reference_values ) +# ---> DataFrame +def assert_dataframe_equal( input: Union[ pd.DataFrame , dict ] , + reference_dtypes: dict , + reference_values: Union[ pd.DataFrame , dict ] , ) : + ### Shape + assert_dataframe_shape_equal( input , reference_values ) + ### dtypes + assert_dataframe_dtypes_equal( input , reference_dtypes ) + ### Values + assert_dataframe_values_equal( input , reference_values ) \ No newline at end of file diff --git a/echopop/tests/test_data_loader.py b/echopop/tests/test_data_loader.py index a9e9a70b..9fe62504 100644 --- a/echopop/tests/test_data_loader.py +++ b/echopop/tests/test_data_loader.py @@ -1,7 +1,11 @@ import yaml +import numpy as np from pathlib import Path +import copy from echopop import Survey -from echopop.utils.data_file_validation import load_configuration , validate_data_columns +from echopop.core import LAYER_NAME_MAP +from echopop.utils.data_file_validation import load_configuration +from echopop.tests.conftest import assert_dictionary_structure_equal def test_load_configuration(test_path, tmp_path): init_params = yaml.safe_load( @@ -32,6 +36,95 @@ def test_load_configuration(test_path, tmp_path): ) -def test_init(mock_survey): +def 
test_init( mock_survey ): objS = mock_survey - assert isinstance(objS, Survey) \ No newline at end of file + assert isinstance( objS , Survey ) + + +def test_load_survey_data( mock_survey , + test_path ): + + ### Pull in configuration values + mock_survey.config = load_configuration( Path( test_path[ 'CONFIG' ] / 'config_init.yml' ) , + Path( test_path[ 'CONFIG' ] / 'config_survey.yml' ) ) + + ### Initialize data attributes + mock_survey.acoustics = copy.deepcopy( LAYER_NAME_MAP['NASC']['data_tree'] ) + mock_survey.biology = copy.deepcopy( LAYER_NAME_MAP['biological']['data_tree'] ) + mock_survey.spatial = copy.deepcopy( LAYER_NAME_MAP['stratification']['data_tree'] ) + mock_survey.statistics = copy.deepcopy( LAYER_NAME_MAP['kriging']['data_tree'] ) + + ### Load in data using the `load_survey_data` method + mock_survey.load_survey_data( ) + + # ----------------- + ### Evaluate results + # ----------------- + ### Dictionary structure + # !!! TODO: based on the original data structure -- will need to be updated once the core data structure is also updated + # ---- Check attributes + assert set( [ 'acoustics' , 'biology' , 'spatial' , 'statistics' ] ) <= set( dir( mock_survey ) ) + # ---- Check sub-directory keys + assert_dictionary_structure_equal( mock_survey.acoustics , LAYER_NAME_MAP['NASC']['data_tree'] ) + assert_dictionary_structure_equal( mock_survey.biology , LAYER_NAME_MAP['biological']['data_tree'] ) + assert_dictionary_structure_equal( mock_survey.spatial , LAYER_NAME_MAP['stratification']['data_tree'] ) + assert_dictionary_structure_equal( mock_survey.statistics , LAYER_NAME_MAP['kriging']['data_tree'] ) + ### Data structure + # ++++ acoustics + assert mock_survey.acoustics[ 'nasc' ][ 'nasc_df' ].shape == tuple( [ 1 , 10 ] ) + # ++++ biology + assert mock_survey.biology[ 'catch_df' ].shape == tuple( [ 2 , 7 ] ) + assert mock_survey.biology[ 'distributions' ][ 'age_bins_arr' ].shape == tuple( [ 0 , ] ) + assert mock_survey.biology[ 'distributions' ][ 
'length_bins_arr' ].shape == tuple( [ 0 , ] ) + assert mock_survey.biology[ 'haul_to_transect_df' ].shape == tuple( [ 2 , 5 ] ) + assert mock_survey.biology[ 'length_df' ].shape == tuple( [ 2 , 10 ] ) + assert mock_survey.biology[ 'specimen_df' ].shape == tuple( [ 2 , 11 ] ) + # ++++ spatial + assert mock_survey.spatial[ 'strata_df' ].shape == tuple( [ 1 , 3 ] ) + assert mock_survey.spatial[ 'geo_strata_df' ].shape == tuple( [ 1 , 2 ] ) + assert mock_survey.spatial[ 'inpfc_strata_df' ].shape == tuple( [ 1 , 2 ] ) + # ++++ statistics + assert mock_survey.statistics[ 'kriging' ][ 'mesh_df' ].shape == tuple( [ 19843 , 3 ] ) + assert mock_survey.statistics[ 'kriging' ][ 'isobath_200m_df' ].shape == tuple( [ 147 , 2 ] ) + assert len( mock_survey.statistics[ 'kriging' ][ 'model_config' ] ) == 39 + assert len( mock_survey.statistics[ 'variogram' ][ 'model_config' ] ) == 13 + ### Test merged outputs + assert set( mock_survey.biology[ 'haul_to_transect_df' ].columns ) <= set( mock_survey.biology[ 'catch_df' ].columns ) + assert set( mock_survey.biology[ 'haul_to_transect_df' ].columns ) <= set( mock_survey.biology[ 'length_df' ].columns ) + assert set( mock_survey.biology[ 'haul_to_transect_df' ].columns ) <= set( mock_survey.biology[ 'specimen_df' ].columns ) + ### Test biological data (sex definition) + assert np.all( ( mock_survey.biology[ 'length_df' ].sex == 'female' ) & ( mock_survey.biology[ 'length_df' ].group == 'sexed' ) ) + assert np.all( ( mock_survey.biology[ 'specimen_df' ].sex == [ 'male' , 'female' ] ) & ( mock_survey.biology[ 'specimen_df' ].group == 'sexed' ) ) + +def test_biometric_distributions( mock_survey , + test_path ): + + ### Pull in configuration values + mock_survey.config = load_configuration( Path( test_path[ 'CONFIG' ] / 'config_init.yml' ) , + Path( test_path[ 'CONFIG' ] / 'config_survey.yml' ) ) + + ### Initialize data attributes + mock_survey.acoustics = copy.deepcopy( LAYER_NAME_MAP['NASC']['data_tree'] ) + mock_survey.biology = 
copy.deepcopy( LAYER_NAME_MAP['biological']['data_tree'] ) + mock_survey.spatial = copy.deepcopy( LAYER_NAME_MAP['stratification']['data_tree'] ) + mock_survey.statistics = copy.deepcopy( LAYER_NAME_MAP['kriging']['data_tree'] ) + + ### Load in data using the `load_survey_data` method + mock_survey.load_survey_data( ) + + ### Generate length and age distributions + mock_survey.biometric_distributions( ) + + # ----------------- + ### Evaluate results + # ----------------- + ### Data structure + assert mock_survey.biology[ 'distributions' ][ 'age' ][ 'age_interval_arr' ].shape == tuple( [ 23 , ] ) + assert mock_survey.biology[ 'distributions' ][ 'age' ][ 'age_bins_arr' ].shape == tuple( [ 22 , ] ) + assert mock_survey.biology[ 'distributions' ][ 'length' ][ 'length_interval_arr' ].shape == tuple( [ 41 , ] ) + assert mock_survey.biology[ 'distributions' ][ 'length' ][ 'length_bins_arr' ].shape == tuple( [ 40 , ] ) + ### Data equality + assert np.all( mock_survey.biology[ 'distributions' ][ 'age' ][ 'age_interval_arr' ] == np.linspace( 0.5 , 22.5 , 23 ) ) + assert np.all( mock_survey.biology[ 'distributions' ][ 'age' ][ 'age_bins_arr' ] == np.linspace( 1 , 22 , 22 ) ) + assert np.all( mock_survey.biology[ 'distributions' ][ 'length' ][ 'length_interval_arr' ] == np.linspace( 1 , 81 , 41 ) ) + assert np.all( mock_survey.biology[ 'distributions' ][ 'length' ][ 'length_bins_arr' ] == np.linspace( 2 , 80 , 40 ) ) \ No newline at end of file diff --git a/echopop/tests/test_data_transect_analysis.py b/echopop/tests/test_data_transect_analysis.py new file mode 100644 index 00000000..2be90b21 --- /dev/null +++ b/echopop/tests/test_data_transect_analysis.py @@ -0,0 +1,621 @@ +import pandas as pd +import numpy as np +from echopop.tests.conftest import assert_dataframe_equal + +def test_fit_binned_length_weight_relationship( mock_survey ): + + ### Initialize mock_survey for `length_weight` + mock_survey.statistics[ 'length_weight' ] = { } + + ### Re-parameterize `specimen_df` 
with dummy data + mock_survey.biology[ 'specimen_df' ] = pd.DataFrame( + { + 'stratum_num': [ 0 , 0 , 1 , 1 , 2 , 2 , 3 , 3 ] , + 'haul_num': [ 1 , 1 , 2 , 2 , 3 , 3 , 4 , 4 ] , + 'sex': np.tile( [ 'male' , 'female' ] , 4 ) , + 'group': np.repeat( 'sexed' , 8 ) , + 'species_id': np.repeat( [ 8675309 ] , 8 ) , + 'length': [ 2.0 , 3.0 , 4.0 , 5.0 , 6.0 , 7.0 , 8.0 , 9.0 ] , + 'weight': [ 4.0 , 9.0 , 16.0 , 25.0 , 36.0 , 49.0 , 64.0 , 81.0 ] , + } + ) + + ### Re-parameterize `length_bins` with dummy data + mock_survey.biology[ 'distributions' ][ 'length' ][ 'length_bins_arr' ] = ( + [ 2.0 , 5.0 , 8.0 , 11.0 ] + ) + + ### Re-parameterize `length_interval` with dummy data + mock_survey.biology[ 'distributions' ][ 'length' ][ 'length_interval_arr' ] = ( + [ 0.5 , 3.5 , 6.5 , 9.5 , 12.5 ] + ) + + ### Evaluate object for later comparison + mock_survey.fit_binned_length_weight_relationship( species_id = 8675309 ) + + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- Expected data types + expected_dtypes = { + 'regression_parameters': { + 'sex': object , + 'rate': np.floating , + 'initial': np.floating , + } , + 'length_weight_df': { + 'length_bin': pd.CategoricalDtype( ) , + 'sex': object , + 'mean_length': np.floating , + 'n_length': np.integer , + 'mean_weight': np.floating , + 'n_weight': np.integer , + 'rate': np.floating , + 'initial': np.floating , + 'weight_fitted': np.floating , + 'weight_modeled': np.floating , + } , + } + # ---- Expected output + expected_output = { + 'regression_parameters': pd.DataFrame( + { + 'sex': [ 'all' , 'female' , 'male' ] , + 'rate': [ 2.0 , 2.0 , 2.0 ] , + 'initial': [ 4.710277e-16 , -2.220446e-16 , 1.110223e-15 ] , + } , + ) , + 'length_weight_df': pd.DataFrame( + { + 'length_bin': pd.cut( np.repeat( [ 1 , 4 , 7 , 10 ] , 3 ) , + np.array( [ 0.5 , 3.5 , 6.5 , 9.5 , 12.5 ] ) ) , + 'sex': np.tile( [ 'all' , 'female' , 'male' ] , 4 ) , + 'mean_length': [ 2.5 , 3.0 , 2.0 , 5.0 , 5.0 , 
5.0 , + 8.0 , 8.0 , 8.0 , 0.0 , 0.0 , 0.0 ] , + 'n_length': [ 2 , 1 , 1 , 3 , 1 , 2 , + 3 , 2 , 1 , 0 , 0 , 0 ] , + 'mean_weight': [ 6.50 , 9.00 , 4.00 , 25.6666667 , 25.00 , 26.00 , + 64.6666667 , 65.00 , 64.00 , 0.00 , 0.00 , 0.00 ] , + 'n_weight': [ 2 , 1 , 1 , 3 , 1 , 2 , + 3 , 2 , 1 , 0 , 0 , 0 ] , + 'rate': np.repeat( 2.0 , 12 ) , + 'initial': np.tile( [ 4.710277e-16 , -2.220446e-16 , 1.110223e-15 ] , 4 ) , + 'weight_fitted': [ 4.0 , 4.0 , 4.0 , 25.0 , 25.0 , 25.0 , + 64.0 , 64.0 , 64.0 , 121.0 , 121.0 , 121.0 ] , + 'weight_modeled': [ 4.0 , 4.0 , 4.0 , 25.0 , 25.0 , 25.0 , + 64.0 , 64.0 , 64.0 , 121.0 , 121.0 , 121.0 ] , + } , + ) , + } + #---------------------------------- + ### Run tests: `fit_binned_length_weight_relationship` + #---------------------------------- + eval_dictionary = mock_survey.statistics[ 'length_weight' ] + assert_dataframe_equal( eval_dictionary , expected_dtypes , expected_output ) + +def test_strata_sex_weight_proportions( mock_survey ): + + ### Initialize mock_survey for `weight` + mock_survey.biology[ 'weight' ] = { } + + ### Initialize mock_survey for `length_weight` + mock_survey.statistics[ 'length_weight' ] = { } + + ### Re-parameterize `specimen_df` with dummy data + mock_survey.biology[ 'specimen_df' ] = pd.DataFrame( + { + 'stratum_num': np.repeat( [ 0 , 1 ] , 4 ).astype( np.int64 ) , + 'sex': np.tile( [ 'male' , 'female' ] , 4 ) , + 'group': np.repeat( 'sexed' , 8 ) , + 'haul_num': np.tile( [ 1 , 2 ] , 4 ) , + 'species_id': np.repeat( [ 8675309 ] , 8 ) , + 'length': [ 12.0 , 12.0 , 19.0 , 19.0 , 12.0 , 12.0 , 19.0 , 19.0 ] , + 'weight': [ 2.0 , 3.0 , 3.0 , 2.0 , 2.0 , 3.0 , 2.0 , 3.0 ] , + 'age': [ 1 , 1 , 2 , 2 , 1 , 1 , 2 , 2 ] + } + ) + + ### Re-parameterize `length_df` with dummy data + mock_survey.biology[ 'length_df' ] = pd.DataFrame( + { + 'stratum_num': np.repeat( [ 0 , 1 ] , 4 ).astype( np.int64 ) , + 'haul_num': [ 1 , 1 , 2 , 2 , 3 , 3 , 4 , 4 ] , + 'sex': np.tile( [ 'male' , 'female' ] , 4 ) , + 'group': 
np.repeat( 'sexed' , 8 ) , + 'species_id': np.repeat( [ 8675309 ] , 8 ) , + 'length': [ 12 , 12 , 19 , 19 , 12 , 12 , 19 , 19 ] , + 'length_count': [ 5 , 10 , 15 , 20 , 20 , 15 , 10 , 5 ] + } + ) + + ### Re-parameterize `fitted_weight` with dummy data + mock_survey.statistics[ 'length_weight' ][ 'length_weight_df' ] = pd.DataFrame( + { + 'length_bin': pd.cut( np.repeat( [ 12 , 18 ] , 3 ) , + np.linspace( 9 , 21 , 3 ) ) , + 'sex': np.repeat( [ 'all' , 'female' , 'male' ] , 2 ) , + 'n_length': [ 4 , 2 , 2 , 4 , 2 , 2 ] , + 'mean_weight': [ 2.5 , 3.5 , 1.5 , 7.5 , 6.5 , 8.5 ] , + 'n_weight': [ 4 , 2 , 2 , 4 , 2 , 2 ] , + 'rate': [ 2.63 , 1.36 , 3.90 , 2.63 , 1.36 , 3.90 ] , + 'initial': [ -2.49 , -0.93 , -4.06 , -2.49 , -0.93 , -4.06 ] , + 'weight_fitted': [ 2.21 , 3.46 , 1.41 , 6.43 , 6.02 , 6.87 ] , + 'weight_modeled': [ 2.21 , 3.46 , 1.41 , 6.43 , 6.02 , 6.87 ] + } + ) + + + ### Re-parameterize `length_df` with dummy data + mock_survey.biology[ 'length_df' ] = pd.DataFrame( + { + 'stratum_num': np.repeat( [ 0 , 1 ] , 4 ).astype( np.int64 ) , + 'sex': np.tile( [ 'male' , 'female' ] , 4 ) , + 'group': np.repeat( 'sexed' , 8 ) , + 'species_id': np.repeat( [ 8675309 ] , 8 ) , + 'length': [ 12 , 12 , 19 , 19 , 12 , 12 , 19 , 19 ] , + 'length_count': [ 5 , 10 , 15 , 20 , 20 , 15 , 10 , 5 ] + } + ) + + ### Re-parameterize `fitted_weight` with dummy data + mock_survey.statistics[ 'length_weight' ][ 'length_weight_df' ] = pd.DataFrame( + { + 'length_bin': pd.cut( np.repeat( [ 12 , 18 ] , 3 ) , + np.linspace( 9 , 21 , 3 ) ) , + 'sex': np.repeat( [ 'all' , 'female' , 'male' ] , 2 ) , + 'n_length': [ 4 , 2 , 2 , 4 , 2 , 2 ] , + 'mean_weight': [ 2.5 , 3.5 , 1.5 , 7.5 , 6.5 , 8.5 ] , + 'n_weight': [ 4 , 2 , 2 , 4 , 2 , 2 ] , + 'rate': [ 2.63 , 1.36 , 3.90 , 2.63 , 1.36 , 3.90 ] , + 'initial': [ -2.49 , -0.93 , -4.06 , -2.49 , -0.93 , -4.06 ] , + 'weight_fitted': [ 2.21 , 3.46 , 1.41 , 6.43 , 6.02 , 6.87 ] , + 'weight_modeled': [ 2.21 , 3.46 , 1.41 , 6.43 , 6.02 , 6.87 ] + } + ) 
+ + ### Re-parameterize `length_bins` with dummy data + mock_survey.biology[ 'distributions' ][ 'length' ][ 'length_interval_arr' ] = np.linspace( 9 , 21 , 3 ) + + ### Evaluate object for later comparison + mock_survey.strata_sex_weight_proportions( species_id = 8675309 ) + + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- Expected data types + expected_dtypes = { + 'stratum_num': np.integer , + 'proportion_female': np.floating , + 'proportion_male': np.floating , + 'proportion_station_1': np.floating , + 'proportion_station_2': np.floating , + 'average_weight_female': np.floating , + 'average_weight_male': np.floating , + 'average_weight_total': np.floating , + } + # ---- Expected output + expected_output = pd.DataFrame( + { + 'stratum_num': np.array( [ 0 , 1 ] ).astype( int ) , + 'proportion_female': [ 0.592593 , 0.407407 ] , + 'proportion_male': [ 0.407407 , 0.592593 ] , + 'proportion_station_1': [ 0.925926 , 0.925926 ] , + 'proportion_station_2': [ 0.074074 , 0.074074 ] , + 'average_weight_female': [ 4.719110 , 2.707892 ] , + 'average_weight_male': [ 6.640487 , 6.299942 ] , + 'average_weight_total': [ 3.066481 , 2.603519 ] , + } , + ) + + #---------------------------------- + ### Run tests: `strata_sex_weight_proportions` + #---------------------------------- + eval_dataframe = mock_survey.biology[ 'weight' ][ 'weight_strata_df' ] + assert_dataframe_equal( eval_dataframe , expected_dtypes , expected_output ) + +def test_strata_age_binned_weight_proportions( mock_survey ): + + ### Initialize mock_survey for `weight` + mock_survey.biology[ 'weight' ] = { } + + ### Re-parameterize `specimen_df` with dummy data + mock_survey.biology[ 'specimen_df' ] = pd.DataFrame( + { + 'stratum_num': np.repeat( [ 0 , 1 ] , 4 ) , + 'sex': np.tile( [ 'male' , 'female' ] , 4 ) , + 'group': np.repeat( 'sexed' , 8 ) , + 'haul_num': [ 1 , 1 , 2 , 2 , 3 , 3 , 4 , 4 ] , + 'species_id': np.repeat( [ 8675309 ] , 8 ) , + 'length': [ 
12.0 , 12.0 , 19.0 , 19.0 , 12.0 , 12.0 , 19.0 , 19.0 ] , + 'weight': [ 2.0 , 3.0 , 3.0 , 2.0 , 2.0 , 3.0 , 2.0 , 3.0 ] , + 'age': [ 1 , 1 , 2 , 2 , 1 , 1 , 2 , 2 ] , + } , + ) + + ### Re-parameterize `length_bins` with dummy data + mock_survey.biology[ 'distributions' ][ 'length' ][ 'length_interval_arr' ] = np.linspace( 9 , 21 , 3 ) + + ### Evaluate object for later comparison + mock_survey.strata_age_binned_weight_proportions( species_id = 8675309 ) + + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- Expected dtypes + expected_dtypes = { + 'age_proportions_df': { + 'stratum_num': np.integer , + 'age': np.integer , + 'count_age_proportion_all': np.floating , + 'count_age_proportion_adult': np.floating , + } , + 'age_weight_proportions_df': { + 'stratum_num': np.integer , + 'age': np.integer , + 'weight_age_proportion_all': np.floating , + 'weight_age_proportion_adult': np.floating , + } , + 'sex_age_weight_proportions_df': { + 'stratum_num': np.integer , + 'age': np.integer , + 'sex': object , + 'weight_sex_proportion_all': np.floating , + 'weight_sex_proportion_adult': np.floating , + } , + 'length_sex_age_weight_proportions_df': { + 'stratum_num': np.integer , + 'age': np.integer , + 'length_bin': pd.CategoricalDtype( ) , + 'sex': object , + 'count': np.floating , + 'weight_total_all': np.floating , + 'weight_total_adult': np.floating , + 'weight_length_sex_proportion_all': np.floating , + 'weight_length_sex_proportion_adult': np.floating , + } , + } + # ---- Expected output + expected_output = { + 'age_proportions_df': pd.DataFrame( { + 'stratum_num': np.repeat( [ 0 , 1 ] , 2 ).astype( np.int64 ) , + 'age': np.tile( [ 1 , 2 ] , 2 ).astype( np.int64 ) , + 'count_age_proportion_all': np.repeat( 0.5 , 4 ) , + 'count_age_proportion_adult': [ 0.0 , 1.0 , 0.0 , 1.0 ] + } ) , + 'age_weight_proportions_df': pd.DataFrame( { + 'stratum_num': np.repeat( [ 0 , 1 ] , 2 ).astype( np.int64 ) , + 'age': np.tile( [ 1 , 
2 ] , 2 ).astype( np.int64 ) , + 'weight_age_proportion_all': [ 0.50 , 0.50 , 0.50 , 0.50 ] , + 'weight_age_proportion_adult': [ 0.0 , 1.0 , 0.0 , 1.0 ] + } ) , + 'sex_age_weight_proportions_df': pd.DataFrame( { + 'stratum_num': np.repeat( [ 0 , 1 ] , 6 ).astype( np.int64 ) , + 'age': np.tile( [ 1 , 1 , 1 , 2 , 2 , 2 ] , 2 ).astype( np.int64 ) , + 'sex': np.tile( [ 'all' , 'female' , 'male' ] , 4 ) , + 'weight_sex_proportion_all': [ 0.5 , 0.6 , 0.4 , 0.5 , 0.4 , 0.6 , + 0.5 , 0.5 , 0.5 , 0.5 , 0.5 , 0.5 ] , + 'weight_sex_proportion_adult': np.tile( [ 0.0 , 0.0 , 0.0 , 1.0 , 1.0 , 1.0 ] , 2 ) + } ) , + 'length_sex_age_weight_proportions_df': pd.DataFrame( { + 'stratum_num': np.repeat( [ 0 , 1 ] , 12 ).astype( np.int64 ) , + 'age': np.tile( [ 1 , 1 , 1 , 1 , 1 , 1 , + 2 , 2 , 2 , 2 , 2 , 2 ] , 2 ).astype( np.int64 ) , + 'length_bin': pd.cut( np.tile( [ 12.0 , 12.0 , 12.0 , 18.0 , 18.0 , 18.0 ] , 4 ) , + np.linspace( 9 , 21 , 3 ) ) , + 'sex': np.tile( [ 'all' , 'female' , 'male' ] , 8 ) , + 'count': [ 5.0 , 3.0 , 2.0 , 0.0 , 0.0 , 0.0 , + 0.0 , 0.0 , 0.0 , 5.0 , 2.0 , 3.0 , + 5.0 , 3.0 , 2.0 , 0.0 , 0.0 , 0.0 , + 0.0 , 0.0 , 0.0 , 5.0 , 3.0 , 2.0 ] , + 'weight_total_all': [ 10.0 , 5.0 , 5.0 , 10.0 , 5.0 , 5.0 , + 10.0 , 5.0 , 5.0 , 10.0 , 5.0 , 5.0 , + 10.0 , 6.0 , 4.0 , 10.0 , 6.0 , 4.0 , + 10.0 , 6.0 , 4.0 , 10.0 , 6.0 , 4.0 ] , + 'weight_total_adult': [ 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , + 5.0 , 2.0 , 3.0 , 5.0 , 2.0 , 3.0 , + 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , + 5.0 , 3.0 , 2.0 , 5.0 , 3.0 , 2.0 ] , + 'weight_length_sex_proportion_all': [ 0.5 , 0.6 , 0.4 , 0.0 , 0.0 , 0.0 , + 0.0 , 0.0 , 0.0 , 0.5 , 0.4 , 0.6 , + 0.5 , 0.5 , 0.5 , 0.0 , 0.0 , 0.0 , + 0.0 , 0.0 , 0.0 , 0.5 , 0.5 , 0.5 ] , + 'weight_length_sex_proportion_adult': np.tile( [ 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , + 0.0 , 0.0 , 0.0 , 1.0 , 1.0 , 1.0 ] , 2 ) , + } , ) , + } + + #---------------------------------- + ### Run tests: `strata_age_binned_weight_proportions` + 
#---------------------------------- + eval_dictionary = mock_survey.biology[ 'weight' ][ 'proportions' ] + assert_dataframe_equal( eval_dictionary , expected_dtypes , expected_output ) + +def test_nasc_to_biomass_conversion( mock_survey ): + + ### Initialize various attributes + mock_survey.acoustics[ 'sigma_bs' ] = { } + mock_survey.statistics[ 'length_weight' ] = { } + mock_survey.biology[ 'weight' ] = { } + mock_survey.biology[ 'population' ] = { } + + ### Create mock data for `age_proportions_df` + mock_survey.biology[ 'weight' ][ 'proportions' ] = { } + mock_survey.biology[ 'weight' ][ 'proportions' ][ 'age_proportions_df' ] = pd.DataFrame( { + 'stratum_num': np.repeat( [ 0 , 1 ] , 2 ).astype( np.int64 ) , + 'age': np.tile( [ 1 , 2 ] , 2 ).astype( np.int64 ) , + 'count_age_proportion_all': np.repeat( 0.5 , 4 ) , + 'count_age_proportion_adult': [ 0.0 , 1.0 , 0.0 , 1.0 ] + } , ) + + ### Create mock data for `age_weight_proportions_df` + mock_survey.biology[ 'weight' ][ 'proportions' ][ 'age_weight_proportions_df' ] = pd.DataFrame( { + 'stratum_num': np.repeat( [ 0 , 1 ] , 2 ).astype( np.int64 ) , + 'age': np.tile( [ 1 , 2 ] , 2 ).astype( np.int64 ) , + 'weight_age_proportion_all': [ 0.50 , 0.50 , 0.50 , 0.50 ] , + 'weight_age_proportion_adult': [ 0.0 , 1.0 , 0.0 , 1.0 ] , + } , ) + + ### Create mock data for `sex_age_weight_proportions_df` + mock_survey.biology[ 'weight' ][ 'proportions' ][ 'sex_age_weight_proportions_df' ] = pd.DataFrame( { + 'stratum_num': np.repeat( [ 0 , 1 ] , 6 ).astype( np.int64 ) , + 'age': np.tile( [ 1 , 1 , 1 , 2 , 2 , 2 ] , 2 ).astype( np.int64 ) , + 'sex': np.tile( [ 'all' , 'female' , 'male' ] , 4 ) , + 'weight_sex_proportion_all': [ 0.5 , 0.6 , 0.4 , 0.5 , 0.4 , 0.6 , + 0.5 , 0.5 , 0.5 , 0.5 , 0.5 , 0.5 ] , + 'weight_sex_proportion_adult': np.tile( [ 0.0 , 0.0 , 0.0 , 1.0 , 1.0 , 1.0 ] , 2 ) , + } , ) + + ### Create mock data for 'length_weight_df' + mock_survey.statistics[ 'length_weight' ][ 'length_weight_df' ] = pd.DataFrame( + { 
+ 'length_bin': pd.cut( np.repeat( [ 12 , 18 ] , 3 ) , + np.linspace( 9 , 21 , 3 ) ) , + 'sex': np.repeat( [ 'all' , 'female' , 'male' ] , 2 ) , + 'n_length': [ 4 , 2 , 2 , 4 , 2 , 2 ] , + 'mean_weight': [ 2.5 , 3.5 , 1.5 , 7.5 , 6.5 , 8.5 ] , + 'n_weight': [ 4 , 2 , 2 , 4 , 2 , 2 ] , + 'rate': [ 2.63 , 1.36 , 3.90 , 2.63 , 1.36 , 3.90 ] , + 'initial': [ -2.49 , -0.93 , -4.06 , -2.49 , -0.93 , -4.06 ] , + 'weight_fitted': [ 2.21 , 3.46 , 1.41 , 6.43 , 6.02 , 6.87 ] , + 'weight_modeled': [ 2.21 , 3.46 , 1.41 , 6.43 , 6.02 , 6.87 ] , + } , + ) + + ### Create mock data for `weight_strata_df` + mock_survey.biology[ 'weight' ][ 'weight_strata_df' ] = pd.DataFrame( + { + 'stratum_num': [ 0 , 1 ] , + 'proportion_female': [ 0.592593 , 0.407407 ] , + 'proportion_male': [ 0.407407 , 0.592593 ] , + 'proportion_station_1': [ 0.925926 , 0.925926 ] , + 'proportion_station_2': [ 0.074074 , 0.074074 ] , + 'average_weight_female': [ 4.719110 , 2.707892 ] , + 'average_weight_male': [ 6.640487 , 6.299942 ] , + 'average_weight_total': [ 3.066481 , 2.603519 ] , + } , + ) + + ### Create mock data for `strata_mean` (sigma_bs) + mock_survey.acoustics[ 'sigma_bs' ][ 'strata_mean' ] = pd.DataFrame( + { + 'stratum_num': [ 0 , 1 ] , + 'species_id': np.repeat( 8675309 , 2 ) , + 'sigma_bs_mean': 1.630277e-8 , + } , + ) + + ### Create mock data for `nasc_df` + mock_survey.acoustics[ 'nasc' ][ 'nasc_df' ] = pd.DataFrame( + { + 'transect_num': [ 1 , 2 , 3 , 4] , + 'stratum_num': [ 0 , 0 , 1 , 1 ] , + 'vessel_log_start': [ 0.0 , 10.1 , 20.1 , 30.1 ] , + 'vessel_log_end': [ 10.0 , 20.0 , 30.0 , 40.0 ] , + 'latitude': [ 20.0 , 30.0 , 40.0 , 50.0 ] , + 'longitude': [ -180.0 , -120.0 , -170.0 , -110.0 ] , + 'transect_spacing': np.repeat( 1.0 , 4 ) , + 'NASC_no_age1': [ 0.0 , 1e1 , 1e2 , 1e3 ] , + 'haul_num': [ 1 , 1 , 2 , 2 ] , + 'NASC_all_ages': [ 1e1 , 1e2 , 1e2 , 1e3 ] , + } , + ) + + ### Create mock data for `strata_df` + mock_survey.spatial[ 'strata_df' ] = pd.DataFrame( + { + 'stratum_num': [ 0 , 
1 ] , + 'haul_num': [ 1 , 2 ] , + 'fraction_hake': [ 1.000 , 0.500 ] , + } , + ) + + ### Evaluate object for later comparison + mock_survey.nasc_to_biomass_conversion( species_id = 8675309 ) + + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- Expected dtypes + expected_dtypes = { + 'areal_density': { + 'number_density_df': { + 'transect_num': np.integer , + 'latitude': np.floating , + 'longitude': np.floating , + 'stratum_num': np.integer , + 'sex': object , + 'rho_a': np.floating , + 'age': np.integer , + 'count_age_proportion_all': np.floating , + 'count_age_proportion_adult': np.floating , + 'rho_a_adult': np.floating , + } , + 'biomass_density_df': { + 'transect_num': np.integer , + 'latitude': np.floating , + 'longitude': np.floating , + 'stratum_num': np.integer , + 'sex': object , + 'B_a': np.floating , + 'age': np.integer , + 'count_age_proportion_all': np.floating , + 'count_age_proportion_adult': np.floating , + 'B_a_adult': np.floating , + } , + } , + 'abundance': { + 'abundance_df': { + 'transect_num': np.integer , + 'latitude': np.floating , + 'longitude': np.floating , + 'stratum_num': np.integer , + 'sex': object , + 'NASC_all_ages': np.floating , + 'NASC_no_age1': np.floating , + + 'N': np.floating , + 'age': np.integer , + 'count_age_proportion_all': np.floating , + 'count_age_proportion_adult': np.floating , + 'N_adult': np.floating , + } , + } , + 'biomass': { + 'biomass_df': { + 'transect_num': np.integer , + 'latitude': np.floating , + 'longitude': np.floating , + 'stratum_num': np.integer , + 'sex': object , + 'B': np.floating , + 'age': np.integer , + 'count_age_proportion_all': np.floating , + 'count_age_proportion_adult': np.floating , + 'B_adult': np.floating , + } , + 'biomass_age_df': { + 'transect_num': np.integer , + 'latitude': np.floating , + 'longitude': np.floating , + 'stratum_num': np.integer , + 'age': np.integer , + 'sex': object , + 'age_proportion': np.floating , + 
'B_age': np.floating , + } , + } , + } + # ----- Expected output + expected_output = { + 'areal_density': { + 'number_density_df': pd.DataFrame( { + 'transect_num': np.repeat( [ 1 , 2 , 3 , 4 ] , 8 ).astype( np.int64 ) , + 'latitude': np.repeat( [ 20.0 , 30.0 , 40.0 , 50.0 ] , 8 ) , + 'longitude': np.repeat( [ -180.0 , -120.0 , -170.0 , -110.0 ] , 8 ) , + 'stratum_num': np.repeat( [ 0 , 1 ] , 16 ).astype( np.int64 ) , + 'sex': np.tile( [ 'all' , 'all' , 'male' , 'male' , + 'female' , 'female' , 'unsexed' , 'unsexed' ] , 4 ) , + 'rho_a': np.concatenate( [ np.repeat( 0.0 , 8 ) , + [ 4.881224e7 , 4.881224e7 , 1.988645e7 , 1.988645e7 , 2.892579e7 , 2.892579e7 , 0.0 , 0.0 , + 2.440612e8 , 2.440612e8 , 1.446290e8 , 1.446290e8 , 9.943224e7 , 9.943224e7 , 0.0 , 0.0 , + 2.440612e9 , 2.440612e9 , 1.446290e9 , 1.446290e9 , 9.943224e8 , 9.943224e8 , 0.0 , 0.0 ] ] ) , + 'age': np.tile( [ 1 , 2 ] , 16 ).astype( np.int64 ) , + 'count_age_proportion_all': np.repeat( 0.5 , 32 ) , + 'count_age_proportion_adult': np.tile( [ 0.0 , 1.0 ] , 16 ) , + 'rho_a_adult': np.concatenate( [ np.repeat( 0.0 , 9 ) , + [ 4.881224e7 , 0.0 , 1.988645e7 , 0.0 , 2.892579e7 , 0.0 , 0.0 , 0.0 , + 2.440612e8 , 0.0 , 1.446290e8 , 0.0 , 9.943224e7 , 0.0 , 0.0 , 0.0 , + 2.440612e9 , 0.0 , 1.446290e9 , 0.0 , 9.943224e8 , 0.0 , 0.0] ] ) , + } ) , + 'biomass_density_df': pd.DataFrame( { + 'transect_num': np.repeat( [ 1 , 2 , 3 , 4 ] , 8 ).astype( np.int64 ) , + 'latitude': np.repeat( [ 20.0 , 30.0 , 40.0 , 50.0 ] , 8 ) , + 'longitude': np.repeat( [ -180.0 , -120.0 , -170.0 , -110.0 ] , 8 ) , + 'stratum_num': np.repeat( [ 0 , 1 ] , 16 ).astype( np.int64 ) , + 'sex': np.tile( [ 'all' , 'all' , 'male' , 'male' , + 'female' , 'female' , 'unsexed' , 'unsexed' ] , 4 ) , + 'B_a': np.concatenate( [ np.repeat( 0.0 , 8 ) , + [ 1.496818e8 , 1.496818e8 , 1.320557e8 , 1.320557e8 , 1.365040e8 , 1.365040e8 , 0.0 , 0.0 , + 6.354180e8 , 6.354180e8 , 9.111540e8 , 9.111540e8 , 2.692518e8 , 2.692518e8 , 0.0 , 0.0 , + 6.354180e9 , 
6.354180e9 , 9.111540e9 , 9.111540e9 , 2.692518e9 , 2.692518e9 , 0.0 , 0.0 ] ] ) , + 'age': np.tile( [ 1 , 2 ] , 16 ).astype( np.int64 ) , + 'count_age_proportion_all': np.repeat( 0.5 , 32 ) , + 'count_age_proportion_adult': np.tile( [ 0.0 , 1.0 ] , 16 ) , + 'B_a_adult': np.concatenate( [ np.repeat( 0.0 , 9 ) , + [ 1.496818e8 , 0.0 , 1.320557e8 , 0.0 , 1.365040e8 , 0.0 , 0.0 , 0.0 , + 6.354180e8 , 0.0 , 9.111540e8 , 0.0 , 2.692518e8 , 0.0 , 0.0 , 0.0 , + 6.354180e9 , 0.0 , 9.111540e9 , 0.0 , 2.692518e9 , 0.0 , 0.0] ] ) , + } ) , + } , + 'abundance': { + 'abundance_df': pd.DataFrame( { + 'transect_num': np.repeat( [ 1 , 2 , 3 , 4 ] , 8 ).astype( np.int64 ) , + 'latitude': np.repeat( [ 20.0 , 30.0 , 40.0 , 50.0 ] , 8 ) , + 'longitude': np.repeat( [ -180.0 , -120.0 , -170.0 , -110.0 ] , 8 ) , + 'stratum_num': np.repeat( [ 0 , 1 ] , 16 ).astype( np.int64 ) , + 'sex': np.tile( [ 'all' , 'all' , 'male' , 'male' , + 'female' , 'female' , 'unsexed' , 'unsexed' ] , 4 ) , + 'NASC_all_ages': np.concatenate( [ np.repeat( 1e1 , 8 ) , + np.repeat( 1e2 , 16 ) , + np.repeat( 1e3 , 8 ) ] ) , + 'NASC_no_age1': np.concatenate( [ np.repeat( 0 , 8 ) , + np.repeat( 1e1 , 8 ) , + np.repeat( 1e2 , 8 ) , + np.repeat( 1e3 , 8 ) ] ) , + + 'N': np.concatenate( [ np.repeat( 0.0 , 8 ) , + [ 4.881224e8 , 4.881224e8 , 1.988645e8 , 1.988645e8 , 2.892579e8 , 2.892579e8 , 0.0 , 0.0 , + 2.440612e9 , 2.440612e9 , 1.44629e9 , 1.44629e9 , 9.943224e8 , 9.943224e8 , 0.0 , 0.0 , + 2.416206e10 , 2.416206e10 , 1.431827e10 , 1.431827e10 , 9.843792e9 , 9.843792e9 , 0.0 , 0.0 ] ] ) , + 'age': np.tile( [ 1 , 2 ] , 16 ).astype( np.int64 ) , + 'count_age_proportion_all': np.repeat( 0.5 , 32 ) , + 'count_age_proportion_adult': np.tile( [ 0.0 , 1.0 ] , 16 ) , + 'N_adult': np.concatenate( [ np.repeat( 0.0 , 9 ) , + [ 4.881224e8 , 0.0 , 1.988645e8 , 0.0 , 2.892579e8, 0.0 , 0.0 , 0.0 , + 2.440612e9 , 0.0 , 1.44629e9 , 0.0 , 9.943224e8 , 0.0 , 0.0 , 0.0 , + 2.416206e10 , 0.0 , 1.431827e10 , 0.0 , 9.843792e9 , 0.0 , 0.0] 
] ) , + } ) , + } , + 'biomass': { + 'biomass_df': pd.DataFrame( { + 'transect_num': np.repeat( [ 1 , 2 , 3 , 4 ] , 8 ).astype( np.int64 ) , + 'latitude': np.repeat( [ 20.0 , 30.0 , 40.0 , 50.0 ] , 8 ) , + 'longitude': np.repeat( [ -180.0 , -120.0 , -170.0 , -110.0 ] , 8 ) , + 'stratum_num': np.repeat( [ 0 , 1 ] , 16 ).astype( np.int64 ) , + 'sex': np.tile( [ 'all' , 'all' , 'male' , 'male' , + 'female' , 'female' , 'unsexed' , 'unsexed' ] , 4 ) , + 'B': np.concatenate( [ np.repeat( 0.0 , 8 ) , + [ 1.496818e9 , 1.496818e9 , 1.320557e9 , 1.320557e9 , 1.365040e9 , 1.365040e9 , 0.0 , 0.0 , + 6.354180e9 , 6.354180e9 , 9.111540e9 , 9.111540e9 , 2.692518e9 , 2.692518e9 , 0.0 , 0.0 , + 6.290638e10 , 6.290638e10 , 9.020425e10 , 9.020425e10 , 2.665593e10 , 2.665593e10 , 0.0 , 0.0 ] ] ) , + 'age': np.tile( [ 1 , 2 ] , 16 ).astype( np.int64 ) , + 'count_age_proportion_all': np.repeat( 0.5 , 32 ) , + 'count_age_proportion_adult': np.tile( [ 0.0 , 1.0 ] , 16 ) , + 'B_adult': np.concatenate( [ np.repeat( 0.0 , 9 ) , + [ 1.496818e9 , 0.0 , 1.320557e9 , 0.0 , 1.365040e9 , 0.0 , 0.0 , 0.0 , + 6.354180e9 , 0.0 , 9.111540e9 , 0.0 , 2.692518e9 , 0.0 , 0.0 , 0.0 , + 6.290638e10 , 0.0 , 9.020425e10 , 0.0 , 2.665593e10 , 0.0 , 0.0] ] ) , + } ) , + 'biomass_age_df': pd.DataFrame( { + 'transect_num': np.repeat( [ 1 , 2 , 3 , 4 ] , 6 ).astype( np.int64 ) , + 'latitude': np.repeat( [ 20.0 , 30.0 , 40.0 , 50.0 ] , 6 ) , + 'longitude': np.repeat( [ -180.0 , -120.0 , -170.0 , -110.0 ] , 6 ) , + 'stratum_num': np.repeat( [ 0 , 1 ] , 12 ).astype( np.int64 ) , + 'age': np.tile( [ 1 , 2 ] , 12 ).astype( np.int64 ) , + 'sex': np.tile( [ 'all' , 'all' , 'male' , 'male' , 'female' , 'female' ] , 4 ) , + 'age_proportion': np.tile( [ 0.0 , 1.0 ] , 12 ) , + 'B_age': np.concatenate( [ np.repeat( 0.0 , 7 ) , + [ 1.496818e9 , 0.000 , 1.320557e9 , 0.000 , 1.365040e9 , 0.000 , + 6.354180e9 , 0.000 , 9.111540e9 , 0.000 , 2.692518e9 , 0.000 , + 6.290638e10 , 0.000 , 9.020425e10 , 0.000 , 2.665593e10 ] ] ) , + } 
) , + } , + } + + #---------------------------------- + ### Run tests: `test_nasc_to_biomass_conversion` + #---------------------------------- + eval_dictionary = mock_survey.biology[ 'population' ] + assert_dataframe_equal( eval_dictionary , expected_dtypes , expected_output ) + + \ No newline at end of file diff --git a/echopop/tests/test_operations.py b/echopop/tests/test_operations.py new file mode 100644 index 00000000..e5da960b --- /dev/null +++ b/echopop/tests/test_operations.py @@ -0,0 +1,581 @@ +import numpy as np +import pandas as pd +from echopop.computation.operations import bin_variable , bin_stats , count_variable , meld , stretch , group_merge +from echopop.tests.conftest import assert_dataframe_equal + +def test_bin_variable( ): + + ### Mock dataframe + test_dataframe = pd.DataFrame( + { + 'animal': [ 'pretty pink pony' , 'big blue bass' , 'silly silver silkworm' ] , + 'length': [ 2.0 , 4.0 , 8.0 ] , + } , + ) + + ### Mock bin_values + test_bin_values = np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) + + ### Evaluate for later comparison + # ---- Monkey patch method (TEMPORARY) + eval_dataframe_monkey = test_dataframe.bin_variable( test_bin_values , 'length' ) + # ---- Normal function + eval_dataframe_function = bin_variable( test_dataframe , test_bin_values , 'length' ) + + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- Expected dtypes + expected_dtypes = { + 'animal': object , + 'length': np.floating , + 'length_bin': pd.CategoricalDtype( ) , + } + # ---- Expected output + expected_output = pd.DataFrame( + { + 'animal': [ 'pretty pink pony' , 'big blue bass' , 'silly silver silkworm' ] , + 'length': [ 2.0 , 4.0 , 8.0 ] , + 'length_bin': pd.cut( [ 2.0 , 4.0 , 8.0 ] , + np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , + } , + ) + + #---------------------------------- + ### Run tests: `bin_variable` + #---------------------------------- + assert_dataframe_equal( eval_dataframe_monkey , 
expected_dtypes , expected_output ) + assert_dataframe_equal( eval_dataframe_function , expected_dtypes , expected_output ) + +def test_bin_stats( ): + + ### Mock dataframe + test_dataframe = pd.DataFrame( + { + 'animal': [ 'pretty pink pony' , 'big blue bass' , 'silly silver silkworm' , + 'gnarly green grouse' , 'roudy red rabbit' , 'magenta mad manatee' ] , + 'length': [ 2.0 , 4.0 , 8.0 , 3.0 , 6.0 , 7.0 ] , + 'weight': [ 100.0 , 200.0 , 300.0 , 300.0 , 200.0 , 100.0 ] , + 'location': [ 'timbuktu' , 'timbuktu' , 'timbuktu' , + 'lost city of z' , 'lost city of z' , 'lost city of z' ] , + } , + ) + + ### Mock bin_values + test_bin_values = np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) + + ### Evaluate for later comparison + # ++++ No contrast | length + weight + # ---- Monkey patch method (TEMPORARY) + eval_dataframe_monkey_lwnc = test_dataframe.bin_stats( 'length' , test_bin_values ) + # ---- Normal function + eval_dataframe_function_lwnc = bin_stats( test_dataframe , 'length' , test_bin_values ) + # ++++ No contrast | length + eval_dataframe_monkey_lnc = test_dataframe.bin_stats( 'length' , test_bin_values , variables = 'length' ) + # ---- Normal function + eval_dataframe_function_lnc = bin_stats( test_dataframe , 'length' , test_bin_values , variables = 'length' ) + # ++++ No contrast | length ~ function: just mean + eval_dataframe_monkey_lncm = test_dataframe.bin_stats( 'length' , test_bin_values , variables = 'length' , functions = [ 'mean' ] ) + # ---- Normal function + eval_dataframe_function_lncm = bin_stats( test_dataframe , 'length' , test_bin_values , variables = 'length' , functions = [ 'mean' ] ) + # ++++ No contrast | length ~ function: just mean + eval_dataframe_monkey_lwc = test_dataframe.bin_stats( 'length' , test_bin_values , contrasts = [ 'location' ] , variables = 'length' ) + # ---- Normal function + eval_dataframe_function_lwc = bin_stats( test_dataframe , 'length' , test_bin_values , contrasts = [ 'location' ] , variables = 'length' ) + # ++++ 
Bundle together for evaluation + eval_dictionary = { + 'monkey_lwnc': eval_dataframe_monkey_lwnc , + 'function_lwnc': eval_dataframe_function_lwnc , + 'monkey_lnc': eval_dataframe_monkey_lnc , + 'function_lnc': eval_dataframe_function_lnc , + 'monkey_lncm': eval_dataframe_monkey_lncm , + 'function_lncm': eval_dataframe_function_lncm , + 'monkey_lwc': eval_dataframe_monkey_lwc , + 'function_lwc': eval_dataframe_function_lwc , + } + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- Expected dtypes + expected_dtypes = { + 'monkey_lwnc': { + 'length_bin': pd.CategoricalDtype( ) , + 'mean_length': np.floating , + 'n_length': np.integer , + 'mean_weight': np.floating , + 'n_weight': np.integer , + } , + 'function_lwnc': { + 'length_bin': pd.CategoricalDtype( ) , + 'mean_length': np.floating , + 'n_length': np.integer , + 'mean_weight': np.floating , + 'n_weight': np.integer , + } , + 'monkey_lnc': { + 'length_bin': pd.CategoricalDtype( ) , + 'mean_length': np.floating , + 'n_length': np.integer , + } , + 'function_lnc': { + 'length_bin': pd.CategoricalDtype( ) , + 'mean_length': np.floating , + 'n_length': np.integer , + } , + 'monkey_lncm': { + 'length_bin': pd.CategoricalDtype( ) , + 'mean_length': np.floating , + 'n_length': np.integer , + } , + 'function_lncm': { + 'length_bin': pd.CategoricalDtype( ) , + 'mean_length': np.floating , + } , + 'monkey_lwc': { + 'length_bin': pd.CategoricalDtype( ) , + 'location': object , + 'mean_length': np.floating , + 'n_length': np.integer , + } , + 'function_lwc': { + 'length_bin': pd.CategoricalDtype( ) , + 'location': object , + 'mean_length': np.floating , + 'n_length': np.integer , + } , + } + # ---- Expected outputs + expected_output = { + 'monkey_lwnc': pd.DataFrame ( + { + 'length_bin': pd.cut( [ 2.0 , 4.0 , 6.0 , 8.0 ] , + np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , + 'mean_length': [ 2.5 , 4.0 , 6.5 , 8.0 ] , + 'n_length': [ 2 , 1 , 2 , 1 ] , + 'mean_weight': [ 
200.0 , 200.0 , 150.0 , 300.0 ] , + 'n_weight': [ 2 , 1 , 2 , 1 ] , + } , + ) , + 'function_lwnc': pd.DataFrame ( + { + 'length_bin': pd.cut( [ 2.0 , 4.0 , 6.0 , 8.0 ] , + np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , + 'mean_length': [ 2.5 , 4.0 , 6.5 , 8.0 ] , + 'n_length': [ 2 , 1 , 2 , 1 ] , + 'mean_weight': [ 200.0 , 200.0 , 150.0 , 300.0 ] , + 'n_weight': [ 2 , 1 , 2 , 1 ] , + } , + ) , + 'monkey_lnc': pd.DataFrame ( + { + 'length_bin': pd.cut( [ 2.0 , 4.0 , 6.0 , 8.0 ] , + np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , + 'mean_length': [ 2.5 , 4.0 , 6.5 , 8.0 ] , + 'n_length': [ 2 , 1 , 2 , 1 ] , + } , + ) , + 'function_lnc': pd.DataFrame( + { + 'length_bin': pd.cut( [ 2.0 , 4.0 , 6.0 , 8.0 ] , + np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , + 'mean_length': [ 2.5 , 4.0 , 6.5 , 8.0 ] , + 'n_length': [ 2 , 1 , 2 , 1 ] , + } , + ) , + 'monkey_lncm': pd.DataFrame ( + { + 'length_bin': pd.cut( [ 2.0 , 4.0 , 6.0 , 8.0 ] , + np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , + 'mean_length': [ 2.5 , 4.0 , 6.5 , 8.0 ] , + } , + ) , + 'function_lncm': pd.DataFrame ( + { + 'length_bin': pd.cut( [ 2.0 , 4.0 , 6.0 , 8.0 ] , + np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , + 'mean_length': [ 2.5 , 4.0 , 6.5 , 8.0 ] , + } , + ) , + 'monkey_lwc': pd.DataFrame( + { + 'length_bin': pd.cut( np.repeat( [ 2.0 , 4.0 , 6.0 , 8.0 ] , 2 ) , + np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , + 'location': np.tile( [ 'lost city of z' , 'timbuktu' ] , 4 ) , + 'mean_length': [ 3.0 , 2.0 , 0.0 , 4.0 , 6.5 , 0.0 , 0.0 , 8.0 ] , + 'n_length': [ 1 , 1 , 0 , 1 , 2 , 0 , 0 , 1 ] , + } , + ) , + 'function_lwc': pd.DataFrame( + { + 'length_bin': pd.cut( np.repeat( [ 2.0 , 4.0 , 6.0 , 8.0 ] , 2 ) , + np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , + 'location': np.tile( [ 'lost city of z' , 'timbuktu' ] , 4 ) , + 'mean_length': [ 3.0 , 2.0 , 0.0 , 4.0 , 6.5 , 0.0 , 0.0 , 8.0 ] , + 'n_length': [ 1 , 1 , 0 , 1 , 2 , 0 , 0 , 1 ] , + } , + ) , + } + #---------------------------------- + ### Run 
tests: `bin_stats` + #---------------------------------- + assert_dataframe_equal( eval_dictionary, expected_dtypes , expected_output ) + +def test_count_variable( ): + + ### Mock dataframe + test_dataframe = pd.DataFrame( + { + 'animal': [ 'pretty pink pony' , 'big blue bass' , 'silly silver silkworm' , + 'gnarly green grouse' , 'roudy red rabbit' , 'magenta mad manatee' , + 'pretty pink pony' , 'big blue bass' , 'silly silver silkworm' , + 'gnarly green grouse' , 'roudy red rabbit' , 'magenta mad manatee' ] , + 'length': [ 2.0 , 4.0 , 8.0 , 3.0 , 6.0 , 7.0 , + 2.0 , 4.0 , 8.0 , 3.0 , 6.0 , 7.0 ] , + 'location': [ 'timbuktu' , 'timbuktu' , 'timbuktu' , + 'timbuktu' , 'timbuktu' , 'timbuktu' , + 'lost city of z' , 'lost city of z' , 'lost city of z' , + 'lost city of z' , 'lost city of z' , 'lost city of z' ] , + 'length_count': [ 10 , 20 , 30 , 40 , 50 , 60 , + 60 , 50 , 40 , 30 , 20 , 10 ] , + } , + ) + + ### Evaluate for later comparison + # ---- Monkey patch method (TEMPORARY) + eval_dataframe_monkey = test_dataframe.count_variable( [ 'location' , 'animal' ] , 'length_count' , 'sum' ) + # ---- Normal function + eval_dataframe_function = count_variable( test_dataframe , [ 'location' , 'animal' ] , 'length_count' , 'sum' ) + + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- Expected dtypes + expected_dtypes = { + 'location': object , + 'animal': object , + 'count': np.integer , + } + # ---- Expected output + expected_output = pd.DataFrame( + { + 'location': np.repeat( [ 'lost city of z' , 'timbuktu' ] , 6 ) , + 'animal': np.tile( [ 'big blue bass' , 'gnarly green grouse' , 'magenta mad manatee' , + 'pretty pink pony' , 'roudy red rabbit' , 'silly silver silkworm' ] , 2 ) , + 'count': [ 50 , 30 , 10 , 60 , 20 , 40 , + 20 , 40 , 60 , 10 , 50 , 30 ] , + } , + ) + + #---------------------------------- + ### Run tests: `count_variable` + #---------------------------------- + assert_dataframe_equal( 
eval_dataframe_monkey , expected_dtypes , expected_output ) + assert_dataframe_equal( eval_dataframe_function , expected_dtypes , expected_output ) + +def test_meld( ): + + ### Mock specimen dataframe + test_specimen_dataframe = pd.DataFrame( + { + 'stratum_num': np.repeat( 1 , 12 ) , + 'species_id': np.tile( [ 'big blue bass' , 'pretty pink pony' , 'silly silver silkworm' ] , 4 ) , + 'sex': np.tile( [ 'male' , 'female' ] , 6 ) , + 'group': np.repeat( 'sexed' , 12 ) , + 'station': np.repeat( 'clouds' , 12 ) , + 'length': [ 5.0 , 4.0 , 6.0 , 5.0 , 5.0 , 4.0 , + 5.0 , 5.0 , 7.0 , 4.0 , 5.0 , 6.0 ] , + 'length_bin': pd.cut( [ 5.0 , 4.0 , 6.0 , 5.0 , 5.0 , 4.0 , + 5.0 , 5.0 , 7.0 , 4.0 , 5.0 , 6.0 ] , + np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , + } , + ) + + ### Mock length dataframe + test_length_dataframe = pd.DataFrame( + { + 'stratum_num': np.repeat( 1 , 6 ) , + 'species_id': np.tile( [ 'big blue bass' , 'pretty pink pony' , 'silly silver silkworm' ] , 2 ) , + 'sex': np.tile( [ 'male' , 'female' ] , 3 ) , + 'group': np.repeat( 'sexed' , 6 ) , + 'station': np.repeat( 'waves' , 6 ) , + 'length': [ 2.0 , 4.0 , 3.0 , 2.0 , 4.0 , 3.0 ] , + 'length_bin': pd.cut( [ 2.0 , 4.0 , 3.0 , 2.0 , 4.0 , 3.0 ] , + np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , + 'length_count': [ 10 , 20 , 30 , 30 , 20 , 10 ] , + } , + ) + + ### Evaluate for later comparison + # ---- Monkey patch method (TEMPORARY) + eval_dataframe_monkey = test_specimen_dataframe.meld( test_length_dataframe ) + # ---- Normal function + eval_dataframe_function = meld( test_specimen_dataframe , test_length_dataframe ) + + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- Expected dtypes + expected_dtypes = { + 'stratum_num': np.integer , + 'species_id': object , + 'sex': object , + 'group': object , + 'station': object , + 'length': np.floating , + 'length_bin': pd.CategoricalDtype( ) , + 'length_count': np.integer , + } + # ---- Expected output + 
expected_output = pd.DataFrame( + { + 'stratum_num': np.repeat( 1 , 16 ) , + 'species_id': np.concatenate( [ np.repeat( 'big blue bass' , 3 ) , + np.repeat( 'pretty pink pony' , 3 ) , + np.repeat( 'silly silver silkworm' , 4 ) , + np.tile( [ 'big blue bass' , 'pretty pink pony' , + 'silly silver silkworm' ] , 2 ) ] ) , + 'sex': [ 'female' , 'female' , 'male' , 'female' , 'female' , 'male' , 'female' , 'female' , + 'male' , 'male' , 'male' , 'female' , 'male' , 'female' , 'male' , 'female' ] , + 'group': np.repeat( 'sexed' , 16 ) , + 'station': np.concatenate( [ np.repeat( 'clouds' , 10 ) , + np.repeat( 'waves' , 6 ) ] ) , + 'length': [ 4.0 , 5.0 , 5.0 , 4.0 , 5.0 , 5.0 , 4.0 , 6.0 , + 6.0 , 7.0 , 2.0 , 4.0 , 3.0 , 2.0 , 4.0 , 3.0 ] , + 'length_bin': pd.cut( [ 4.0 , 5.0 , 5.0 , 4.0 , 5.0 , 5.0 , 4.0 , 6.0 , + 6.0 , 7.0 , 2.0 , 4.0 , 3.0 , 2.0 , 4.0 , 3.0 ] , + np.array( [ 1.0 , 3.0 , 5.0 , 7.0 , 9.0 ] ) ) , + 'length_count': [ 1 , 1 , 2 , 1 , 1 , 2 , 1 , 1 , 1 , 1 , + 10 , 20 , 30 , 30 , 20 , 10 ] , + } , + ) + + #---------------------------------- + ### Run tests: `count_variable` + #---------------------------------- + assert_dataframe_equal( eval_dataframe_monkey , expected_dtypes , expected_output ) + assert_dataframe_equal( eval_dataframe_function , expected_dtypes , expected_output ) + +def test_stretch( ): + + ### Create mock dataframe + test_dataframe = pd.DataFrame( + { + 'stratum_num': [ 1 , 1 , 2 , 2 ] , + 'transect_num': [ 1 , 2 , 3 , 4 ] , + 'latitude': [ 0.0 , 1.0 , 3.0 , 4.0 ] , + 'longitude': [ -1.0 , 0.0 , 1.0 , 2.0 ] , + 'load_a_male': [ 5.0 , 4.0 , 2.0 , 1.0 ] , + 'load_a_female': [ 10.0 , 3.0 , 5.0 , 6.0 ] , + } , + ) + + ### Eval for later comparison + # ---- Monkey patch method (TEMPORARY) + eval_dataframe_monkey = test_dataframe.stretch( variable = 'load_a' ) + # ---- Normal function + eval_dataframe_function = stretch( test_dataframe , variable = 'load_a' ) + + ###-------------------------------- + ### Expected outcomes + 
###-------------------------------- + # ---- Expected dtypes + expected_dtypes = { + 'transect_num': np.integer , + 'latitude': np.floating , + 'longitude': np.floating , + 'stratum_num': np.integer , + 'sex': object , + 'load_a': np.floating , + } + # ---- Expected output + expected_output = pd.DataFrame( + { + 'transect_num': np.repeat( [ 1 , 2 , 3 , 4 ] , 2 ) , + 'latitude': np.repeat( [ 0.0 , 1.0 , 3.0 , 4.0 ] , 2 ) , + 'longitude': np.repeat( [ -1.0 , 0.0 , 1.0 , 2.0 ] , 2 ) , + 'stratum_num': np.repeat( [ 1 , 2 ] , 4 ) , + 'sex': np.tile( [ 'male' , 'female' ] , 4 ) , + 'load_a': [ 5.0 , 10.0 , 4.0 , 3.0 , + 2.0 , 5.0 , 1.0 , 6.0 ] , + } , + ) + + #---------------------------------- + ### Run tests: `count_variable` + #---------------------------------- + assert_dataframe_equal( eval_dataframe_monkey , expected_dtypes , expected_output ) + assert_dataframe_equal( eval_dataframe_function , expected_dtypes , expected_output ) + +def test_group_merge( ): + + ### Create mock dataframe 1 + test_dataframe_a = pd.DataFrame( + { + 'stratum_num': np.repeat( [ 1 , 2 ] , 6 ) , + 'animal': np.tile( [ 'big blue bass' , 'gnarly green grouse' , 'magenta mad manatee' , + 'pretty pink pony' , 'roudy red rabbit' , 'silly silver silkworm' ] , 2 ) , + 'insert_metric_here': [ 1.00 , 1.00 , 1.00 , 0.75 , 0.75 , 0.75 , + 0.50 , 0.50 , 0.50 , 0.75 , 0.75 , 1.00 ] , + + } , + ) + + ### Create mock dataframe 2 + test_dataframe_b = pd.DataFrame( + { + 'stratum_num': np.repeat( [ 1 , 2 ] , 6 ) , + 'animal': np.tile( [ 'big blue bass' , 'gnarly green grouse' , 'magenta mad manatee' , + 'pretty pink pony' , 'roudy red rabbit' , 'silly silver silkworm' ] , 2 ) , + 'group': np.repeat( [ 'sleepy' , 'alert' ] , 6 ) , + 'new_metric_here': [ 0.1 , 0.1 , 0.2 , 0.2 , 0.3 , 0.3 , + 0.5 , 0.2 , 0.2 , 0.4 , 0.4 , 0.5 ] , + + } , + ) + + ### Create mock dataframe 3 + test_dataframe_c = pd.DataFrame( + { + 'stratum_num': np.repeat( [ 1 , 2 ] , 2 ) , + 'group': np.tile( [ 'sleepy' , 'alert' ] , 2 ) , + 
'categorical_metric': np.tile( [ 'zippity' , 'doo' ] , 2 ) , + } , + ) + + ### Evaluate for later comparison + # ++++ Drop NA + # ---- Monkey patch method (TEMPORARY) + eval_dataframe_monkey_dropna = test_dataframe_a.group_merge( [ test_dataframe_b , test_dataframe_c ] , + inner_on = 'group' , + outer_on = [ 'stratum_num' ] , + drop_na = True ) + # ---- Normal function + eval_dataframe_function_dropna = group_merge( test_dataframe_a , + [ test_dataframe_b , test_dataframe_c ] , + inner_on = 'group' , + outer_on = [ 'stratum_num' ] , + drop_na = True ) + # ++++ Don't drop NA + # ---- Monkey patch method (TEMPORARY) + eval_dataframe_monkey_keepna = test_dataframe_a.group_merge( [ test_dataframe_b , test_dataframe_c ] , + inner_on = 'group' , + outer_on = [ 'stratum_num' ] , + drop_na = False ) + # ---- Normal function + eval_dataframe_function_keepna = group_merge( test_dataframe_a , + [ test_dataframe_b , test_dataframe_c ] , + inner_on = 'group' , + outer_on = [ 'stratum_num' ] , + drop_na = False ) + # ++++ Bundle! 
+ eval_dictionary = { + 'monkey_dropna': eval_dataframe_monkey_dropna , + 'function_dropna': eval_dataframe_function_dropna , + 'monkey_keepna': eval_dataframe_monkey_keepna , + 'function_keepna': eval_dataframe_function_keepna , + } + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- Expected dtypes + expected_dtypes = { + 'monkey_dropna': { + 'stratum_num': np.integer , + 'animal': object , + 'insert_metric_here': np.floating , + 'group': object , + 'new_metric_here': np.floating , + 'categorical_metric': object , + } , + 'function_dropna': { + 'stratum_num': np.integer , + 'animal': object , + 'insert_metric_here': np.floating , + 'group': object , + 'new_metric_here': np.floating , + 'categorical_metric': object , + } , + 'monkey_keepna': { + 'stratum_num': np.integer , + 'animal': object , + 'insert_metric_here': np.floating , + 'group': object , + 'new_metric_here': np.floating , + 'categorical_metric': object , + } , + 'function_keepna': { + 'stratum_num': np.integer , + 'animal': object , + 'insert_metric_here': np.floating , + 'group': object , + 'new_metric_here': np.floating , + 'categorical_metric': object , + } , + } + # ---- Expected output + expected_output = { + 'monkey_dropna': pd.DataFrame( + { + 'stratum_num': np.repeat( [ 1 , 2 ] , 6 ) , + 'animal': np.tile( [ 'big blue bass' , 'gnarly green grouse' , 'magenta mad manatee' , + 'pretty pink pony' , 'roudy red rabbit' , 'silly silver silkworm' ] , 2 ) , + 'insert_metric_here': [ 1.00 , 1.00 , 1.00 , 0.75 , 0.75 , 0.75 , + 0.50 , 0.50 , 0.50 , 0.75 , 0.75 , 1.00 ] , + 'group': np.repeat( [ 'sleepy' , 'alert' ] , 6 ) , + 'new_metric_here': [ 0.1 , 0.1 , 0.2 , 0.2 , 0.3 , 0.3 , + 0.5 , 0.2 , 0.2 , 0.4 , 0.4 , 0.5 ] , + 'categorical_metric': np.repeat( [ 'zippity' , 'doo' ] , 6 ) , + } , + ) , + 'function_dropna': pd.DataFrame( + { + 'stratum_num': np.repeat( [ 1 , 2 ] , 6 ) , + 'animal': np.tile( [ 'big blue bass' , 'gnarly green grouse' , 
'magenta mad manatee' , + 'pretty pink pony' , 'roudy red rabbit' , 'silly silver silkworm' ] , 2 ) , + 'insert_metric_here': [ 1.00 , 1.00 , 1.00 , 0.75 , 0.75 , 0.75 , + 0.50 , 0.50 , 0.50 , 0.75 , 0.75 , 1.00 ] , + 'group': np.repeat( [ 'sleepy' , 'alert' ] , 6 ) , + 'new_metric_here': [ 0.1 , 0.1 , 0.2 , 0.2 , 0.3 , 0.3 , + 0.5 , 0.2 , 0.2 , 0.4 , 0.4 , 0.5 ] , + 'categorical_metric': np.repeat( [ 'zippity' , 'doo' ] , 6 ) , + } , + ) , + 'monkey_keepna': pd.DataFrame( + { + 'stratum_num': np.concatenate( [ np.repeat( [ 1 , 2 ] , 7 ) ] ) , + 'animal': np.tile( [ 'big blue bass' , 'gnarly green grouse' , 'magenta mad manatee' , + 'pretty pink pony' , 'roudy red rabbit' , 'silly silver silkworm' , np.nan ] , 2 ).astype( object ) , + 'insert_metric_here': [ 1.00 , 1.00 , 1.00 , 0.75 , 0.75 , 0.75 , + np.nan , + 0.50 , 0.50 , 0.50 , 0.75 , 0.75 , 1.00 , + np.nan ] , + 'group': np.repeat( [ 'sleepy' , 'alert' ] , 7 ) , + 'new_metric_here': [ 0.1 , 0.1 , 0.2 , 0.2 , 0.3 , 0.3 , + np.nan , + 0.5 , 0.2 , 0.2 , 0.4 , 0.4 , 0.5 , + np.nan ] , + 'categorical_metric': np.repeat( [ 'zippity' , 'doo' ] , 7 ) , + } , + ) , + 'function_keepna': pd.DataFrame( + { + 'stratum_num': np.concatenate( [ np.repeat( [ 1 , 2 ] , 7 ) ] ) , + 'animal': np.tile( [ 'big blue bass' , 'gnarly green grouse' , 'magenta mad manatee' , + 'pretty pink pony' , 'roudy red rabbit' , 'silly silver silkworm' , np.nan ] , 2 ).astype( object ) , + 'insert_metric_here': [ 1.00 , 1.00 , 1.00 , 0.75 , 0.75 , 0.75 , + np.nan , + 0.50 , 0.50 , 0.50 , 0.75 , 0.75 , 1.00 , + np.nan ] , + 'group': np.repeat( [ 'sleepy' , 'alert' ] , 7 ) , + 'new_metric_here': [ 0.1 , 0.1 , 0.2 , 0.2 , 0.3 , 0.3 , + np.nan , + 0.5 , 0.2 , 0.2 , 0.4 , 0.4 , 0.5 , + np.nan ] , + 'categorical_metric': np.repeat( [ 'zippity' , 'doo' ] , 7 ) , + } , + ) , + } + #---------------------------------- + ### Run tests: `group_merge` + #---------------------------------- + assert_dataframe_equal( eval_dictionary , expected_dtypes , 
expected_output ) \ No newline at end of file diff --git a/echopop/tests/test_stratified_summary.py b/echopop/tests/test_stratified_summary.py new file mode 100644 index 00000000..98d649da --- /dev/null +++ b/echopop/tests/test_stratified_summary.py @@ -0,0 +1,251 @@ +import numpy as np +import pandas as pd +from echopop.tests.conftest import assert_dictionary_equal +from echopop.computation.statistics import stratified_transect_statistic , confidence_interval + +def test_stratified_transect_statistic( ): + + ### Create mock data for `transect_summary` + test_transect_summary = pd.DataFrame( + { + 'transect_num': [ 1 , 2 , 3 , 4 ] , + 'minimum_longitude': [ -5.0 , -3.0 , -1.0 , 1.0 ] , + 'maxmum_longitude': [ -2.0 , 5.0 , 3.0 , 7.0 ] , + 'center_latitude': [ 10.0 , 11.0 , 12.5 , 13.5 ] , + 'transect_distance': [ 177.600950 , 472.070493 , 234.766275 , 350.736855 ] , + 'transect_spacing': [ 2.0 , 2.0 , 2.0 , 2.0 ] , + 'transect_area': [ 355.201900 , 944.140986 , 469.532550 , 701.473710 ] , + 'B_adult': [ 1e2 , 1e3 , 1e5 , 1e4 ] , + 'stratum_inpfc': [ 1 , 1 , 2 , 2 ] , + } , + ) + + ### Create mock data for `strata_summary` + test_strata_summary = pd.DataFrame( + { + 'stratum_inpfc': [ 1 , 2 ] , + 'num_transects': [ 2 , 2 ] , + 'total_transect_area': [ 1299.342886 , 1171.006260 ] , + } , + ) + + ### Evaluate for later comparison + # ---- Replicates == 1 + # ---- Transect sample proportion == 100% + test_transect_sample = 1.0 + test_transect_replicates = 1 + eval_single_stratified_results = stratified_transect_statistic( test_transect_summary , + test_strata_summary , + test_transect_sample , + test_transect_replicates , + parameter = 'B_adult' ) + # ---- Replicates == 10 + # ---- Transect sample proportion == 100% + test_transect_sample = 1.0 + test_transect_replicates = 10 + eval_single_rep_stratified_results = stratified_transect_statistic( test_transect_summary , + test_strata_summary , + test_transect_sample , + test_transect_replicates , + parameter = 'B_adult' ) 
+ + # ---- Replicates == 1 + # ---- Transect sample proportion == 50% + test_transect_sample = 0.5 + test_transect_replicates = 1 + np.random.seed( 10 ) + eval_single_sub_stratified_results = stratified_transect_statistic( test_transect_summary , + test_strata_summary , + test_transect_sample , + test_transect_replicates , + parameter = 'B_adult' ) + + # ---- Replicates == 10 + # ---- Transect sample proportion == 50% + test_transect_sample = 0.5 + test_transect_replicates = 10 + np.random.seed( 1800 ) + eval_single_sub_rep_stratified_results = stratified_transect_statistic( test_transect_summary , + test_strata_summary , + test_transect_sample , + test_transect_replicates , + parameter = 'B_adult' ) + + # ++++ Bundle! + eval_dictionary = { + 'single': eval_single_stratified_results , + 'single_rep': eval_single_rep_stratified_results , + 'single_sub': eval_single_sub_stratified_results , + 'single_rep_sub': eval_single_sub_rep_stratified_results , + } + + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- Expected dtypes + expected_dtypes = { + 'single': { + 'biomass': { + 'mean': { + 'estimate': np.floating , + 'confidence_interval': np.array( [ 54947653.27600001 , 54947653.27600001 ] ) , + } , + 'variance': { + 'estimate': 54846534.456292756 , + 'confidence_interval': np.array( [ 54846534.45629276 , 54846534.45629276 ] ) , + } , + 'CV': { + 'estimate': 0.9981597245072626 , + 'confidence_interval': np.array( [ 0.99815972 , 0.99815972 ] ) , + } , + } , + } , + 'single_rep': { + 'biomass': { + 'mean': { + 'estimate': 54947653.27600001 , + 'confidence_interval': np.array( [ 54947653.27600001 , 54947653.27600001 ] ) , + } , + 'variance': { + 'estimate': 54846534.45629276 , + 'confidence_interval': np.array( [ 54846534.45629275 , 54846534.45629278 ] ) , + } , + 'CV': { + 'estimate': 0.9981597245072626 , + 'confidence_interval': np.array( [ 0.99815972 , 0.99815972 ] ) , + } , + } , + } , + 'single_sub': { + 
'biomass': { + 'mean': { + 'estimate': 117230560.28860001 , + 'confidence_interval': np.array( [ 1.1723056e08 , 1.1723056e8 ] ) , + } , + 'variance': { + 'estimate': 116601900.95605445 , + 'confidence_interval': np.array( [ 1.16601901e8 , 1.16601901e8 ] ) , + } , + 'CV': { + 'estimate': 0.994637410833848 , + 'confidence_interval': np.array( [ 0.99463741 , 0.99463741 ] ) , + } , + } , + } , + 'single_rep_sub': { + 'biomass': { + 'mean': { + 'estimate': 54463985.68756001 , + 'confidence_interval': np.array( [ -4.69233576e7 , 1.55851329e8 ] ) , + } , + 'variance': { + 'estimate': 53662832.43264915 , + 'confidence_interval': np.array( [ -4.70645276e7 , 1.54390192e8 ] ) , + } , + 'CV': { + 'estimate': 0.9710233886235905 , + 'confidence_interval': np.array( [ 0.90408889 , 1.03795788 ] ) , + } , + } , + } , + } + # ---- Expected output + expected_output = { + 'single': { + 'biomass': { + 'mean': { + 'estimate': 54947653.27600001 , + 'confidence_interval': np.array( [ 54947653.27600001 , 54947653.27600001 ] ) , + } , + 'variance': { + 'estimate': 54846534.456292756 , + 'confidence_interval': np.array( [ 54846534.45629276 , 54846534.45629276 ] ) , + } , + 'CV': { + 'estimate': 0.9981597245072626 , + 'confidence_interval': np.array( [ 0.99815972 , 0.99815972 ] ) , + } , + } , + } , + 'single_rep': { + 'biomass': { + 'mean': { + 'estimate': 54947653.27600001 , + 'confidence_interval': np.array( [ 54947653.27600001 , 54947653.27600001 ] ) , + } , + 'variance': { + 'estimate': 54846534.45629276 , + 'confidence_interval': np.array( [ 54846534.45629275 , 54846534.45629278 ] ) , + } , + 'CV': { + 'estimate': 0.9981597245072626 , + 'confidence_interval': np.array( [ 0.99815972 , 0.99815972 ] ) , + } , + } , + } , + 'single_sub': { + 'biomass': { + 'mean': { + 'estimate': 117230560.28860001 , + 'confidence_interval': np.array( [ 1.1723056e08 , 1.1723056e8 ] ) , + } , + 'variance': { + 'estimate': 116601900.95605445 , + 'confidence_interval': np.array( [ 1.16601901e8 , 1.16601901e8 ] 
) , + } , + 'CV': { + 'estimate': 0.994637410833848 , + 'confidence_interval': np.array( [ 0.99463741 , 0.99463741 ] ) , + } , + } , + } , + 'single_rep_sub': { + 'biomass': { + 'mean': { + 'estimate': 54463985.68756001 , + 'confidence_interval': np.array( [ -4.69233576e7 , 1.55851329e8 ] ) , + } , + 'variance': { + 'estimate': 53662832.43264915 , + 'confidence_interval': np.array( [ -4.70645276e7 , 1.54390192e8 ] ) , + } , + 'CV': { + 'estimate': 0.9710233886235905 , + 'confidence_interval': np.array( [ 0.90408889 , 1.03795788 ] ) , + } , + } , + } , + } + + #---------------------------------- + ### Run tests: `stratified_transect_statistic` + #---------------------------------- + assert_dictionary_equal( eval_dictionary , expected_dtypes , expected_output ) + +def test_confidence_interval( ): + + ### Mock values + test_values = [ 1.0 , 2.0 , 3.0 , 4.0 , 5.0 , 4.0 , 3.0 , 2.0 , 1.0 ] + + ### Evaluate for comparison later + eval_ci_values = confidence_interval( test_values ) + + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- Expected dimensions + expected_dimensions = tuple( [ 2 , ] ) + # --- Expected dtype + # ---- Expected output + expected_output = np.array( [ 0.20104371 , 5.35451185 ] ) + + #---------------------------------- + ### Run tests: `confidence_interval` + #---------------------------------- + ### Check shape + assert eval_ci_values.shape == expected_dimensions + ### Check dtype + assert np.issubdtype( eval_ci_values.dtype , np.floating ) + ### Check output + assert np.allclose( eval_ci_values , expected_output ) + diff --git a/echopop/tests/test_transect_functions.py b/echopop/tests/test_transect_functions.py new file mode 100644 index 00000000..c86b821e --- /dev/null +++ b/echopop/tests/test_transect_functions.py @@ -0,0 +1,335 @@ +import pandas as pd +import numpy as np +import copy +from echopop.tests.conftest import assert_dataframe_equal +from echopop.computation.biology import 
index_transect_age_sex_proportions +from echopop.computation.spatial import correct_transect_intervals , calculate_start_end_coordinates , calculate_transect_distance + +def test_index_transect_age_sex_proportions( mock_survey ): + + ### Initialize various attributes + mock_survey.acoustics[ 'sigma_bs' ] = { } + mock_survey.statistics[ 'length_weight' ] = { } + mock_survey.biology[ 'weight' ] = { } + mock_survey.biology[ 'population' ] = { } + + ### Create mock data for `age_proportions_df` + mock_survey.biology[ 'weight' ][ 'proportions' ] = { } + mock_survey.biology[ 'weight' ][ 'proportions' ][ 'age_proportions_df' ] = pd.DataFrame( { + 'stratum_num': np.repeat( [ 0 , 1 ] , 2 ).astype( np.int64 ) , + 'age': np.tile( [ 1 , 2 ] , 2 ).astype( np.int64 ) , + 'count_age_proportion_all': np.repeat( 0.5 , 4 ) , + 'count_age_proportion_adult': [ 0.0 , 1.0 , 0.0 , 1.0 ] + } ) + + ### Create mock data for `age_weight_proportions_df` + mock_survey.biology[ 'weight' ][ 'proportions' ][ 'age_weight_proportions_df' ] = pd.DataFrame( { + 'stratum_num': np.repeat( [ 0 , 1 ] , 2 ).astype( np.int64 ) , + 'age': np.tile( [ 1 , 2 ] , 2 ).astype( np.int64 ) , + 'weight_age_proportion_all': [ 0.50 , 0.50 , 0.50 , 0.50 ] , + 'weight_age_proportion_adult': [ 0.0 , 1.0 , 0.0 , 1.0 ] + } ) + + ### Create mock data for `sex_age_weight_proportions_df` + mock_survey.biology[ 'weight' ][ 'proportions' ][ 'sex_age_weight_proportions_df' ] = pd.DataFrame( { + 'stratum_num': np.repeat( [ 0 , 1 ] , 6 ).astype( np.int64 ) , + 'age': np.tile( [ 1 , 1 , 1 , 2 , 2 , 2 ] , 2 ).astype( np.int64 ) , + 'sex': np.tile( [ 'all' , 'female' , 'male' ] , 4 ) , + 'weight_sex_proportion_all': [ 0.5 , 0.6 , 0.4 , 0.5 , 0.4 , 0.6 , + 0.5 , 0.5 , 0.5 , 0.5 , 0.5 , 0.5 ] , + 'weight_sex_proportion_adult': np.tile( [ 0.0 , 0.0 , 0.0 , 1.0 , 1.0 , 1.0 ] , 2 ) + } ) + + ### Create mock data for 'length_weight_df' + mock_survey.statistics[ 'length_weight' ][ 'length_weight_df' ] = pd.DataFrame( + { + 'length_bin': 
pd.cut( np.repeat( [ 12 , 18 ] , 3 ) , + np.linspace( 9 , 21 , 3 ) ) , + 'sex': np.repeat( [ 'all' , 'female' , 'male' ] , 2 ) , + 'n_length': [ 4 , 2 , 2 , 4 , 2 , 2 ] , + 'mean_weight': [ 2.5 , 3.5 , 1.5 , 7.5 , 6.5 , 8.5 ] , + 'n_weight': [ 4 , 2 , 2 , 4 , 2 , 2 ] , + 'rate': [ 2.63 , 1.36 , 3.90 , 2.63 , 1.36 , 3.90 ] , + 'initial': [ -2.49 , -0.93 , -4.06 , -2.49 , -0.93 , -4.06 ] , + 'weight_fitted': [ 2.21 , 3.46 , 1.41 , 6.43 , 6.02 , 6.87 ] , + 'weight_modeled': [ 2.21 , 3.46 , 1.41 , 6.43 , 6.02 , 6.87 ] + } + ) + + ### Create mock data for `weight_strata_df` + mock_survey.biology[ 'weight' ][ 'weight_strata_df' ] = pd.DataFrame( + { + 'stratum_num': [ 0 , 1 ] , + 'proportion_female': [ 0.592593 , 0.407407 ] , + 'proportion_male': [ 0.407407 , 0.592593 ] , + 'proportion_station_1': [ 0.925926 , 0.925926 ] , + 'proportion_station_2': [ 0.074074 , 0.074074 ] , + 'average_weight_female': [ 4.719110 , 2.707892 ] , + 'average_weight_male': [ 6.640487 , 6.299942 ] , + 'average_weight_total': [ 3.066481 , 2.603519 ] , + } + ) + + ### Create mock data for `strata_mean` (sigma_bs) + mock_survey.acoustics[ 'sigma_bs' ][ 'strata_mean' ] = pd.DataFrame( + { + 'stratum_num': [ 0 , 1 ] , + 'species_id': np.repeat( 8675309 , 2 ) , + 'sigma_bs_mean': 1.630277e-8 + } + ) + + ### Create mock data for `nasc_df` + mock_survey.acoustics[ 'nasc' ][ 'nasc_df' ] = pd.DataFrame( + { + 'transect_num': [ 1 , 2 , 3 , 4] , + 'stratum_num': [ 0 , 0 , 1 , 1 ] , + 'vessel_log_start': [ 0.0 , 10.1 , 20.1 , 30.1 ] , + 'vessel_log_end': [ 10.0 , 20.0 , 30.0 , 40.0 ] , + 'latitude': [ 20.0 , 30.0 , 40.0 , 50.0 ] , + 'longitude': [ -180.0 , -120.0 , -170.0 , -110.0 ] , + 'transect_spacing': np.repeat( 1.0 , 4 ) , + 'NASC_no_age1': [ 0.0 , 1e1 , 1e2 , 1e3 ] , + 'haul_num': [ 1 , 1 , 2 , 2 ] , + 'NASC_all_ages': [ 1e1 , 1e2 , 1e2 , 1e3 ] + } + ) + + ### Create mock data for `strata_df` + mock_survey.spatial[ 'strata_df' ] = pd.DataFrame( + { + 'stratum_num': [ 0 , 1 ] , + 'haul_num': [ 1 , 2 ] 
, + 'fraction_hake': [ 1.000 , 0.500 ] + } + ) + + ### Bundle the mocked data into their respective inputs for `index_transect_age_sex_proportions` + test_acoustics_dict = copy.deepcopy( mock_survey.acoustics ) + test_biology_dict = copy.deepcopy( mock_survey.biology ) + test_info_strata = mock_survey.spatial[ 'strata_df' ].copy( ) + + ### Evaluate object for later comparison + eval_nasc_fraction_total_df = index_transect_age_sex_proportions( test_acoustics_dict , + test_biology_dict , + test_info_strata ) + + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- Expected dtypes + expected_dtypes = { + 'latitude': np.floating , + 'longitude': np.floating , + 'transect_num': np.integer , + 'stratum_num': np.integer , + 'haul_num': np.integer , + 'interval': np.floating , + 'interval_area': np.floating , + 'NASC_all_ages': np.floating , + 'NASC_no_age1': np.floating , + 'fraction_hake': np.floating , + 'species_id': np.integer , + 'sigma_bs_mean': np.floating , + 'proportion_female': np.floating , + 'proportion_male': np.floating , + 'proportion_station_1': np.floating , + 'proportion_station_2': np.floating , + 'average_weight_female': np.floating , + 'average_weight_male': np.floating , + 'average_weight_total': np.floating , + 'age': np.integer , + 'count_age_proportion_all': np.floating , + 'count_age_proportion_adult': np.floating , + 'weight_age_proportion_all': np.floating , + 'weight_age_proportion_adult': np.floating , + } + # ---- Expected output + expected_output = pd.DataFrame( + { + 'latitude': np.repeat( [ 20.0 , 30.0 , 40.0 , 50.0 ] , 2 ) , + 'longitude': np.repeat( [ -180.0 , -120.0 , -170.0 , -110.0 ] , 2 ) , + 'transect_num': np.repeat( [ 1 , 2 , 3 , 4 ] , 2 ).astype( np.int64 ) , + 'stratum_num': np.repeat( [ 0 , 1 ] , 4 ).astype( np.int64 ) , + 'haul_num': np.repeat( [ 1 , 2 ] , 4 ).astype( np.int64 ) , + 'interval': np.repeat( [ 10.0 , 10.0 , 10.0 , 9.9 ] , 2 ) , + 'interval_area': np.repeat( [ 
10.0 , 10.0 , 10.0 , 9.9 ] , 2 ) , + 'NASC_all_ages': np.repeat( [ 1e1 , 1e2 , 1e2 , 1e3 ] , 2 ) , + 'NASC_no_age1': np.repeat( [ 0.0 , 1e1 , 1e2 , 1e3 ] , 2 ) , + 'fraction_hake': np.repeat( [ 1.0 , 0.5 ] , 4 ) , + 'species_id': np.repeat( 8675309 , 8 ).astype( np.int64 ) , + 'sigma_bs_mean': np.repeat( 1.630277e-8 , 8 ) , + 'proportion_female': np.repeat( [ 0.592593 , 0.407407 ] , 4 ) , + 'proportion_male': np.repeat( [ 0.407407 , 0.592593 ] , 4 ) , + 'proportion_station_1': np.repeat( 0.925926 , 8 ) , + 'proportion_station_2': np.repeat( 0.074074 , 8 ) , + 'average_weight_female': np.repeat( [ 4.719110 , 2.707892 ] , 4 ) , + 'average_weight_male': np.repeat( [ 6.640487 , 6.299942 ] , 4 ) , + 'average_weight_total': np.repeat( [ 3.066481 , 2.603519 ] , 4 ) , + 'age': np.tile( [ 1 , 2 ] , 4 ).astype( np.int64 ) , + 'count_age_proportion_all': np.repeat( 0.5 , 8 ) , + 'count_age_proportion_adult': np.tile( [ 0.0 , 1.0 ] , 4 ) , + 'weight_age_proportion_all': np.repeat( 0.5 , 8 ) , + 'weight_age_proportion_adult': np.tile( [ 0.0 , 1.0 ] , 4 ) , + } , + ) + + #---------------------------------- + ### Run tests: `index_transect_age_sex_proportions` + #---------------------------------- + assert_dataframe_equal( eval_nasc_fraction_total_df , expected_dtypes , expected_output ) + +def test_correct_transect_intervals( ): + + ### Create mock data for `nasc_df` + test_nasc_dataframe = pd.DataFrame( + { + 'transect_num': [ 1 , 2 , 3 , 4 ] , + 'stratum_num': [ 0 , 0 , 1 , 1 ] , + 'vessel_log_start': [ 0.0 , 10.1 , 20.1 , 30.1 ] , + 'vessel_log_end': [ 10.0 , 20.0 , 30.0 , 40.0 ] , + 'latitude': [ 20.0 , 30.0 , 40.0 , 50.0 ] , + 'longitude': [ -180.0 , -120.0 , -170.0 , -110.0 ] , + 'transect_spacing': np.repeat( 1.0 , 4 ) , + 'NASC_no_age1': [ 0.0 , 1e1 , 1e2 , 1e3 ] , + 'haul_num': [ 1 , 1 , 2 , 2 ] , + 'NASC_all_ages': [ 1e1 , 1e2 , 1e2 , 1e3 ] , + } , + ) + + ### Evaluate object for later comparison + eval_nasc_interval = correct_transect_intervals( test_nasc_dataframe ) 
+ + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- Expected dtypes + expected_dtypes = { + 'latitude': np.floating , + 'longitude': np.floating , + 'transect_num': np.integer , + 'stratum_num': np.integer , + 'haul_num': np.integer , + 'interval': np.floating , + 'interval_area': np.floating , + 'NASC_all_ages': np.floating , + 'NASC_no_age1': np.floating , + } + # ---- Expected output + expected_output = pd.DataFrame( + { + 'latitude': [ 20.0 , 30.0 , 40.0 , 50.0 ] , + 'longitude': [ -180.0 , -120.0 , -170.0 , -110.0 ] , + 'transect_num': [ 1 , 2 , 3 , 4 ] , + 'stratum_num': [ 0 , 0 , 1 , 1 ] , + 'haul_num': [ 1 , 1 , 2 , 2 ] , + 'interval': [ 10.0 , 10.0 , 10.0 , 9.9 ] , + 'interval_area': [ 10.0 , 10.0 , 10.0 , 9.9 ] , + 'NASC_all_ages': [ 1e1 , 1e2 , 1e2 , 1e3 ] , + 'NASC_no_age1': [ 0.0 , 1e1 , 1e2 , 1e3 ] , + } , + ) + + #---------------------------------- + ### Run tests: `correct_transect_intervals` + #---------------------------------- + assert_dataframe_equal( eval_nasc_interval , expected_dtypes , expected_output ) + +def test_calculate_start_end_coordinates( ): + + ### Create mock data for `nasc_df` + test_nasc_dataframe = pd.DataFrame( + { + 'transect_num': [ 1 , 1 , 2 , 2 ] , + 'stratum_num': [ 0 , 0 , 1 , 1 ] , + 'vessel_log_start': [ 0.0 , 10.1 , 20.1 , 30.1 ] , + 'vessel_log_end': [ 10.0 , 20.0 , 30.0 , 40.0 ] , + 'latitude': [ 20.0 , 30.0 , 40.0 , 50.0 ] , + 'longitude': [ -180.0 , -120.0 , -170.0 , -110.0 ] , + 'transect_spacing': np.repeat( 1.0 , 4 ) , + 'NASC_no_age1': [ 0.0 , 1e1 , 1e2 , 1e3 ] , + 'haul_num': [ 1 , 1 , 2 , 2 ] , + 'NASC_all_ages': [ 1e1 , 1e2 , 1e2 , 1e3 ] , + } , + ) + + ### Evaluate for later comparison + eval_test_nasc_df = calculate_start_end_coordinates( test_nasc_dataframe , + 'transect_num' ) + + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- Expected dtypes + expected_dtypes = { + 'transect_num': 
np.integer , + 'minimum_longitude': np.floating , + 'maximum_longitude': np.floating , + 'center_latitude': np.floating , + } + # ---- Expected output + expected_output = pd.DataFrame( + { + 'transect_num': [ 1 , 2 ] , + 'minimum_longitude': [ -180.0 , -170.0 ] , + 'maximum_longitude': [ -120.0 , -110.0 ] , + 'center_latitude': [ 25.0 , 45.0 ] , + } , + ) + + #---------------------------------- + ### Run tests: `calculate_start_end_coordinates` + #---------------------------------- + assert_dataframe_equal( eval_test_nasc_df , expected_dtypes , expected_output ) + +def test_calculate_transect_distance( ): + + ### Create mock data for `nasc_df` + test_nasc_dataframe = pd.DataFrame( + { + 'transect_num': [ 1 , 1 , 2 , 2 ] , + 'stratum_num': [ 0 , 0 , 1 , 1 ] , + 'vessel_log_start': [ 0.0 , 10.1 , 20.1 , 30.1 ] , + 'vessel_log_end': [ 10.0 , 20.0 , 30.0 , 40.0 ] , + 'latitude': [ 20.0 , 30.0 , 40.0 , 50.0 ] , + 'longitude': [ -180.0 , -120.0 , -170.0 , -110.0 ] , + 'transect_spacing': np.repeat( 2.0 , 4 ) , + 'NASC_no_age1': [ 0.0 , 1e1 , 1e2 , 1e3 ] , + 'haul_num': [ 1 , 1 , 2 , 2 ] , + 'NASC_all_ages': [ 1e1 , 1e2 , 1e2 , 1e3 ] , + } , + ) + ### Evaluate for later comparison + eval_test_nasc_df = calculate_transect_distance( test_nasc_dataframe , + 'transect_num' ) + + ###-------------------------------- + ### Expected outcomes + ###-------------------------------- + # ---- Expected dtypes + expected_dtypes= { + 'transect_num': np.integer , + 'minimum_longitude': np.floating , + 'maximum_longitude': np.floating , + 'center_latitude': np.floating , + 'transect_distance': np.floating , + 'transect_spacing': np.floating , + 'transect_area': np.floating , + } + # ---- Expected output + expected_output = pd.DataFrame( + { + 'transect_num': [ 1 , 2 ] , + 'minimum_longitude': [ -180.0 , -170.0 ] , + 'maximum_longitude': [ -120.0 , -110.0 ] , + 'center_latitude': [ 25.0 , 45.0 ] , + 'transect_distance': [ 3241.273891 , 2493.203304 ] , + 'transect_spacing': [ 2.0 , 2.0 ] , + 
'transect_area': [ 6482.547781 , 4986.406609 ] , + } , + ) + + #---------------------------------- + ### Run tests: `calculate_transect_distance` + #---------------------------------- + assert_dataframe_equal( eval_test_nasc_df , expected_dtypes , expected_output ) \ No newline at end of file