Skip to content

Commit

Permalink
Merge pull request #221 from brandynlucca/brandynlucca_WIP_transect_a…
Browse files Browse the repository at this point in the history
…nalysis_tests

Add unit tests for `init`, `transect_analysis`, and `stratified_summary` Survey-class methods
  • Loading branch information
brandynlucca authored Apr 16, 2024
2 parents 19e6a98 + 4378029 commit 9548528
Show file tree
Hide file tree
Showing 9 changed files with 2,153 additions and 70 deletions.
2 changes: 1 addition & 1 deletion docs/glossary.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,4 @@
<!-- Glossary of all symbols, indices, and notations used for mathematical equations and variables contained within the `Survey` class object.
![ text ](images/symbols.jpeg) -->
![ text ](images/symbols.jpeg) -->
12 changes: 7 additions & 5 deletions echopop/computation/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ def bin_stats( dataframe: pd.DataFrame ,
return (
dataframe # input dataframe
.bin_variable( bin_values , bin_variable ) # discretize variable into bins )
.groupby( [f'{bin_variable}_bin'] + con_lst ) # group by these variables/contrasts
.groupby( [f'{bin_variable}_bin'] + con_lst ,
observed = False ) # group by these variables/contrasts
.agg( aggregation_dict ) # apply specified functions
.replace( np.nan , 0 ) # replace NaN w/ 0's
.droplevel( level = 0 , axis = 1 ) # drop the column indices
Expand Down Expand Up @@ -132,7 +133,7 @@ def count_variable( dataframe: pd.DataFrame ,
return (
dataframe # input dataframe
.reset_index( drop=True )
.groupby( contrasts )
.groupby( contrasts , observed = False )
.agg({variable: [('count' , fun)]})
.replace(np.nan, 0 )
.droplevel( level = 0 , axis = 1 )
Expand All @@ -158,15 +159,16 @@ def meld( specimen_dataframe: pd.DataFrame ,
specimen_stacked = (
specimen_dataframe
.copy()
.groupby(['stratum_num' , 'species_id' , 'sex' , 'group' , 'station' , 'length' , 'length_bin' ])
.apply(lambda x: len(x['length']))
.groupby( ['stratum_num' , 'species_id' , 'sex' , 'group' , 'station' , 'length' , 'length_bin' ] ,
observed = False )[ [ 'length' ] ]
.apply(lambda x: len( x ) , include_groups = True )
.reset_index(name='length_count')
)

# Concatenate the data frames and return
return pd.concat( [ specimen_stacked ,
length_dataframe ] ,
join = 'inner' )
join = 'inner' ).reset_index( drop = True )

@patch_method_to_DataFrame( pd.DataFrame )
def stretch( dataframe ,
Expand Down
47 changes: 27 additions & 20 deletions echopop/survey.py
Original file line number Diff line number Diff line change
Expand Up @@ -672,7 +672,7 @@ def strata_sex_weight_proportions( self ,
station_length_aggregate = (
station_sex_length
# calculate the within-sample sum and proportions (necessary for the downstream dot product calculation)
.pipe( lambda x: x.assign( within_station_n = x.groupby( [ 'sex' , 'station' , 'stratum_num' ] )[ 'count' ].transform( sum ) ,
.pipe( lambda x: x.assign( within_station_n = x.groupby( [ 'sex' , 'station' , 'stratum_num' ] )[ 'count' ].transform( 'sum' ) ,
within_station_p = lambda x: x[ 'count' ] / x[ 'within_station_n' ] ) )
.replace( np.nan, 0 ) # remove erroneous NaN (divide by 0 or invalid values)
.merge( total_n , on = 'stratum_num' ) # merge station_sex_length with total_n
Expand All @@ -688,8 +688,9 @@ def strata_sex_weight_proportions( self ,
.loc[ station_length_aggregate.sex.isin( [ 'male' , 'female' ] ) ] # only parse 'male' and 'female'
# create a pivot that will reorient data to the desired shape
.pivot_table( index = [ 'sex' , 'station' ] ,
columns = [ 'stratum_num' ] ,
values = [ 'overall_station_p' ] )
columns = [ 'stratum_num' ] ,
values = [ 'overall_station_p' ] ,
observed = False )
.groupby( 'sex' )
.sum( )
)
Expand All @@ -701,7 +702,8 @@ def strata_sex_weight_proportions( self ,
# create a pivot that will reorient data to the desired shape
.pivot_table( index = [ 'sex' , 'station' ] ,
columns = 'stratum_num' ,
values = 'overall_station_p' )
values = 'overall_station_p' ,
observed = False )
.groupby( 'station' )
.sum()
)
Expand All @@ -713,7 +715,8 @@ def strata_sex_weight_proportions( self ,
# create a pivot that will reorient data to the desired shape
.pivot_table( index = [ 'sex' , 'station' ] ,
columns = 'stratum_num' ,
values = 'overall_station_p' )
values = 'overall_station_p' ,
observed = False )
.groupby( [ 'sex' , 'station' ] )
.sum()
)
Expand All @@ -728,7 +731,8 @@ def strata_sex_weight_proportions( self ,
.reset_index( name = 'stn_p' ) , on = [ 'stratum_num' , 'station' ] )
.pivot_table( columns = 'stratum_num' ,
index = [ 'station' , 'sex' ] ,
values = [ 'stn_p' , 'sex_stn_p' ] )
values = [ 'stn_p' , 'sex_stn_p' ] ,
observed = False )
)

### Format the length bin proportions so they resemble a similar table/matrix shape as the above metrics
Expand All @@ -737,7 +741,8 @@ def strata_sex_weight_proportions( self ,
station_length_aggregate
.pivot_table( columns = [ 'sex' , 'station' , 'stratum_num' ] ,
index = [ 'length_bin' ] ,
values = [ 'within_station_p' ] )[ 'within_station_p' ]
values = [ 'within_station_p' ] ,
observed = False )[ 'within_station_p' ]
)

### Calculate combined station fraction means
Expand Down Expand Up @@ -837,13 +842,13 @@ def strata_age_binned_weight_proportions( self ,
.count_variable( variable = 'length' ,
contrasts = [ 'stratum_num' , 'age' ] ,
fun = 'size' )
.pipe( lambda x: x.assign( stratum_count_all = x.groupby( [ 'stratum_num' ] )[ 'count' ].transform( sum ) ,
stratum_count_total = x.loc[ x.age > 1 ].groupby( [ 'stratum_num' ] )[ 'count' ].transform( sum ) ) )
.groupby( [ 'stratum_num' , 'age' ] )
.pipe( lambda x: x.assign( stratum_count_all = x.groupby( [ 'stratum_num' ] )[ 'count' ].transform( 'sum' ) ,
stratum_count_total = x.loc[ x.age > 1 ].groupby( [ 'stratum_num' ] )[ 'count' ].transform( 'sum' ) ) )
.groupby( [ 'stratum_num' , 'age' ] , observed = False )[ [ 'age' , 'count' , 'stratum_count_all' , 'stratum_count_total' ] ]
.apply( lambda df: pd.Series( {
'count_age_proportion_all': ( df[ 'count' ] / df.stratum_count_all ).sum() ,
'count_age_proportion_adult': ( df.loc[ df.age > 1 ][ 'count' ] / df.stratum_count_total ).sum( )
} ) )
} ) , include_groups = True )
.reset_index( )
)

Expand All @@ -864,17 +869,17 @@ def strata_age_binned_weight_proportions( self ,
.dropna( how = 'any' )
.pipe( lambda df: df.assign( weight_stratum_all = df
.groupby( [ 'stratum_num' ] )[ 'weight' ]
.transform( sum ) ,
.transform( 'sum' ) ,
weight_stratum_adult = df
.loc[ lambda x: x.age > 1 ]
.groupby( [ 'stratum_num' ] )[ 'weight' ]
.transform( sum ) ) )
.transform( 'sum' ) ) )
.groupby( [ 'stratum_num' , 'age' ] )
.apply( lambda df: pd.Series( {
'weight_age_proportion_all': ( df.weight / df.weight_stratum_all ).sum( ) ,
'weight_age_proportion_adult': ( df.weight / df.weight_stratum_adult ).sum( )
} ) )
.reset_index()
} ) , include_groups = False )
.reset_index( )
)

# Calculate adult proportions/contributions (in terms of summed weight) for each stratum
Expand All @@ -888,14 +893,15 @@ def strata_age_binned_weight_proportions( self ,
.count_variable( contrasts = [ 'stratum_num' , 'age' , 'length_bin' , 'sex' ] ,
variable = 'weight' ,
fun = 'sum' )
.pipe( lambda df: df.assign( weight_total_all = df.groupby( [ 'stratum_num' , 'sex' ] )[ 'count' ].transform( sum ) ,
weight_total_adult = df.loc[ df.age > 1 ].groupby( [ 'stratum_num' , 'sex' ] )[ 'count' ].transform( sum ) ) )
.pipe( lambda df: df.assign( weight_total_all = df.groupby( [ 'stratum_num' , 'sex' ] )[ 'count' ].transform( 'sum' ) ,
weight_total_adult = df.loc[ df.age > 1 ].groupby( [ 'stratum_num' , 'sex' ] )[ 'count' ].transform( 'sum' ) ) )
.groupby( [ 'stratum_num' , 'age' , 'sex' ] )
.apply( lambda x: pd.Series( {
'weight_sex_proportion_all': ( x[ 'count' ] / x.weight_total_all ).sum() ,
'weight_sex_proportion_adult': ( x[ 'count' ] / x.weight_total_adult ).sum()
} ) )
} ) , include_groups = False )
.reset_index( )
.fillna( 0 )
)

length_sex_age_weight_proportions = (
Expand All @@ -908,10 +914,11 @@ def strata_age_binned_weight_proportions( self ,
.count_variable( contrasts = [ 'stratum_num' , 'age' , 'length_bin' , 'sex' ] ,
variable = 'weight' ,
fun = 'sum' )
.pipe( lambda df: df.assign( weight_total_all = df.groupby( [ 'stratum_num' , 'sex' ] )[ 'count' ].transform( sum ) ,
weight_total_adult = df.loc[ df.age > 1 ].groupby( [ 'stratum_num' , 'sex' ] )[ 'count' ].transform( sum ) ) )
.pipe( lambda df: df.assign( weight_total_all = df.groupby( [ 'stratum_num' , 'sex' ] )[ 'count' ].transform( 'sum' ) ,
weight_total_adult = df.loc[ df.age > 1 ].groupby( [ 'stratum_num' , 'sex' ] )[ 'count' ].transform( 'sum' ) ) )
.assign( weight_length_sex_proportion_all = lambda x: x[ 'count' ] / x.weight_total_all ,
weight_length_sex_proportion_adult = lambda x: x[ 'count' ] / x.weight_total_adult )
.replace( np.nan , 0 )
)

### Add these dataframes to the appropriate data attribute
Expand Down
Loading

0 comments on commit 9548528

Please sign in to comment.