Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Compare series + fix correlations #620

Merged
merged 10 commits into from
Oct 18, 2024
35 changes: 21 additions & 14 deletions pyleoclim/core/ensembleseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,8 +536,8 @@ def correlation(self, target=None, timespan=None, alpha=0.05, method = 'ttest',

The significance level (0.05 by default)

method : str, {'ttest','built-in','ar1sim','phaseran'}
method for significance testing. Default is 'ttest'
method : str, {'ttest','built-in','ar1sim','phaseran','CN'}
method for significance testing. Default is 'ttest' to lower computational cost, but this is not always the best choice

statistic : str
The name of the statistic used to measure the association, to be chosen from a subset of
Expand Down Expand Up @@ -576,12 +576,13 @@ def correlation(self, target=None, timespan=None, alpha=0.05, method = 'ttest',

See also
--------

pyleoclim.utils.correlation.corr_sig : Correlation function

pyleoclim.core.series.Series.correlation : pairwise correlations

pyleoclim.utils.correlation.fdr : False Discovery Rate

pyleoclim.core.correns.CorrEns : The correlation ensemble object

pyleoclim.core.surrogateseries.SurrogateSeries : surrogate series

Examples
--------
Expand Down Expand Up @@ -612,7 +613,7 @@ def correlation(self, target=None, timespan=None, alpha=0.05, method = 'ttest',
print(corr_res)

The `print` function tabulates the output, and conveys the p-value according
to the correlation test applied ("isospec", by default). To plot the result:
to the correlation test applied ("ttest", by default). To plot the result:

.. jupyter-execute::

Expand All @@ -630,16 +631,22 @@ def correlation(self, target=None, timespan=None, alpha=0.05, method = 'ttest',
if hasattr(target, 'series_list'):
nEns = np.size(target.series_list)
if idx < nEns:
value2 = target.series_list[idx].value
time2 = target.series_list[idx].time
ts2 = target.series_list[idx].copy()
#value2 = target.series_list[idx].value
#time2 = target.series_list[idx].time

else:
value2 = target.series_list[idx-nEns].value
time2 = target.series_list[idx-nEns].time
#value2 = target.series_list[idx-nEns].value
#time2 = target.series_list[idx-nEns].time
ts2 = target.series_list[idx-nEns].copy()
else:
value2 = target.value
time2 = target.time

ts2 = Series(time=time2, value=value2, verbose=idx==0, auto_time_params=False)
ts2 = target.copy()
#value2 = target.value
#time2 = target.time

#ts2 = target.series_list[idx].copy()
#ts2.time = time2; ts2.value = value2
#ts2 = Series(time=time2, value=value2, verbose=idx==0, auto_time_params=False)
corr_res = ts1.correlation(ts2, timespan=timespan, method=method,number=number,
statistic=statistic,
settings=settings, mute_pbar=True,
Expand Down
48 changes: 34 additions & 14 deletions pyleoclim/core/multipleseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import numpy as np
from copy import deepcopy

from matplotlib.ticker import (MultipleLocator, AutoMinorLocator, FormatStrFormatter)
from matplotlib.ticker import (AutoMinorLocator, FormatStrFormatter)
import matplotlib.transforms as transforms
import matplotlib as mpl
import matplotlib.pyplot as plt
Expand Down Expand Up @@ -735,6 +735,8 @@ def correlation(self, target=None, timespan=None, alpha=0.05, statistic='pearson
fdr_kwargs=None, common_time_kwargs=None, mute_pbar=False, seed=None):
''' Calculate the correlation between a MultipleSeries and a target Series

This function recursively applies Series.correlation() to members of the MultipleSeries object.

The significance of the correlation is assessed using one of the following methods:

1. 'ttest': T-test adjusted for effective sample size, see [1]
Expand All @@ -748,8 +750,9 @@ def correlation(self, target=None, timespan=None, alpha=0.05, statistic='pearson
The T-test is a parametric test, hence computationally cheap, but can only be performed in ideal circumstances.
The others are non-parametric, but their computational requirements scale with the number of simulations.

The choise of significance test and associated number of Monte-Carlo simulations are passed through the `settings` parameter.

The False Discovery Rate method is applied to the assessment of significance when plotting the result.

If the computation fails, a diagnostic message returns the index and label of the incriminated series.

Parameters
----------
Expand Down Expand Up @@ -809,12 +812,23 @@ def correlation(self, target=None, timespan=None, alpha=0.05, statistic='pearson
-------
corr : CorrEns

the result object
the result object, containing the following:
- statistic r (array of real numbers)
- p-values pval (array of real numbers)
- signif (array of booleans)
- alpha (significance level)


See also
--------

pyleoclim.core.series.Series.correlation : Series-level correlation

pyleoclim.utils.correlation.corr_sig : Correlation function
pyleoclim.utils.correlation.association : SciPy measures of association between variables

pyleoclim.series.surrogates : parametric and non-parametric surrogates of any Series object

pyleoclim.multipleseries.common_time : Aligning time axes

pyleoclim.utils.correlation.fdr : FDR function

Expand All @@ -825,7 +839,6 @@ def correlation(self, target=None, timespan=None, alpha=0.05, statistic='pearson

.. jupyter-execute::

import pyleoclim as pyleo
from pyleoclim.utils.tsmodel import colored_noise
import numpy as np

Expand Down Expand Up @@ -866,15 +879,22 @@ def correlation(self, target=None, timespan=None, alpha=0.05, statistic='pearson
if target is None:
target = self.series_list[0]

print("Looping over "+ str(len(self.series_list)) +" Series in collection")
print(f"Looping over {len(self.series_list)} Series in collection")
for idx, ts in tqdm(enumerate(self.series_list), total=len(self.series_list), disable=mute_pbar):
corr_res = ts.correlation(target, timespan=timespan, alpha=alpha, settings=settings,
method=method, number=number, statistic=statistic,
common_time_kwargs=common_time_kwargs, seed=seed)
r_list.append(corr_res.r)
signif_list.append(corr_res.signif)
p_list.append(corr_res.p)

try:
corr_res = ts.correlation(target, timespan=timespan, alpha=alpha, settings=settings,
method=method, number=number, statistic=statistic,
common_time_kwargs=common_time_kwargs, seed=seed)
r_list.append(corr_res.r)
signif_list.append(corr_res.signif)
p_list.append(corr_res.p)
except:
ovrlp = ts.overlap(target)
print(f"Computation failed for series #{idx}, {ts.label}; overlap is {ovrlp} {ts.time_unit}")
r_list.append(np.nan)
signif_list.append(None)
p_list.append(np.nan)

r_list = np.array(r_list)
signif_fdr_list = []
fdr_kwargs = {} if fdr_kwargs is None else fdr_kwargs.copy()
Expand Down
Loading
Loading