-
Notifications
You must be signed in to change notification settings - Fork 1
/
util.py
96 lines (78 loc) · 3.09 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import numpy as np
from statsmodels.stats.weightstats import DescrStatsW
def geweke(x, weights=None, first=.1, last=.5, intervals=20):
R"""Return z-scores for convergence diagnostics.
Compare the mean of the first % of series with the mean of the last % of
series. x is divided into a number of segments for which this difference is
computed. If the series is converged, this score should oscillate between
-1 and 1.
Parameters
----------
x : array-like
The trace of some stochastic parameter.
first : float
The fraction of series at the beginning of the trace.
last : float
The fraction of series at the end to be compared with the section
at the beginning.
intervals : int
The number of segments.
Returns
-------
scores : list [[]]
Return a list of [i, score], where i is the starting index for each
interval and score the Geweke score on the interval.
Notes
-----
The Geweke score on some series x is computed by:
.. math:: \frac{E[x_s] - E[x_e]}{\sqrt{V[x_s] + V[x_e]}}
where :math:`E` stands for the mean, :math:`V` the variance,
:math:`x_s` a section at the start of the series and
:math:`x_e` a section at the end of the series.
References
----------
Geweke (1992)
"""
if np.ndim(x) > 1:
return [geweke(y, weights, first, last, intervals) for y in np.transpose(x)]
# Filter out invalid intervals
for interval in (first, last):
if interval <= 0 or interval >= 1:
raise ValueError(
"Invalid intervals for Geweke convergence analysis",
(first,
last))
if first + last >= 1:
raise ValueError(
"Invalid intervals for Geweke convergence analysis",
(first,
last))
# Initialize list of z-scores
zscores = []
# Last index value
end = len(x) - 1
# Start intervals going up to the <last>% of the chain
last_start_idx = (1 - last) * end
# Calculate starting indices
start_indices = np.arange(0, int(last_start_idx), step=int(
(last_start_idx) / (intervals - 1)))
# Loop over start indices
if weights is None:
weights = np.ones(len(x))
for start in start_indices:
# Calculate slices
first_slice = x[start: start + int(first * (end - start))]
first_weights = weights[start: start + int(first * (end - start))]
last_slice = x[int(end - last * (end - start)):]
last_weights = weights[int(end - last * (end - start)):]
#z = first_slice.mean() - last_slice.mean()
#z /= np.sqrt(first_slice.var() + last_slice.var())
first_slice_weighted = DescrStatsW(first_slice, weights=first_weights, ddof=0)
last_slice_weighted = DescrStatsW(last_slice, weights=last_weights, ddof=0)
z = first_slice_weighted.mean - last_slice_weighted.mean
z /= np.sqrt(first_slice_weighted.var + last_slice_weighted.var)
zscores.append([start, z])
if intervals is None:
return np.array(zscores[0])
else:
return np.array(zscores)