-
Notifications
You must be signed in to change notification settings - Fork 4
/
example_vs_nsamples_GMD.py
97 lines (75 loc) · 2.91 KB
/
example_vs_nsamples_GMD.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
""" Compute the l1-based tests in the Gaussian Mean Difference Problem.
"""
import numpy as np
import l1_two_sample_test
# Experiment configuration for the Gaussian Mean Difference (GMD) benchmark.
method = "GMD"  # tag written in the first column of every CSV row
dim = 100  # dimension of the sampled Gaussian vectors

# Both values are forwarded unchanged to the l1_two_sample_test routines:
# J goes to the location/width solvers, alpha to the asymptotic tests.
J = 5
alpha = 0.01

# One label per l1-based test; the order matches the columns of the result.
labels = [
    "L1_opt_J_ME",
    "L1_grid_J_ME",
    "L1_opt_J_SCF",
    "L1_grid_J_SCF",
]
num_of_tests = len(labels)

# Test sample sizes to sweep over: 500, 1000, ..., 5500.
num_samples_test = np.arange(500, 6000, 500)
def proba_above_tresh_GMD(seed, num_sample_test, dim, num_of_tests, my=1):
    """
    Compute the type-II error indicators in the GMD problem:
    P = N(0, I) and Q = N((my, 0, ..., 0), I).
    Only the first dimension of the means differ.

    2 * num_sample_test points are drawn from each distribution and split,
    with one shared random mask, into a training half (used to choose the
    test locations and the width) and a test half (used to run the tests).
    The values ``J`` and ``alpha`` are read from the module-level globals.

    Parameters
    ----------
    seed : integer
        Seed given to ``np.random.seed`` before any sampling.
    num_sample_test : integer
        Test sample size
    dim : integer
        Dimension of the problem
    num_of_tests : integer
        Number of l1-based tests (length of the returned array). Four
        tests are run, so it must be at least 4.
    my : float
        The difference between the first coordinates of the two distributions.

    Return
    -------
    tests_error : array-like, shape = [num_of_tests]
        Entry i is 1 if test i did NOT reject the null hypothesis (a
        type-II error, since the two distributions differ), 0 otherwise.
        Order: L1_opt_J_ME, L1_grid_J_ME, L1_opt_J_SCF, L1_grid_J_SCF.
    """
    tests_error = np.zeros(num_of_tests)
    np.random.seed(seed)

    # Draw 2 * num_sample_test points from P and from Q.
    X = np.random.multivariate_normal(np.zeros(dim), np.eye(dim), 2 * num_sample_test)
    mean_shift = np.zeros(dim)
    mean_shift[0] = my
    Y = np.random.multivariate_normal(mean_shift, np.eye(dim), 2 * num_sample_test)

    # Random half/half split; the same mask is applied to X and Y.
    Itr = np.zeros(2 * num_sample_test, dtype=bool)
    tr_ind = np.random.choice(2 * num_sample_test, int(num_sample_test), replace=False)
    Itr[tr_ind] = True
    Ite = np.logical_not(Itr)
    X_tr, Y_tr = X[Itr, :], Y[Itr, :]
    X_te, Y_te = X[Ite, :], Y[Ite, :]

    # (parameter selection on the training half, test on the held-out half),
    # listed in the order of the output entries.
    pipelines = (
        # L1_opt_J_ME
        (l1_two_sample_test.solver_ME, l1_two_sample_test.test_asymptotic_ME),
        # L1_grid_J_ME
        (l1_two_sample_test.initial2_T_gwidth2, l1_two_sample_test.test_asymptotic_ME),
        # L1_opt_J_SCF
        (l1_two_sample_test.solver_SCF, l1_two_sample_test.test_asymptotic_SCF),
        # L1_grid_J_SCF
        (l1_two_sample_test.initial4_T_gwidth2, l1_two_sample_test.test_asymptotic_SCF),
    )
    for idx, (select_params, run_test) in enumerate(pipelines):
        test_locs, gwidth2 = select_params(X_tr, Y_tr, J)
        test = run_test(X_te, Y_te, test_locs, gwidth2, alpha)
        # Every test uses the same criterion: failing to reject H0 is the
        # error. (The last test previously had this condition inverted.)
        if not test["h0_rejected"]:
            tests_error[idx] = 1
    return tests_error
# Sweep over the sample sizes and write one CSV row per size:
# method tag, sample size, then one error entry per test.
with open("l1_test_vs_nsample.csv", "w") as csv_file:
    for num_sample_test in num_samples_test:
        errors = proba_above_tresh_GMD(
            seed=500,
            num_sample_test=num_sample_test,
            dim=dim,
            num_of_tests=num_of_tests,
        )
        fields = [method, str(num_sample_test)]
        fields.extend(str(flag) for flag in errors)
        csv_file.write(",".join(fields) + "\n")
        # Flush after every row instead of only when the file is closed.
        csv_file.flush()