-
Notifications
You must be signed in to change notification settings - Fork 3
/
postprocess.py
66 lines (52 loc) · 2.08 KB
/
postprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import pandas as pd, numpy as np
import os
from cogs.distance import gower_distance
from cogs import util
from robust_cfe.dataproc import gimme
"""
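This script goes through all "result_fold_x.csv" files in the "dataset_*" sub-folders of "results/" and:
- appends the Gower distance between x and z
- appends a sparsity score (fraction of features that remained identical)
- appends a loss (0.5 * Gower distance + 0.5 * fraction of changed features, plus 1 if z is not predicted as the desired class)
- appends the type of blackbox (if not already present)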
"""
# Load datasets (they include meta-info), keyed by dataset name.
datasets = {}
for name in ('credit', 'adult', 'boston', 'garments', 'compas'):
    loaded = gimme(name, datasets_folder="datasets")
    datasets[name] = loaded
    # Report the shape of the feature matrix of each dataset as it is loaded.
    print(name, loaded['X'].shape)
# Read all results: every log file found in the "dataset_*" sub-folders of
# "results/" is rewritten in place with the extra columns "blackbox" (only if
# absent), "gower_dist", "loss" and "sparsity".
num_feature_ranges_cache = dict()  # dataset name -> ranges of its numerical features
for f in os.listdir("results"):
    # get folder that applies
    if not f.startswith("dataset_"):
        continue
    folder = os.path.join("results", f)
    # a plain file that happens to be called "dataset_*" cannot be listed; skip it
    if not os.path.isdir(folder):
        continue
    # get all log files in that folder
    for r in os.listdir(folder):
        filepath = os.path.join(folder, r)
        # if it is an old ".postproc.", delete it
        if ".postproc." in r:
            os.remove(filepath)
            continue
        df = pd.read_csv(filepath)
        # append blackbox info, taken from the folder name ("...blackbox_<name>_...")
        if "blackbox" not in df.columns:
            blackbox = f.split("blackbox_")[1].split("_")[0]
            df["blackbox"] = blackbox
        # Initialise the computed columns as floats: the values written below are
        # floats, and writing a float into an integer column through .loc is
        # deprecated in recent pandas versions.
        df["gower_dist"] = 0.0
        df["loss"] = 0.0
        df["sparsity"] = 0.0
        # add gower dist, sparsity & loss
        for i, row in df.iterrows():
            # NOTE(review): eval() executes whatever expression is stored in the
            # CSV cell, so only run this script on trusted result files.
            # ast.literal_eval would be safer if the cells are plain list
            # literals -- confirm the stored format before switching.
            z = np.array(eval(row['z']))
            x = np.array(eval(row['x']))
            dataset_name = row['dataset']
            indices_categorical_features = datasets[dataset_name]['indices_categorical_features']
            # the ranges depend only on the dataset meta-info, so compute them once per dataset
            if dataset_name not in num_feature_ranges_cache:
                feature_intervals = datasets[dataset_name]['feature_intervals']
                num_feature_ranges_cache[dataset_name] = util.compute_ranges_numerical_features(
                    feature_intervals, indices_categorical_features)
            num_feature_ranges = num_feature_ranges_cache[dataset_name]
            gd = gower_distance(z, x, num_feature_ranges, indices_categorical_features)
            df.loc[i, 'gower_dist'] = gd
            # fraction of features whose value differs between z and x
            l_0 = 1/len(x) * np.sum(z != x)
            # penalty of 1 when z is not classified as the desired class
            is_not_cfe = 0 if row['pred_class_z'] == row['desired_class'] else 1
            loss = .5*gd + .5*l_0 + is_not_cfe
            df.loc[i, "sparsity"] = 1.0 - l_0
            df.loc[i, 'loss'] = loss
        # save df (overwrites the original log file)
        df.to_csv(filepath, index=False)