defi.py
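"""Intruder detection on the "défi" flow dataset.

Indexes XML flow files into Elasticsearch, then trains a scikit-learn
classifier on the "HTTPWeb" flows of the training index and writes the
predicted tag and its probability for each test flow to an output file.

Typical usage (assuming a local Elasticsearch node is running):
    python defi.py -r    # index the training and test files, then exit
    python defi.py       # train, predict and write the results
"""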
import argparse
import sys
from collections import Counter
import numpy as np
from sklearn.preprocessing import normalize
from elasticsearch import Elasticsearch
from intruvu.flowES import FlowES
from intruvu.loader import index_files
from intruvu.ml import train_classifier
###############
## Arguments ##
###############
arg_parser = argparse.ArgumentParser(description='Intruder detection - défi', formatter_class=argparse.RawTextHelpFormatter)
arg_parser.add_argument("-r", action='store_true', required=False, help=
"""Index files to ElasticSearch and exit the program.
Files must be indexed before working with classifiers.""")
arg_parser.add_argument("--index", type=str, nargs=1, default=["defi"], required=False, help=
"""Name of the ElasticSearch index to use.
default: "flow" """)
arg_parser.add_argument("--dir_train", type=str, nargs=1, default=["./defi_train"], required=False, help=
"""Directory to load the XML training file(s) from.
This directory contain the file(s) used for the training.
It shouldn't contain any file with unknown tag.
default: "./defi_train" """)
arg_parser.add_argument("--dir_test", type=str, nargs=1, default=["./defi_test"], required=False, help=
"""Directory to load the XML test file(s) from.
This directory contain the file(s) used for the test.
default: "./defi_test" """)
arg_parser.add_argument("--output", type=str, nargs=1, default=["output.txt"], required=False, help=
"""Name of the output file containing the results.
default: "output" """)
args = arg_parser.parse_args()
###################
## ElasticSearch ##
###################
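# Connect to Elasticsearch (default: local node) and print the cluster info
# to confirm the connection.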
es = Elasticsearch()
print(es.info())
index_name = args.index[0]
if args.r:
    index_files(args.dir_train[0], index_name, es)
    index_files(args.dir_test[0], index_name+'_test', es)
    print('all files indexed !')
    sys.exit()
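# FlowES wraps an index and exposes the indexed flows as feature vectors,
# one wrapper for the training index and one for the test index.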
flow = FlowES(es, args.index[0])
flow_test = FlowES(es, args.index[0]+'_test')
######################
## Machine Learning ##
######################
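# Feature vectors and expected tags for the flows of the "HTTPWeb" application.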
vect_l, expected_l = flow.get_vectors_for_application("HTTPWeb")
print("classes", Counter(expected_l))
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.linear_model import SGDClassifier
from sklearn.neural_network import MLPClassifier
classifier = KNeighborsClassifier(n_neighbors=5, metric="manhattan")
# classifier = MLPClassifier(random_state=1, hidden_layer_sizes=(100,50), verbose=True)
# train_classifier(classifier, vect_l, expected_l)
print("train")
X = np.array(vect_l)
y = np.array(expected_l)
X = normalize(X)
classifier.fit(X, y)
print("end train")
vect_t, expected_t = flow_test.get_vectors_for_application("HTTPWeb")
del expected_t
print("predict")
X = np.array(vect_t)
X = normalize(X)
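# Hard class predictions plus per-class probabilities
# (columns ordered by classifier.classes_).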
predictions = classifier.predict(X)
predictions_proba = classifier.predict_proba(X)
print("end predict")
with open(args.output[0], "wt") as f:
    # predict_proba columns are ordered by classifier.classes_, so look up the
    # column of each predicted label instead of assuming 0-based integer labels.
    classes = list(classifier.classes_)
    for pred, score in zip(predictions, predictions_proba):
        f.write("{}\t{}\n".format(score[classes.index(pred)], pred))
print("done !")