-
Notifications
You must be signed in to change notification settings - Fork 0
/
BrownIC.py
50 lines (41 loc) · 1.1 KB
/
BrownIC.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import csv
import math

import nltk
import scipy
import scipy.stats
from nltk.corpus import wordnet as wn
from nltk.corpus import wordnet_ic
# Information-content data loaded from 'ic-brown.dat' through NLTK's
# wordnet_ic reader. sim_lin and sim_resnik pass it as the IC argument of
# every lin_similarity / res_similarity call.
brown_ic = wordnet_ic.ic('ic-brown.dat')
def sim_lin(syns1, syns2):
    """Return the highest Lin similarity between two groups of synsets.

    Every synset in ``syns1`` is compared with every synset in ``syns2``
    through ``lin_similarity``, using the module-level ``brown_ic`` data.

    Args:
        syns1: Synsets of the first word.
        syns2: Synsets of the second word. It is traversed once per
            element of ``syns1``, so it must be re-iterable (e.g. a list).

    Returns:
        The largest pairwise score, or ``None`` when no pair could be
        formed because either group is empty.
    """
    scores = [s1.lin_similarity(s2, brown_ic)
              for s1 in syns1
              for s2 in syns2]
    # max() raises on an empty sequence, so the "no pairs -> None" contract
    # is kept explicitly.
    return max(scores) if scores else None
def sim_resnik(syns1, syns2):
    """Return the highest Resnik similarity between two groups of synsets.

    Every synset in ``syns1`` is compared with every synset in ``syns2``
    through ``res_similarity``, using the module-level ``brown_ic`` data.

    Args:
        syns1: Synsets of the first word.
        syns2: Synsets of the second word. It is traversed once per
            element of ``syns1``, so it must be re-iterable (e.g. a list).

    Returns:
        The largest pairwise score, or ``None`` when no pair could be
        formed because either group is empty.
    """
    scores = [s1.res_similarity(s2, brown_ic)
              for s1 in syns1
              for s2 in syns2]
    # max() raises on an empty sequence, so the "no pairs -> None" contract
    # is kept explicitly.
    return max(scores) if scores else None
# Score every word pair in rg.csv with Resnik similarity and report the
# Spearman rank correlation against the third column.
# Layout read by this code: ';'-separated, first row is a header, then
#     column 0 = first word, column 1 = second word, column 2 = rating.
# NOTE(review): "hr" presumably stands for "human rating" -- confirm.
word1 = []
word2 = []
hr = []
LinS = []  # Despite the name, this collects Resnik scores (sim_resnik).

# 'rb' is the mode Python 2's csv module expects. Under Python 3 this would
# have to become text mode with newline=''. The with-block guarantees the
# file is closed, which the bare open() call never did.
with open("rg.csv", 'rb') as rg_file:
    train = csv.reader(rg_file, delimiter=';')
    next(train, None)  # Skip the header row (no-op on an empty file).
    for row in train:
        if len(row) < 3:
            # Blank or truncated line: nothing to score.
            continue
        a = wn.synsets(row[0], pos="n")
        b = wn.synsets(row[1], pos="n")
        score = sim_resnik(a, b)
        if score is None:
            # A word without noun synsets yields no score. Drop the whole
            # pair so LinS and hr stay aligned and contain only numbers.
            continue
        word1.append(row[0])
        word2.append(row[1])
        # Ratings must be numeric: left as strings they can be ranked
        # lexicographically. Decimal commas, common in ';'-separated
        # exports, are tolerated.
        hr.append(float(row[2].replace(',', '.')))
        LinS.append(score)

# Parenthesised single-argument print gives the same output on Python 2
# and is also valid Python 3.
print(scipy.stats.spearmanr(LinS, hr))