-
Notifications
You must be signed in to change notification settings - Fork 0
/
Index.py
76 lines (61 loc) · 1.88 KB
/
Index.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
__author__ = 'kasper'
# Index
# TODO
from DocumentsList import DocumentList
import math
class Index:
    """In-memory inverted index mapping a word to its document postings.

    Entries are either ``DocumentList`` objects (created by ``mergeWL``) or
    whatever values a cursor supplied to ``createFromCursor``.
    """

    def __init__(self):
        """Create an empty index with all counters at zero."""
        self.__index = {}
        self.__wordsCount = 0
        self.__distinctWords = 0

    def index(self):
        """Return the underlying word -> postings dictionary."""
        return self.__index

    def count(self):
        """Return the accumulated ``wordsList.count()`` of all merged lists."""
        return self.__wordsCount

    def distinctCount(self):
        """Return the number of distinct words added through ``mergeWL``."""
        return self.__distinctWords

    def sameCount(self):
        """Return ``count()`` minus ``distinctCount()``."""
        return self.__wordsCount - self.__distinctWords

    def mergeWL(self, wordsList, dId):
        """Merge the words of one document into the index.

        For every word the log-scaled weight ``1 + log10(value)`` (rounded to
        two decimals) is written back through ``wordsList.tf``; the weights
        are then divided by the rounded Euclidean length of the weight vector
        and recorded for document ``dId``.

        wordsList -- object providing ``count()``, ``list()``,
                     ``getWord(key)`` and ``tf(key, value)``
        dId       -- identifier passed on to ``DocumentList.addDocument``

        Raises ValueError (from ``math.log10``) if a word value is not
        positive and ZeroDivisionError if the rounded vector length is 0.
        """
        self.__wordsCount += wordsList.count()

        squares = 0
        for key in wordsList.list():
            tf = round(1 + math.log10(wordsList.getWord(key)), 2)
            wordsList.tf(key, tf)
            squares += tf * tf
        length = round(math.sqrt(squares), 2)

        for key in wordsList.list():
            # NOTE(review): presumably ``wordsList.tf`` stored the weight so
            # ``getWord`` now returns it -- confirm against WordsList.
            weight = round(wordsList.getWord(key) / length, 2)
            if key not in self.__index:
                self.__index[key] = DocumentList(key)
                self.__distinctWords += 1
            self.__index[key].addDocument(dId, weight)

    def createFromCursor(self, cursor):
        """Copy every ``key -> cursor[key]`` pair into the index.

        Existing entries with the same key are overwritten; the word counters
        are not touched. Always returns 1.
        """
        for data in cursor:
            self.__index[data] = cursor[data]
        return 1

    def find(self, word):
        """Look up ``word`` and return its postings, or None if absent.

        Byte strings are decoded as UTF-8 before the lookup; text strings are
        used unchanged, since decoding them fails for non-ASCII text on
        Python 2 and is not possible at all on Python 3.

        Returns ``entry.docList()`` when the stored entry provides it,
        otherwise the stored entry itself.
        """
        if isinstance(word, bytes):
            word = word.decode('utf-8')
        if word not in self.__index:
            return None
        entry = self.__index[word]
        try:
            return entry.docList()
        except AttributeError:
            # Entries loaded by createFromCursor may be plain values.
            return entry
# from WordsList import WordsList
# w = WordsList(1)
# lst = ['kasper', 'jabolko', 'krusa']
# w.insertList(lst)
# w.tf('kasper', 115)
# w.tf('jabolko', 10)
# w.tf('krusa', 2)
# print w.list()
# i = Index()
# i.mergeWL(w, 1)
# for key in i.index():
# print i.index()[key].docList()