This repository has been archived by the owner on May 30, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 45
/
dump_index.js
126 lines (109 loc) · 3.09 KB
/
dump_index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
/**
 * Tells whether a value is the JavaScript `undefined` value.
 *
 * @param {*} value - Value to inspect.
 * @returns {boolean} true only when `value` is `undefined`.
 */
function _is_undefined(value) {
  // `void 0` always evaluates to undefined.
  return value === void 0;
}
/**
 * Prepares the pieces needed to turn a query string into a Lucene query.
 *
 * Only the StandardAnalyzer is constructed at the moment; the QueryParser
 * step is disabled, so nothing is returned (callers receive undefined).
 *
 * @param {string} q - Query text; unused while parsing is disabled.
 */
function _get_query(q) {
  var standardPkg = Packages.org.apache.lucene.analysis.standard;
  var stdAnalyzer = new standardPkg.StandardAnalyzer();
  // Disabled parsing step:
  // var parser = new Packages.org.apache.lucene.queryParser.QueryParser("f", stdAnalyzer);
  // return parser.parse(q);
}
/**
 * Prints document counts for the index behind the global reader `ir`.
 *
 * JavaScript has no overloading: the file used to declare `count` twice, and
 * the second declaration silently replaced the first, so `count()` printed
 * nothing. Both variants now live in this single function.
 *
 * @param {string} [q] - Optional query string. When omitted, the total number
 *     of documents reported by `ir.numDocs()` is printed as "count:<n>".
 *     When given, a searcher and query are prepared, but the hit counting
 *     itself is still disabled, so nothing is printed.
 */
function count(q) {
  if (typeof(q) === "undefined") {
    print("count:" + ir.numDocs());
    return;
  }
  var searcher = new Packages.org.apache.lucene.search.IndexSearcher(ir);
  var query = _get_query(q);
  // Disabled until _get_query returns a parsed query:
  // var hits = searcher.search(query, 100).scoreDocs;
  // print("count(" + q + "):" + hits.length);
}
/**
 * Sets up a search over the global reader `ir` for the given query string.
 *
 * An IndexSearcher is created and `_get_query(q)` is invoked; running the
 * search and printing each hit is currently disabled, so the function
 * produces no output and returns nothing.
 *
 * @param {string} q - Query text handed to `_get_query`.
 */
function search(q) {
  var luceneSearch = Packages.org.apache.lucene.search;
  var indexSearcher = new luceneSearch.IndexSearcher(ir);
  var parsedQuery = _get_query(q);
  // Disabled hit listing:
  // var hits = indexSearcher.search(parsedQuery, 100).scoreDocs;
  // for (var i = 0; i < hits.length; i++) {
  //   get(hits[i].doc, hits[i].score);
  // }
  // indexSearcher.close();
}
/**
 * Scans documents 0 .. ir.numDocs()-1 and calls `get(i)` for every document
 * whose field `key` equals `val`.
 *
 * @param {string} key - Name of the field to compare.
 * @param {*} val - Value to look for; compared with loose equality (==)
 *     against the field value converted to a JavaScript string.
 */
function find(key, val) {
  var numDocs = ir.numDocs();
  for (var i = 0; i < numDocs; i++) {
    var raw = ir.document(i).get(key);
    // Test the raw value: String(null) is the text "null", so a check made
    // after conversion never skips and would match a search for "null".
    if (raw == null) {
      continue;
    }
    if (val == String(raw)) {
      get(i);
    }
  }
}
/**
 * Prints a header line for a document followed by one "name:value" line per
 * field returned by `doc.getFields()`.
 *
 * @param {number} docId - Document id passed to `ir.document`.
 * @param {number} [score] - Optional score; when supplied it is included in
 *     the header line.
 */
function get(docId, score) {
  if (_is_undefined(score)) {
    print("-- docId: " + docId + " --");
  } else {
    print("-- docId:" + docId + " (score:" + score + ") --");
  }
  var fields = ir.document(docId).getFields();
  for (var i = 0; i < fields.size(); i++) {
    var field = fields.get(i);
    // Read the value from the field itself: looking it up by name on the
    // document yields the first value for that name, so a multi-valued
    // field would print its first value once per occurrence.
    print(field.name() + ":" + field.stringValue());
  }
}
/**
 * Counts the terms of every field by walking the enumerator returned by
 * `ir.terms()`, then prints the counts.
 *
 * @param {string} [fieldname] - Field to report. When empty ("") or omitted,
 *     all fields are printed as "field:count", ordered by descending count.
 *     Otherwise only that field is printed, or "Field not found:<name>" when
 *     no term belongs to it.
 */
function terms(fieldname) {
  // Own-property checks keep inherited names such as "constructor" or
  // "toString" from being mistaken for already-counted fields.
  var hasOwn = Object.prototype.hasOwnProperty;
  var termDict = {};
  var te = ir.terms();
  try {
    while (te.next()) {
      // Coerce to a JavaScript string so it is a plain property key.
      var fldname = String(te.term().field());
      termDict[fldname] = hasOwn.call(termDict, fldname) ? termDict[fldname] + 1 : 1;
    }
  } finally {
    // Release the enumerator even if iteration fails.
    te.close();
  }
  // A missing argument means "all fields", same as the empty string.
  if (fieldname == null || fieldname == "") {
    var sortable = [];
    for (var key in termDict) {
      if (hasOwn.call(termDict, key)) {
        sortable.push([key, termDict[key]]);
      }
    }
    sortable.sort(function(a, b) { return b[1] - a[1]; });
    for (var i = 0; i < sortable.length; i++) {
      print(sortable[i][0] + ":" + sortable[i][1]);
    }
  } else if (!hasOwn.call(termDict, fieldname)) {
    print("Field not found:" + fieldname);
  } else {
    print(fieldname + ":" + termDict[fieldname]);
  }
}
// Ad-hoc driver: exercises the helpers above against the index behind the
// global reader `ir`. Disabled calls are kept as usage examples.
print("#-docs in index");
count();
//print("#-docs for title:bone");
//count("title:bone");
//print("Search for title:bone");
//search("title:bone");
//print("get doc 0");
//get(0);
//print("Find record with title: Broken bone");
//find("title", "Broken bone");
print("printing all term counts");
// Backslashes in the Windows path must be doubled: unescaped, "\v" becomes a
// vertical tab, "\r" a carriage return, and the other backslashes are dropped,
// so the path passed to IndexInfo would be garbage.
var idxInfo = new Packages.org.getopt.luke.IndexInfo(ir, "C:\\var\\opengrok\\data\\index\\repositories");
// Return value unused; presumably called so the term statistics are computed
// before they are read below -- TODO confirm against the IndexInfo class.
idxInfo.getNumTerms();
var fields = idxInfo.getFieldNames();
var termCounts = idxInfo.getFieldTermCounts();
// One "<field> <termCount>" line per field name.
for (var i = 0; i < fields.size(); i++) {
  var s = fields.get(i);
  var sc = termCounts.get(s).termCount;
  print(s + " " + sc);
}
//print(ir.terms());
//terms("");
//print("printing term counts for idx");
//terms("idx");
//print("printing term counts for non-existent field foo");
//terms("foo");