Skip to content

Commit

Permalink
Minor fixes, version bump
Browse files Browse the repository at this point in the history
  • Loading branch information
Hexagon committed Nov 23, 2015
1 parent eb824c3 commit 864dacd
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 35 deletions.
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ var thinker = Thinker({
minWildcardWordLen: 4,
maxWildcardWordLen: 32,
minWordLen: 2,
maxWordLen: 32
maxWordLen: 32,
suggestionMinWordCount: 6
});

// Options available on run time
Expand Down Expand Up @@ -115,6 +116,10 @@ The shortest word to index, default is 2 which adds 'ex' to the index, but not '

Same as above, but max.

#### opts.suggestionMinWordCount

Sets how many times a word has to exist in the index before it is used for suggestions. Defaults to 6.

#### thinker.enableSuggestions

If this is enabled, thinker will use unprocessed words from the input texts to give suggestions when an expression doesn't give a direct match.
Expand Down
41 changes: 25 additions & 16 deletions lib/Thinker.js
Original file line number Diff line number Diff line change
Expand Up @@ -115,11 +115,9 @@ function Thinker (opts) {

// Optional `new` keyword
if (!(self instanceof Thinker)) {
return new Thinker;
return new Thinker(opts);
}

// Index backend
self.index = new Index();

// Can be set afterwards
self.enableSuggestions = false;
Expand All @@ -135,8 +133,13 @@ function Thinker (opts) {
minWordLen: 2,
maxWordLen: 32,
wordProcessors: [],
fieldProcessors: []
}, opts && opts.options);
fieldProcessors: [],
suggestionMinWordCount: 6
}, opts );

// Index backend
self.index = new Index(self.options);

};

Thinker.prototype.feed = function (texts, opts) {
Expand Down Expand Up @@ -176,25 +179,29 @@ Thinker.prototype.feed = function (texts, opts) {

// split text into separate words, removing empty results
// Loop through all textfields (index > 0)


for (j = 1 ; j < currentDocument.length; j++) {

// Extract current field
currentField = currentDocument[j];
if (currentField) {

// Apply all fieldProcessors
for (i = 0; i < opts.fieldProcessors.length; i++) {
if (currentField) {
currentField = opts.fieldProcessors[i](currentField);
// Apply all fieldProcessors
for (i = 0; i < opts.fieldProcessors.length; i++) {
if (currentField) {
currentField = opts.fieldProcessors[i](currentField);
}
}
}

// Split field into separate words
currentField = currentField.match(opts.characters);
// Split field into separate words
currentField = currentField.match(opts.characters);

// Extract unique words
for (k = 0; k < currentField.length; k++) {
if (currentWord !== '' && (currentWord = processWord(currentField[k], opts))) {
addWord(currentWord, currentDocument[0], j);
// Extract unique words
for (k = 0; k < currentField.length; k++) {
if (currentWord !== '' && (currentWord = processWord(currentField[k], opts))) {
addWord(currentWord, currentDocument[0], j);
}
}
}
}
Expand Down Expand Up @@ -228,6 +235,7 @@ Thinker.prototype.find = function (string, exact) {
exact = !!exact;

for (i = 0; i < words.length; i++) {

// Normalize and validate word
if (!(word = words[i]) || !(word = processWord(words[i], self.options))) {
continue;
Expand All @@ -237,6 +245,7 @@ Thinker.prototype.find = function (string, exact) {
queryResult = self.index.query(word, exact);

//
suggestion = undefined;
if (!queryResult.direct && self.enableSuggestions) {
suggestion = self.index.findClosestWord(word.original);
}
Expand Down
40 changes: 24 additions & 16 deletions lib/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,13 @@ THE SOFTWARE.

var levenshtein = require('fast-levenshtein');

function index() {
function index(opts) {

var data = [],
var options = opts,
data = [],
lookupPartial = {},
lookupFull = {},
lookupOriginal = {},
lookupSuggestion = {},

i, found,

Expand Down Expand Up @@ -87,7 +88,7 @@ function index() {
match,found;

indexProcessed = lookupFull[location.processed];
indexOriginal = lookupOriginal[location.original];
indexOriginal = lookupSuggestion[location.original];

// Index processed
if(indexProcessed === undefined) {
Expand All @@ -109,9 +110,11 @@ function index() {
}
}

// Index original
// Index original words for expression suggestions
if(indexOriginal === undefined) {
lookupOriginal[location.original] = true;
lookupSuggestion[location.original] = 1;
} else {
lookupSuggestion[location.original]++;
}

return indexProcessed;
Expand All @@ -125,33 +128,38 @@ function index() {
return { direct: result, partial: resultPartial };
},
getData: function ( ) {
return [data,lookupPartial,lookupFull,lookupOriginal];
return [data,lookupPartial,lookupFull,lookupSuggestion];
},
setData: function ( d ) {
data = d[0];
lookupPartial = d[1];
lookupFull = d[2];
lookupOriginal = d[3];
lookupSuggestion = d[3];
},
findClosestWord: function ( w ) {
var i, closestValue = Infinity, closestIndex, distance;

// Convert to array on first run
if ( Object.prototype.toString.call( lookupOriginal ) !== '[object Array]' ) {
lookupOriginal = Object.keys(lookupOriginal).map(function (key) { return key; });
// Convert to array and filter on first run
if ( Object.prototype.toString.call( lookupSuggestion ) !== '[object Array]' ) {
var result = [];
Object.keys(lookupSuggestion).forEach(function (key) {
if (lookupSuggestion[key] >= options.suggestionMinWordCount) {
result.push(key);
}
});
lookupSuggestion = result;
}

for (i = 0; i < lookupOriginal.length; i++) {
distance = levenshtein.get(w, lookupOriginal[i]);
for (i = 0; i < lookupSuggestion.length; i++) {
distance = levenshtein.get(w, lookupSuggestion[i]);
if (distance < closestValue) {
closestIndex = i;
closestValue = distance;
}
}


if (closestIndex !== undefined) {
return lookupOriginal[closestIndex];
if (closestIndex !== undefined && closestValue < 5) {
return lookupSuggestion[closestIndex];
}
}
};
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "thinker-fts",
"version": "1.0.4",
"version": "1.0.5",
"description": "Javascript/Node.js in-memory full text search engine.",
"author": "Hexagon <github.com/hexagon>",
"contributors": [{
Expand Down
26 changes: 25 additions & 1 deletion test/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,30 @@ describe('Simple usage', function () {
});
});

describe('Simple usage', function () {
    // Engine under test, configured with a custom `characters` pattern whose
    // character class also lists å, ä and ö (upper and lower case).
    var engine = Thinker({characters: /([a-zA-Z0-9åäöÅÄÖ]*)/g});

    engine.ranker = Thinker.rankers.standard();

    // Per the original author's note, feed() consumes the object it is given,
    // so a JSON round-tripped copy of the shared fixture is indexed instead.
    var clonedTexts = JSON.parse(JSON.stringify(exampleTexts));
    engine.feed(clonedTexts);

    describe('opts.characters', function () {
        var searchResult = engine.find("ånglok");

        // Resolved lazily inside each test case, so a malformed result makes
        // the individual test fail rather than the suite setup.
        function expressions() {
            return searchResult.results.expressions;
        }

        // The query is a single word, so exactly one expression is expected.
        it('Should return one expression', function () {
            expressions().length.should.equal(1);
        });

        // The custom character pattern should leave the non-ASCII word intact.
        it('Expression interpretation should equal "ånglok"', function () {
            expressions()[0].interpretation.should.equal("ånglok");
        });
    });
});

describe('Partial match', function () {
var thinker = Thinker();

Expand Down Expand Up @@ -286,7 +310,7 @@ describe('Advanced ranker', function () {
});

describe('Suggestion', function () {
var thinker = Thinker();
var thinker = Thinker({suggestionMinWordCount: 1});
var ranker = Thinker.rankers.standard();

thinker.enableSuggestions = true;
Expand Down

0 comments on commit 864dacd

Please sign in to comment.