From e68dd8fb304f484f3a8dcdeb8c3420c40792e8f0 Mon Sep 17 00:00:00 2001
From: Hexagon
Date: Mon, 6 Feb 2017 22:42:10 +0100
Subject: [PATCH] Major update, release 1.1.0 * Reduced memory usage * API
Breaking changes * Various bugfixes * Performance improvements * New
features * Document metadata * Filters * Resultset sorting on
metadata
---
.travis.yml | 7 +-
LICENSE | 2 +-
README.md | 88 ++--
benchmark/soundex.js | 13 -
benchmark/stemmers.english.js | 13 -
benchmark/stemmers.swedish.js | 13 -
index.js | 4 +-
lib/Thinker.js | 256 +++++++++--
lib/index.js | 267 ++++++-----
lib/processors.js | 40 +-
lib/rankers.js | 248 +++++++---
lib/utils.js | 4 +-
package.json | 6 +-
test/test.js | 824 ++++++++++++++++++++++++++++------
14 files changed, 1330 insertions(+), 455 deletions(-)
delete mode 100644 benchmark/soundex.js
delete mode 100644 benchmark/stemmers.english.js
delete mode 100644 benchmark/stemmers.swedish.js
diff --git a/.travis.yml b/.travis.yml
index 3e167c5..3ae1c05 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,6 +1,5 @@
language: node_js
node_js:
- - "4.1"
- - "4.0"
- - "0.12"
- - "0.11"
+ - "6"
+ - "5"
+ - "4"
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
index e528d58..bae76cb 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
The MIT License (MIT)
-Copyright (c) 2015 Hexagon
+Copyright (c) 2015-2017 Hexagon
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
diff --git a/README.md b/README.md
index 3d25427..b2da188 100644
--- a/README.md
+++ b/README.md
@@ -2,18 +2,19 @@
[![Build status](https://travis-ci.org/Hexagon/thinker-fts.svg)](https://travis-ci.org/Hexagon/thinker-fts) [![npm version](https://badge.fury.io/js/thinker-fts.svg)](https://badge.fury.io/js/thinker-fts) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/f4a95b3f01b644d9af07476e4e048c60)](https://www.codacy.com/app/robinnilsson/thinker-fts?utm_source=github.com&utm_medium=referral&utm_content=Hexagon/thinker-fts&utm_campaign=Badge_Grade) [![MIT License](https://img.shields.io/badge/license-MIT-blue.svg)](https://img.shields.io/badge/license-MIT-blue.svg)
-Fast and extendible pure JavaScript full text search engine.
+Fast, extendible and standalone pure JavaScript full text search engine.
## Features
- * Highly optimized, will give a ranked resultset within 20 ms on a 5000 (average wikipedia sized) document dataset.
* In-memory operation
+ * Highly optimized, will give a ranked resultset within 10 ms on a 5000 (average wikipedia sized) document dataset.
* Few external dependencies
- * Natural language searchx
+ * Natural language search
* Partial matching
* Expression correction / suggestions
* Weighted ranker (configurable weights for each field, all-expression-match-factor, partial vs exact factor etc.)
- * Search modifiers (+ require, - exclude, "searchword" precise match - excepts wordprocessors)
+ * Search modifiers (+ require, - exclude, "searchword" precise match, which bypasses the word processors)
+ * Result filters
* Field preprocessors
* HTML-Stripper
* Word preprocessors
@@ -23,8 +24,7 @@ Fast and extendible pure JavaScript full text search engine.
* [Stop words](https://en.wikipedia.org/wiki/Stop_words)
* Word forms
* [Soundex](https://en.wikipedia.org/wiki/Soundex)
- * Stripper for repeated characters
- * Allows saving/loading the index to/from disk, but for small datasets you can feed the index on-the-fly.
+ * Stripper for repeated characters
## Installation
@@ -43,10 +43,10 @@ var Thinker = require('thinker-fts'),
// Connect standard ranker
thinker.ranker = Thinker.rankers.standard();
-// Feed thinker with documents of format [id, textfield, textfield, ...]
+// Feed thinker with an array of documents formatted like { id: id, fields: [textfield, textfield] }
thinker.feed([
- [1, 'Lorem', 'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.'],
- [2, 'Ipsum', 'Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.']
+ { id: 1, fields: ['Lorem', 'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.'] },
+ { id: 2, fields: ['Ipsum', 'Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.'] }
]);
// Search for text
@@ -54,43 +54,42 @@ var result = thinker.find('ut in');
// Show result
console.log(result);
-
-{
- expressions: [
- {
- interpretation: 'ut',
+{
+ expressions: [
+ {
original: 'ut',
+ interpretation: [Object],
suggestion: undefined,
modifier: undefined,
- exactMode: false
+ exactMode: false
},
{
- interpretation: 'in',
original: 'in',
+ interpretation: [Object],
suggestion: undefined,
modifier: undefined,
- exactMode: false
+ exactMode: false
}
],
+ performance: {
+ find: 1.107075,
+ rank: 0.598558,
+ sort: 0.688598,
+ filter: 0.060182,
+ total: 2.639159
+ },
documents: [
- {
- id: 1,
- weight: 12,
- expressions: [1,0] // <- Array where index 0 correspods to first expression,
- // 1 to second expression etc.
- // Value is 2 for exact match
- // 1 for partial match and 0 for no match
- }
+ { id: 2, weight: 1.5, expressions: [Object] },
+ { id: 1, weight: 1.5, expressions: [Object] }
],
- findTime: 0.908248, // ms
- rankTime: 0.109632 // ms
+ totalHits: 2,
+ returnedHits: 2
}
```
Please not that you _have to_ connect a ranker, else find won't provide a result set. The ranker build the result set.
-
## Basic configuration
Thinkers default configuration is overridden by supplying an options object to Thinkers constructor.
@@ -99,14 +98,16 @@ Thinkers default configuration is overridden by supplying an options object to T
// Options only available at initialization
var thinker = Thinker({
- characters: /([a-zA-Z0-9']*)/g,
+ characters: /([a-zA-Z0-9]*)/g,
caseSensitive: false,
- minWildcardWordLen: 4,
+ minWildcardWordLen: 3,
maxWildcardWordLen: 32,
minWordLen: 2,
maxWordLen: 32,
suggestionMinWordCount: 6,
- enableSuggestions: false
+ enableSuggestions: false,
+ optionalPlusFromExpressions: 1,
+ coalesceWords: 1
});
```
@@ -150,10 +151,25 @@ If this is enabled, thinker will use unprocessed words from the inputted texts t
This is what results.expressions[n] will look like when you search for 'exression' (missing p)
+#### opts.optionalPlusFromExpressions
+
+Will be renamed, I promise.
+
+This is how many words there should be in the expression before all words become optional. Defaults to 1 (disabled).
+
+If you set this to 4 and search for a three-word expression, all words will need to exist in the document to give a match. In the background, ```what you want``` becomes ```+what +you +want```.
+If you give a four-word expression, all words become optional as usual.
+
+#### opts.coalesceWords
+
+When this property is set to a value greater than one, augmented words will be inserted into the index, each consisting of up to that many consecutive words written together. If this property is set to 3 and the field is "i want cookies today", a search for ```iwantcookies```, ```wantcookiestoday``` or ```wantcookies``` will give a match.
```javascript
{
- interpretation: 'exression',
+ interpretation: {
+ original: 'exression',
+ ...
+ },
...
suggestion: 'expression',
...
@@ -198,7 +214,7 @@ Object defining a different base weight for a match in each field of a document,
```javascript
var docs = [
- [1,"This is the title", "This is the ingress", "This is the text"],
+ { id: 1, fields: ["This is the title", "This is the ingress", "This is the text"] },
...
];
```
@@ -207,9 +223,9 @@ and your fields weights look like
```javascript
fields: {
- 1: { weight: 4, boostPercentage: true },
- 2: { weight: 2, boostPercentage: false },
- 3: { weight: 2, boostPercentage: false }
+ 0: { weight: 4, boostPercentage: true },
+ 1: { weight: 2, boostPercentage: false },
+ 2: { weight: 2, boostPercentage: false }
}
```
diff --git a/benchmark/soundex.js b/benchmark/soundex.js
deleted file mode 100644
index 420f7b3..0000000
--- a/benchmark/soundex.js
+++ /dev/null
@@ -1,13 +0,0 @@
-var
- Benchmark = require("benchmark"),
- Thinker = require("../"),
- suite = new Benchmark.Suite,
- soundex = Thinker.processors.soundex();
-
-suite.add("Soundex", function() {
- var result = soundex("convolution");
-})
-.on("cycle", function(event) {
- console.log(String(event.target));
-})
-.run();
diff --git a/benchmark/stemmers.english.js b/benchmark/stemmers.english.js
deleted file mode 100644
index 7562c03..0000000
--- a/benchmark/stemmers.english.js
+++ /dev/null
@@ -1,13 +0,0 @@
-var
- Benchmark = require('benchmark'),
- Thinker = require('../'),
- suite = new Benchmark.Suite,
- englishStemmer = Thinker.processors.stemmers.english();
-
-suite.add('English stemmer', function() {
- var result = englishStemmer('convolution');
-})
-.on('cycle', function(event) {
- console.log(String(event.target));
-})
-.run();
diff --git a/benchmark/stemmers.swedish.js b/benchmark/stemmers.swedish.js
deleted file mode 100644
index c9cbd5b..0000000
--- a/benchmark/stemmers.swedish.js
+++ /dev/null
@@ -1,13 +0,0 @@
-var
- Benchmark = require("benchmark"),
- Thinker = require("../"),
- suite = new Benchmark.Suite,
- swedishStemmer = Thinker.processors.stemmers.swedish();
-
-suite.add("Swedish stemmer", function() {
- var result = swedishStemmer("friserandets");
-})
-.on("cycle", function(event) {
- console.log(String(event.target));
-})
-.run();
diff --git a/index.js b/index.js
index d61fee4..a1da04f 100644
--- a/index.js
+++ b/index.js
@@ -1,3 +1 @@
-var Thinker = require('./lib/Thinker.js');
-
-module.exports = Thinker;
\ No newline at end of file
+module.exports = require('./lib/Thinker.js');
\ No newline at end of file
diff --git a/lib/Thinker.js b/lib/Thinker.js
index 469cb45..61fb372 100644
--- a/lib/Thinker.js
+++ b/lib/Thinker.js
@@ -21,6 +21,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
+"use strict";
var Index = require("./index.js"),
processors = require("./processors.js"),
@@ -28,6 +29,7 @@ var Index = require("./index.js"),
utils = require("./utils.js");
function processWord (word, opts) {
+
var result,
i;
@@ -47,7 +49,23 @@ function processWord (word, opts) {
}
// Prepare object
- result = { original: word, processed: undefined };
+ result = { original: word, preprocessed: undefined, processed: undefined };
+
+ // Apply all wordPreProcessors
+ for (i = 0; i < opts.wordPreProcessors.length; i++) {
+ if (!word) {
+ break;
+ }
+
+ word = opts.wordPreProcessors[i](word);
+ }
+
+ // Check if the preprocessor disabled this word
+ if (!word) {
+ return;
+ }
+
+ result.preprocessed = word;
// Apply all wordProcessors
for (i = 0; i < opts.wordProcessors.length; i++) {
@@ -58,7 +76,7 @@ function processWord (word, opts) {
word = opts.wordProcessors[i](word);
}
- // Check if the preprocessor disabled this word
+ // Check if the wordProcessors disabled this word
if (!word) {
return;
}
@@ -67,9 +85,10 @@ function processWord (word, opts) {
result.processed = word;
return result;
-};
+}
function Thinker (opts) {
+
var self = this;
// Optional `new` keyword
@@ -78,20 +97,24 @@ function Thinker (opts) {
}
self.ranker = function() {};
+ self.propertyRanker = rankers.property();
// All these options must be set before indexing and
// cannot change afterwards (the object will also be frozen).
self.options = utils.defaults({
characters: /([a-zA-Z0-9]*)/g,
caseSensitive: false,
- minWildcardWordLen: 4,
+ minWildcardWordLen: 3,
maxWildcardWordLen: 32,
minWordLen: 2,
maxWordLen: 32,
wordProcessors: [],
+ wordPreProcessors: [],
fieldProcessors: [],
suggestionMinWordCount: 6,
- enableSuggestions: false
+ enableSuggestions: false,
+ optionalPlusFromExpressions: 1,
+ coalesceWords: 1
}, opts );
// Changing settings after initializing the index would break things, we will try to prevent that
@@ -100,9 +123,10 @@ function Thinker (opts) {
// Index backend
self.index = new Index(self.options);
-};
+}
Thinker.prototype.feed = function (texts) {
+
var self = this,
opts = self.options,
currentDocument,
@@ -110,38 +134,46 @@ Thinker.prototype.feed = function (texts) {
currentWord,
i,j,k;
-
// Helper function adding a single word to the index
- function addWord (word, docid, fieldIdx) {
+ function addWord (word, docid, fieldIdx, augmented) {
+
var wIndex,
i, j;
-
- // Add original
- wIndex = self.index.populate(word, docid, fieldIdx);
- // Add processed
+ // Add original, preprocessed and processed
+ wIndex = self.index.populate(word, docid, fieldIdx, augmented);
+ self.index.populatePreProcessed(word.preprocessed, wIndex);
self.index.populateProcessed(word.processed, wIndex);
- // Add partials
- for (i = opts.minWildcardWordLen; i < word.original.length && i < opts.maxWildcardWordLen; i++) {
- for (j = 0; j < (word.original.length - i) + 1; j++) {
- // Do not input partial if equals processed or equals original
- if( word.original.substr(j,i) !== word.processed && word.original.substr(j,i) !== word.original ) {
- self.index.populatePartial(word.original.substr(j, i), wIndex);
+ if (!augmented) {
+ for (i = opts.minWildcardWordLen; i < word.original.length && i < opts.maxWildcardWordLen; i++) {
+ for (j = 0; j < (word.original.length - i) + 1; j++) {
+ // Do not input partial if equals processed or equals original
+ if( word.preprocessed.substr(j,i) !== word.processed && word.preprocessed.substr(j,i) !== word.preprocessed ) {
+ self.index.populatePartial(word.preprocessed.substr(j, i), wIndex);
+ }
}
}
}
+
}
/* Stage 1, query index for each individual word */
- while (currentDocument = texts.pop()) {
-
+ while ( (currentDocument = texts.pop() ) ) {
+
+ // Add metadata for current document
+ if (currentDocument.metadata) {
+ self.index.populateMetadata(currentDocument.id, currentDocument.metadata);
+ }
+
// split text into separate words, removing empty results
// Loop through all textfields (index > 0)
- for (j = 1 ; j < currentDocument.length; j++) {
+ for (var j = 0; j < currentDocument.fields.length; j++) {
// Extract current field
- if ( (currentField = currentDocument[j]) ) {
+ if ( (currentField = currentDocument.fields[j]) ) {
+
+ var wordHistory = [];
// Apply all fieldProcessors
for (i = 0; i < opts.fieldProcessors.length; i++) {
@@ -155,48 +187,142 @@ Thinker.prototype.feed = function (texts) {
// Extract unique words
for (k = 0; k < currentField.length; k++) {
+
+ // Check that the current word isn't invalidated by the word processors, and add it to the index
if (currentWord !== "" && (currentWord = processWord(currentField[k], opts))) {
- addWord(currentWord, currentDocument[0], j);
+ addWord(currentWord, currentDocument.id, j);
}
+
+ // Coalesce words (making separate words and written together words equal)
+ // This bypasses the valid word check, allowing single character words etc to be concatenated
+ if (opts.coalesceWords > 1 && currentField[k] !== "") {
+
+ wordHistory.push(currentField[k]);
+
+ if (wordHistory.length > 1 ) {
+ for(var i = 0; i < wordHistory.length - 1; i++) {
+ var augmentedWord = processWord(wordHistory.slice(i,wordHistory.length).join(""), opts);
+ addWord(augmentedWord, currentDocument.id, j, true);
+ }
+ if (wordHistory.length >= opts.coalesceWords) {
+ wordHistory.shift();
+ }
+
+ }
+
+ }
+
}
+
}
+
}
+
}
+
};
Thinker.prototype.addFieldProcessor = function (fn) {
return (this.options.fieldProcessors.push(fn), this);
};
+Thinker.prototype.addWordPreProcessor = function (fn) {
+ return (this.options.wordPreProcessors.push(fn), this);
+};
+
Thinker.prototype.addWordProcessor = function (fn) {
return (this.options.wordProcessors.push(fn), this);
};
-Thinker.prototype.find = function (string) {
+Thinker.prototype.find = function (params) {
+
+ utils.time("totalFindTime");
utils.time("findTime");
+ // Allow search string instead of params
+ // Note: String objects (new String(...)) are typeof "object" and are deliberately not handled here
+ if (typeof params === "string") {
+ params = { expression: params };
+ }
+
+ // Expand params with defaults
+ params = utils.defaults({
+
+ // Search string
+ // Value: String
+ expression: null,
+
+ // Search only in specific fields
+ // Value: Array or null
+ fields: null,
+
+ // Direction
+ // Value: Boolean
+ // true = descending
+ // false = ascending
+ direction: true,
+
+ // Filter function
+ // Filter results on
+ // filter: function (metadata) {
+ // return metadata.active;
+ // }
+ filter: null,
+
+ // Sort by
+ // Value: String
+ // sortBy: weight <- Default, sort by ranker weight
+ // sortBy: anything <- Sort by metadata property "anything"
+ sortBy: "weight",
+
+ // Limit number of results
+ // Value: null or integer
+ limit: null
+
+ }, params);
+
+ // Handle inconsistencies
+ if (!params.expression) params.expression = '';
+
var self = this,
- // Extract valid parts of the expression
- words = string.split(" "),
+ words,
word,
- // Find matching texts
- resultSet = { expressions: [] },
+ resultSet = { expressions: [], performance: {} },
queryResult,
suggestion,
- i;
+ i,
+
+ expression;
+
+ // Remove trailing spaces after + and -
+ expression = params.expression.replace(/([+-])+(\s)+/g, '$1');
+
+ // Remove dashes without space in front
+ expression = expression.replace(/([^\s]){1}-/, '$1');
+
+ // Remove leading and trailing spaces from search query
+ expression = expression.trim(" ");
+
+ // Split query into separate words on whitespace character
+ words = expression.split(" ");
for (i = 0; i < words.length; i++) {
var modifier=undefined, exact=false;
// Find modifiers, set flags, and remove their textual representation
+ // Plus modifier is automatically applied to each word (expression) if the total number of words is below options.optionalPlusFromExpressions
if ( ["+","-"].indexOf(words[i][0]) !== -1) {
modifier = words[i][0];
words[i] = words[i].substring(1,words[i].length);
+ } else {
+ if ( words.length < self.options.optionalPlusFromExpressions ) {
+ modifier = "+";
+ }
}
// Trigger exact mode
@@ -206,23 +332,23 @@ Thinker.prototype.find = function (string) {
words[i] = words[i].replace(/\"/g,"");
// Normalize and validate word
- if (!(word = words[i]) || !(word = processWord(words[i], self.options))) {
+ if (!(word = processWord(words[i], self.options))) {
continue;
}
//
- queryResult = self.index.query(word, exact);
+ queryResult = self.index.query(word, exact, params.filter);
// Enable suggestions if self.options.enableSuggestions is true
suggestion = undefined;
- if (!queryResult.direct.length && self.options.enableSuggestions) {
+ if ((!queryResult.exact.length && !queryResult.processed.length) && self.options.enableSuggestions) {
suggestion = self.index.findClosestWord(word.original);
}
// Push this expression to result array
resultSet.expressions.push({
- interpretation: exact ? word.original : word.processed,
- original: word.original,
+ original: words[i],
+ interpretation: word,
suggestion: suggestion,
modifier: modifier,
exactMode: exact,
@@ -232,21 +358,73 @@ Thinker.prototype.find = function (string) {
}
// Done finding
- resultSet.findTime = utils.time("findTime");
+ resultSet.performance.find = utils.time("findTime");
+
+ // Start ranking
+ utils.time("rankTime");
+
+ // Rank by weight
+ if (params.sortBy === "weight") {
+ resultSet.documents = self.ranker(resultSet, self.index.getWordCount());
+
+ // Rank by metadata
+ } else {
+ resultSet.documents = self.propertyRanker({
+ resultSet: resultSet,
+ index: self.index,
+ sortBy: params.sortBy
+ });
+
+ }
+
+ // Done ranking
+ resultSet.performance.rank = utils.time("rankTime");
+
+ // Start sorting
+ utils.time("sortTime");
+
+ // Sort documents by total weight
+ resultSet.documents = resultSet.documents.sort(function(a, b) {
+ return params.direction ? (b.weight - a.weight) : (a.weight - b.weight);
+ });
+
+ // Done sorting
+ resultSet.performance.sort = utils.time("sortTime");
// Start ranking
- utils.time("rankTime")
- resultSet.documents = self.ranker(resultSet,self.index.getWordCount());
+ utils.time("filterTime");
+
+ resultSet.totalHits = resultSet.documents.length;
+
+ if (params.limit) {
+ resultSet.documents = resultSet.documents.slice(0, params.limit);
+ }
+
+ resultSet.returnedHits = resultSet.documents.length;
// Remove expression[m].hits from resultset, not needed anymore
for (i = 0; i < resultSet.expressions.length; i++) {
delete resultSet.expressions[i].hits;
}
- // Done fanking
- resultSet.rankTime = utils.time("rankTime");
+ // Restore document ids, append filters, append meta
+ for (i = 0; i < resultSet.documents.length; i++) {
+
+ let docIdx = resultSet.documents[i].id;
+
+ // Restore metadata and document id
+ resultSet.documents[i].metadata = self.index.getMetadata(docIdx);
+ resultSet.documents[i].id = self.index.docIndexToId(resultSet.documents[i].id);
+
+
+ }
+
+ resultSet.performance.filter = utils.time("filterTime");
+
+ resultSet.performance.total = utils.time("totalFindTime");
return resultSet;
+
};
Thinker.processors = processors;
diff --git a/lib/index.js b/lib/index.js
index 308a2a3..71f1ded 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -28,191 +28,227 @@ var levenshtein = require('fast-levenshtein');
function index(options) {
- var data = [],
+ var
+ // Array of Array with DocumentIndex, FieldIndex, OcurrencesOfWordInCurrentDocumentAndField
+ // Index of outer array is WordIndex, matched to an actual word through lookupOriginal, lookupPartial, lookupProcessed and lookupPreProcessed
+ // [
+ // [DocumentIndex, FieldIndex, OcurrencesOfWordInCurrentDocumentAndField, DocumentIndex, FieldIndex, OcurrencesOfWordInCurrentDocumentAndField, ... ],
+ // [DocumentIndex, FieldIndex, OcurrencesOfWordInCurrentDocumentAndField, DocumentIndex, FieldIndex, OcurrencesOfWordInCurrentDocumentAndField, ... ],
+ // [DocumentIndex, FieldIndex, OcurrencesOfWordInCurrentDocumentAndField, DocumentIndex, FieldIndex, OcurrencesOfWordInCurrentDocumentAndField, ... ],
+ // [DocumentIndex, FieldIndex, OcurrencesOfWordInCurrentDocumentAndField, DocumentIndex, FieldIndex, OcurrencesOfWordInCurrentDocumentAndField, ... ]
+ // ]
+ data = [],
+
+ // Lookup Maps of original, partial, processed and preprocessed words
+ // {
+ // "ActualWord": WordIndexIn data
+ // }
+ lookupOriginal = new Map(null),
lookupPartial = new Map(null),
lookupProcessed = new Map(null),
- lookupOriginal = new Map(null),
+ lookupPreProcessed = new Map(null),
+
+ // Map of every word and how many times it is used
lookupSuggestion = new Map(null),
- wordCount = new Object(null),
-
- found,
-
- concatArray = function ( arr1, arr2 ) {
- var j, i, found, newIdx;
- for(i = 0; i < arr2.length; i+=3) {
- found = false;
- for(j = 0; j < arr1.length; j+=3) {
- if (arr2[i] == arr1[j] && arr2[i+1] == arr1[j+1]) {
- arr1[j+2]++;
- found = true;
- break;
+
+ // Map documentIndex => {
+ // supplied: "Document metadata"
+ // }
+ lookupMetadata = new Map(null),
+
+ // Map documentId: documentIndex
+ lookupDocId = new Map(null),
+
+ // Map documentIndex: documentId
+ lookupDocIdReverse = new Map(null),
+
+ // Keep track of next available unique document index
+ currentDocIndex = 0,
+
+ // Keep track of number of words in each document and field
+ wordCount = Object.create(null),
+
+ query = function ( location, lookupMap, filterFunc ) {
+
+ // Add object
+ var words = lookupMap.get(location),
+ arr1 = [],
+ arr2,
+ i, j,
+ newIdx,
+ idx,
+ idxKey,
+ iEntry;
+
+ var subroutine = function (word, arr1) {
+
+ var j;
+
+ arr2 = data[word];
+ idx = new Map();
+
+ for(j = 0; j < arr2.length; j+=3) {
+
+ if ( ! ( filterFunc && !filterFunc(lookupMetadata.get(arr2[j])))) {
+
+ idxKey = arr2[j]*1e10+arr2[j+1];
+
+ if ( (iEntry = idx.get(idxKey)) ) {
+ arr1[iEntry+2]++;
+ } else {
+ newIdx = arr1.length;
+ idx.set(idxKey, newIdx);
+ arr1[newIdx] = arr2[j];
+ arr1[newIdx+1] = arr2[j+1];
+ arr1[newIdx+2] = arr2[j+2];
+ }
+
}
+
}
- if (!found) {
- newIdx = arr1.length-1;
- arr1[++newIdx] = arr2[i];
- arr1[++newIdx] = arr2[i+1];
- arr1[++newIdx] = arr2[i+2];
- }
+
}
- return arr1;
- },
- queryPartial = function ( location ) {
+ if( words !== undefined ) {
- // Add object
- var words = lookupPartial.get(location),
- currentResult,
- result = [],
- i;
-
- if( words !== void 0 ) {
- for( i = 0; i < words.length; i++) {
- currentResult = data[words[i]];
- result = concatArray (result, currentResult);
+ if (words.constructor === Array) {
+ for( i = 0; i < words.length; i++) {
+ subroutine(words[i], arr1);
+ }
+ } else {
+ subroutine(words, arr1);
}
- return result;
+
+ return arr1;
} else {
return;
}
},
- queryProcessed = function ( location ) {
+ populate = function ( location, wordIdx, lookup ) {
+
+ var dest = lookup.get(location);
// Add object
- var words = lookupProcessed.get(location),
- currentResult,
- currentIndex,
- addedIndex = [],
- result = [],
- i;
-
- if( words !== void 0 ) {
- for( i = 0; i < words.length; i++) {
- currentResult = data[words[i]];
- result = concatArray (result, currentResult);
- }
- return result;
+ if(dest === undefined) {
+ lookup.set(location,[wordIdx]);
+
} else {
- return;
+ // Only insert if not already existing
+ if( dest.indexOf(wordIdx) === -1) {
+ dest[dest.length] = wordIdx;
+ }
+
}
},
- exports = {
- populateProcessed: function ( location, wordIdx ) {
-
- var i, location, dest = lookupProcessed.get(location);
+ docIdToIndex = function (docId) {
+ let d = lookupDocId.get(docId);
+ if(d === undefined) {
+ d = currentDocIndex++;
+ lookupDocId.set(docId, d);
+ lookupDocIdReverse.set(d, docId);
+ }
+ return d;
+ },
- // Add object
- if(dest === void 0) {
- lookupProcessed.set(location,[wordIdx]);
- } else {
- // Only insert if not already existing
- if( dest.indexOf(wordIdx) === -1) {
- dest.push(wordIdx);
- }
- }
+ docIndexToId = function (docIndex) {
+ return lookupDocIdReverse.get(docIndex);
+ };
- },
+ exports = {
populatePartial: function ( location, wordIdx ) {
-
- var i, dest = lookupPartial.get(location);
-
- // Add object
- if(dest === void 0) {
- lookupPartial.set(location,[wordIdx]);
- } else {
- // Only insert if not already existing
- if( dest.indexOf(wordIdx) === -1) {
- dest.push(wordIdx);
- }
- }
-
+ populate( location, wordIdx, lookupPartial);
+ },
+ populateProcessed: function ( location, wordIdx ) {
+ populate( location, wordIdx, lookupProcessed);
+ },
+ populatePreProcessed: function ( location, wordIdx ) {
+ populate( location, wordIdx, lookupPreProcessed);
+ },
+ populateMetadata: function (docid, meta) {
+ lookupMetadata.set(docIdToIndex(docid), meta);
+ },
+ getMetadata: function (docidx) {
+ return lookupMetadata.get(docidx);
},
+ docIndexToId: docIndexToId,
populate: function ( location, docId, fieldIdx ) {
- // Add object
- var i,
+ var
+ docIdx = docIdToIndex(docId),
+ i,
indexOriginal,
- indexSuggestion,
+ suggestionCounter,
match,
found;
// Index original words
indexOriginal = lookupOriginal.get(location.original);
- if(indexOriginal === void 0) {
+ if(indexOriginal === undefined) {
indexOriginal = data.length;
- lookupOriginal.set(location.original,indexOriginal);
- data[indexOriginal] = [docId, fieldIdx, 1];
+ lookupOriginal.set(location.original, indexOriginal);
+ data[indexOriginal] = [docIdx, fieldIdx, 1];
} else {
found = false;
match = data[indexOriginal];
for (i = 0; i < match.length; i+=3) {
- if(match[i] === docId && match[i+1] === fieldIdx ) {
+ if(match[i] === docIdx && match[i+1] === fieldIdx ) {
match[i+2]++;
found = true;
break;
}
}
if (!found) {
- match.push(docId, fieldIdx, 1);
+ match.push(docIdx, fieldIdx, 1);
}
}
// Update wordcount of current document and field
- if (wordCount[docId] === void 0 ) {
- wordCount[docId] = [];
+ if (wordCount[docIdx] === undefined ) {
+ wordCount[docIdx] = [];
}
- wordCount[docId][fieldIdx-1] = (wordCount[docId][fieldIdx-1] || 0) + 1;
+ wordCount[docIdx][fieldIdx-1] = (wordCount[docIdx][fieldIdx-1] || 0) + 1;
// Index original words for expression suggestions, this is filtered on
// first run of 'findClosestWord'
- indexSuggestion = lookupSuggestion.get(location.original);
- if(indexSuggestion === void 0) {
+ suggestionCounter = lookupSuggestion.get(location.original);
+ if(suggestionCounter === undefined) {
lookupSuggestion.set(location.original,1);
} else {
- lookupSuggestion.set(location.original,indexSuggestion++);
+ lookupSuggestion.set(location.original,++suggestionCounter);
}
return indexOriginal;
},
- query: function ( location, exact ) {
- var direct,
- partial;
+ query: function ( location, exact, filterFunc ) {
+
+ var hits = {};
if ( exact ) {
- direct = ((index = lookupOriginal.get(location.original)) !== void 0) ? data[index] : void 0;
+ hits.exact = query( location.preprocessed, lookupPreProcessed, filterFunc ) || [];
+ hits.processed = [];
+ hits.partial = [];
+
} else {
- direct = queryProcessed( location.processed );
- if ( (partial = queryPartial( location.original )) === void 0 ) partial = queryPartial( location.processed );
+ hits.exact = query( location.preprocessed, lookupPreProcessed, filterFunc ) || [];
+ hits.processed = query( location.processed, lookupProcessed, filterFunc ) || [];
+ if ( (hits.partial = query( location.preprocessed, lookupPartial, filterFunc )) === undefined ) hits.partial = query( location.processed, lookupPartial, filterFunc ) || [];
+
}
- return {
- direct: direct || [],
- partial: partial || []
- };
+ return hits;
+
},
getWordCount: function ( ) {
return wordCount;
},
- getData: function ( ) {
- return [data,lookupPartial,lookupProcessed,lookupOriginal,lookupSuggestion,wordCount];
- },
- setData: function ( d ) {
- data = d[0];
- lookupPartial = d[1];
- lookupProcessed = d[2];
- lookupOriginal = d[3];
- lookupSuggestion = d[4];
- wordCount = d[5];
-
- },
findClosestWord: function ( w ) {
var closestValue = Infinity, closestIndex, distance;
- lookupSuggestion.forEach(function(value,key) {
+ lookupSuggestion.forEach(function(value, key) {
if(value >= options.suggestionMinWordCount) {
distance = levenshtein.get(w, key);
if (distance < closestValue) {
@@ -223,8 +259,7 @@ function index(options) {
lookupSuggestion.delete(key);
}
});
-
- if (closestIndex !== void 0 && closestValue < 5) {
+ if (closestIndex !== undefined && closestValue < 5) {
return closestIndex;
}
}
diff --git a/lib/processors.js b/lib/processors.js
index df4042c..80b016c 100644
--- a/lib/processors.js
+++ b/lib/processors.js
@@ -27,29 +27,37 @@ THE SOFTWARE.
var porterStemmer = require("stemmer"),
Soundex = require("soundex");
-function stopwords ( stopwords ) {
- stopwords = stopwords || {};
+function stopwords ( stopword ) {
+ stopword = stopword || {};
return function ( w ) {
- if ( stopwords[w] === true ) return;
+ if ( stopword[w] === true ) return;
return w;
};
-};
+}
-function wordforms ( wordforms ) {
- var wordforms = wordforms || {};
+function wordforms ( wordform ) {
+ wordform = wordform || {};
return function ( w ) {
- return wordforms[w] || w;
+ return wordform[w] || w;
};
-};
+}
-function multiples ( stopwords ) {
- var stopwords = stopwords || {};
+function multiples ( stopword ) {
+ stopword = stopword || {};
return function ( w ) {
- if ( stopwords[w] === true ) return;
+ if ( stopword[w] === true ) return;
return w.replace(/([a-zåäö])\1+/gi, "$1");
};
-};
+}
+
+function dashes ( stopword ) {
+ stopword = stopword || {};
+ return function ( w ) {
+ if ( stopword[w] === true ) return;
+ return w.replace(/([^\s]){1}-/, '$1');
+ };
+}
function stripHtml ( ) {
@@ -117,7 +125,8 @@ function swedishStemmer(stopwords) {
stopwords = stopwords || {},
suffix = ["dd", "gd", "nn", "dt", "gt", "mm", "tt"],
- endings = ["iteten", "anden", "andet", "orna", "aste", "aren", "arna", "ande", "erna", "arne", "itet", "ning", "het", "ast", "ade", "ern", "ing", "are", "en", "ad", "an", "ar", "ig", "er", "et", "or", "at", "na", "e", "a"];
+ endings = ["igheter", "igheten", "ingarna", "iteten", "ingen", "anden", "andet", "orna", "aste", "aren", "arna", "ande", "erna", "arne", "itet", "ndet", "orn","het", "ast", "and", "ade", "ern", "ing", "are", "en", "ad", "an", "ar", "ig", "er", "et", "or", "at", "e", "a"];
+
return function (w) {
@@ -133,7 +142,7 @@ function swedishStemmer(stopwords) {
// Remove trailing s
if (r1[r1.length-1]==="s") r1 = r1.substring(0,r1.length-1);
- // Return of we didnt find r1
+ // Return if we didn't find r1
if (r1.length === 0) return word;
// Stage 1a-1
@@ -188,5 +197,6 @@ module.exports = {
stopwords: stopwords,
wordforms: wordforms,
multiples: multiples,
- stripHtml: stripHtml
+ stripHtml: stripHtml,
+ dashes: dashes
}
\ No newline at end of file
diff --git a/lib/rankers.js b/lib/rankers.js
index 1c0510b..45c972b 100644
--- a/lib/rankers.js
+++ b/lib/rankers.js
@@ -36,10 +36,9 @@ function standard (options) {
},
options = utils.defaults({
- directHit: 1,
+ exactHit: 1.5,
+ processedHit: 1,
partialHit: 0.5,
- eachPartialExpressionFactor: 1.5,
- eachDirectExpressionFactor: 2,
fields: {},
minimumWeight: 0
},options);
@@ -47,7 +46,7 @@ function standard (options) {
return function (resultSet, wordCount) {
var documentResultsFinal = [],
- documentResults = {},
+ documentResultsLookup = {},
i, j,
@@ -68,71 +67,73 @@ function standard (options) {
while ((word = resultSet.expressions[j++])) {
matches = [
- {
- flag: 2,
- rows: word.hits.direct,
- weight: options.directHit,
- length: word.hits.direct.length
- },
{
flag: 1,
rows: word.hits.partial,
weight: options.partialHit,
length: word.hits.partial.length
+ },
+ {
+ flag: 2,
+ rows: word.hits.processed,
+ weight: options.processedHit,
+ length: word.hits.processed.length
+ },
+ {
+ flag: 3,
+ rows: word.hits.exact,
+ weight: options.exactHit,
+ length: word.hits.exact.length
}
];
- // Get first match (partial)
- match = matches.pop();
+ // Walk the match types from most to least specific (exact, processed, partial)
+ while(match = matches.pop()) {
- // Jump to partials if it"s empty
- if (!match.length) {
- match = matches.pop();
- }
+ for (i = 0; i < match.length; i) {
- for (i = 0; i < match.length; i) {
- documentId = match.rows[i++];
- fieldIndex = match.rows[i++];
- matchCount = match.rows[i++];
+ documentId = match.rows[i++];
+ fieldIndex = match.rows[i++];
+ matchCount = match.rows[i++];
- // Get the specific user-specified settings for the
- // current field or fall back on the default settings.
- fieldOptions = options.fields[fieldIndex] || (options.fields[fieldIndex] = defaultFieldOptions);
+ // Ensure that document exists in results
+ if (documentResultsLookup[documentId] === void 0) {
+ documentResultsLookup[documentId] = documentResultsFinal.length;
- // Multiply match weight with field-specific weight
- weight = match.weight * fieldOptions.weight;
+ doc = documentResultsFinal[documentResultsLookup[documentId]] = {
+ id: documentId,
+ weight: 0,
+ expressions: [],
+ };
- // For field with boostPercentage flag enabled - add extra weight the more of the field that is matched.
- // 1 + (noOfMatchedWords / totalWordsInField)
- if (fieldOptions.boostPercentage) {
- weight *= (1 + 2.8*(matchCount / wordCount[documentId][fieldIndex - 1]));
- }
+ } else {
+ doc = documentResultsFinal[documentResultsLookup[documentId]];
+ }
- doc = documentResults[documentId] || (documentResults[documentId] = {
- id: documentId,
- weight: 0,
- expressions: []
- });
+ // Don't do unnecessary work: skip expressions already scored for this document
+ if ( !doc.expressions[j-1] ) {
- doc.weight += weight;
- doc.expressions[j-1] = match.flag;
+ // Get the specific user-specified settings for the
+ // current field or fall back on the default settings.
+ fieldOptions = options.fields[fieldIndex] || (options.fields[fieldIndex] = defaultFieldOptions);
- // Jump to the next match when the current is exhausted
- if (i === match.length && matches.length) {
- match = matches.pop();
- i = 0;
- }
+ // Multiply match weight with field-specific weight
+ weight = match.weight * fieldOptions.weight;
- }
- }
+ // For field with boostPercentage flag enabled - add extra weight the more of the field that is matched.
+ // 1 + 2.8 * (noOfMatchedWords / totalWordsInField)
+ if (fieldOptions.boostPercentage) {
+ weight *= (1 + 2.8*(matchCount / wordCount[documentId][fieldIndex - 1]));
+ }
- // Convert document results from object to array (to be sortable)
- documentResultsFinal = Object.keys(documentResults).map(function (key) { return documentResults[key]; });
+ doc.weight += weight;
+ doc.expressions[j-1] = match.flag;
- // Sort documents by total weight
- documentResultsFinal.sort(function(a, b) {
- return b.weight - a.weight;
- });
+ }
+
+ }
+ }
+ }
// Postprocess resultset, multiplying total weight with a factor under certain circumstances,
var temp = [];
@@ -141,51 +142,158 @@ function standard (options) {
var toss = false;
if ( documentResultsFinal[i].weight < options.minimumWeight ) {
-
toss = true;
-
} else {
-
// - Multiply document weight by a factor
for ( j = 0; j < resultSet.expressions.length; j++ ) {
-
- // 2 == Exact match
- if (documentResultsFinal[i].expressions[j]==2) {
- documentResultsFinal[i].weight *= options.eachDirectExpressionFactor;
- // 1 == Partial match
- } else if (documentResultsFinal[i].expressions[j]==1) {
- documentResultsFinal[i].weight *= options.eachPartialExpressionFactor;
+ if (!documentResultsFinal[i].expressions[j]) documentResultsFinal[i].expressions[j] = 0;
+
+ // Keep this row?
+ if ( resultSet.expressions[j].modifier === "-" && documentResultsFinal[i].expressions[j] > 0 ) {
+ toss = true;
+ } else if ( resultSet.expressions[j].modifier === "+" && documentResultsFinal[i].expressions[j] === 0) {
+ toss = true;
+ }
+ }
+ }
+
+ if (!toss) {
+ temp[temp.length] = documentResultsFinal[i];
+ }
+
+ }
+
+ return temp;
+ };
+
+}
+
+/* Rank by generic property */
+function property () {
+
+ return function (options) {
+
+ options = utils.defaults({
+ resultSet: null,
+ index: null,
+ sortBy: null
+ },options);
+
+ var documentResultsFinal = [],
+ documentResultsLookup = {},
+
+ i, j,
+
+ documentId,
+ fieldIndex,
+ matchCount,
+
+ doc,
+
+ fieldOptions,
+ weight,
+
+ matches,
+ word,
+ match;
+
+ j = 0;
+ while ((word = options.resultSet.expressions[j++])) {
+
+ matches = [
+ {
+ flag: 1,
+ rows: word.hits.partial,
+ weight: options.partialHit,
+ length: word.hits.partial.length
+ },
+ {
+ flag: 2,
+ rows: word.hits.processed,
+ weight: options.processedHit,
+ length: word.hits.processed.length
+ },
+ {
+ flag: 3,
+ rows: word.hits.exact,
+ weight: options.exactHit,
+ length: word.hits.exact.length
+ }
+ ];
+
+ // Walk the match types from most to least specific (exact, processed, partial)
+ while(match = matches.pop()) {
+
+ for (i = 0; i < match.length; i) {
+
+ documentId = match.rows[i++];
+ fieldIndex = match.rows[i++];
+ matchCount = match.rows[i++];
+
+ // Ensure that document exists in results
+ if (documentResultsLookup[documentId] === void 0) {
+ documentResultsLookup[documentId] = documentResultsFinal.length;
+
+ doc = documentResultsFinal[documentResultsLookup[documentId]] = {
+ id: documentId,
+ weight: 0,
+ expressions: [],
+ };
- // Else set to zero
} else {
- documentResultsFinal[i].expressions[j] = 0;
+ doc = documentResultsFinal[documentResultsLookup[documentId]];
+ }
+
+ // Don't do unnecessary work: skip expressions already scored for this document
+ if ( !doc.expressions[j-1] ) {
+
+ // Use the document's metadata value for the sortBy key as its weight
+ weight = options.index.getMetadata(documentId)[options.sortBy];
+
+ doc.weight += weight;
+ doc.expressions[j-1] = match.flag;
}
+ }
+ }
+ }
+
+ // Postprocess resultset: drop documents below minimumWeight or excluded by +/- modifiers
+ var temp = [];
+ for ( i = 0; i < documentResultsFinal.length; i++) {
+
+ var toss = false;
+
+ if ( documentResultsFinal[i].weight < options.minimumWeight ) {
+ toss = true;
+ } else {
+ for ( j = 0; j < options.resultSet.expressions.length; j++ ) {
+
+ if (!documentResultsFinal[i].expressions[j]) documentResultsFinal[i].expressions[j] = 0;
+
// Keep this row?
- if ( resultSet.expressions[j].modifier === "-" && documentResultsFinal[i].expressions[j] > 0 ) {
+ if ( options.resultSet.expressions[j].modifier === "-" && documentResultsFinal[i].expressions[j] > 0 ) {
toss = true;
- } else if ( resultSet.expressions[j].modifier === "+" && documentResultsFinal[i].expressions[j] === 0) {
+ } else if ( options.resultSet.expressions[j].modifier === "+" && documentResultsFinal[i].expressions[j] === 0) {
toss = true;
}
}
-
}
if (!toss) {
- temp.push(documentResultsFinal[i]);
+ temp[temp.length] = documentResultsFinal[i];
}
}
-
- resultSet = temp;
- return resultSet;
+ return temp;
};
}
module.exports = {
- standard: standard
+ standard: standard,
+ property: property
}
\ No newline at end of file
diff --git a/lib/utils.js b/lib/utils.js
index 56cea39..997a76b 100644
--- a/lib/utils.js
+++ b/lib/utils.js
@@ -1,3 +1,5 @@
+"use strict";
+
// Helper function for measuring execution time
var time = (function () {
var times = {};
@@ -11,7 +13,7 @@ var time = (function () {
}
diff = process.hrtime(times[id]);
- times[id] = null;
+ times[id] = undefined;
return (diff[0] * 1e9 + diff[1]) / 1E6;
};
diff --git a/package.json b/package.json
index 5a926af..ba14109 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "thinker-fts",
- "version": "1.0.11",
+ "version": "1.1.0",
"description": "Pure Javascript/Node.js in-memory full text search engine.",
"author": "Hexagon ",
"contributors": [{
@@ -22,12 +22,12 @@
"thinker",
"fts",
"fulltext",
+ "full-text-search",
"in-memory",
"levenshtein",
"soundex",
"porter",
- "stemmer",
- "full text search"
+ "stemmer"
],
"dependencies": {
"fast-levenshtein": "*",
diff --git a/test/test.js b/test/test.js
index 656f4dc..2036348 100644
--- a/test/test.js
+++ b/test/test.js
@@ -22,19 +22,20 @@ THE SOFTWARE.
*/
-'use strict';
+"use strict";
var should = require('should'),
Thinker = require('../lib/Thinker.js');
/* START OF EXAMPLE DATA */
var exampleTexts = [
- [0,"Artikel nummer noll","Det här är ettan i det hela, Anders är ett namn. Jonas likaså antikvitets. Bemötandet. effektivitet Kalle olle lars considerable"],
- [1,"Bemötande testtitel med extra ord","Brödtext nummer ett. Ander antikviteten olle lars sven"],
- [2,"Titeln med extra Testning","Brödtext i sanden artikeln artikeln artikeln artikeln två. Bemött namn Andersson antikvitet nyhet, nyheter, nyheten, nyhetens, nya olle"],
+ {id: 0, fields: [ "Artikel nummer noll","Det här är ettan i det hela, Anders är ett namn. Stavros likaså antikvitets. Bemötandet. kreativitet Kalle olle lars considerable"] },
+ {id: 1, fields: [ "Bemötande testtitel med extra ord","Brödtext nummer ett. Ander antikviteten olle lars sven"] },
+ {id: 2, fields: [ "Titeln med extra Testning","Brödtext i sanden artikeln artikeln artikeln artikeln två. Bemött namn Andersson antikvitet nyhet, nyheter, nyheten, nyhetens, nya olle"] }
];
/* END OF EXAMPLE DATA */
+
describe('Simple usage', function () {
var thinker = Thinker();
@@ -52,8 +53,8 @@ describe('Simple usage', function () {
result.expressions.length.should.equal(1);
});
- it('Expression interpretation should equal "artikel"', function () {
- result.expressions[0].interpretation.should.equal("artikel");
+ it('Expression processed should equal "artikel"', function () {
+ result.expressions[0].interpretation.processed.should.equal("artikel");
});
it('Should return two results', function () {
@@ -65,7 +66,7 @@ describe('Simple usage', function () {
});
it('First result should be an direct match', function () {
- result.documents[0].expressions[0].should.equal(2);
+ result.documents[0].expressions[0].should.equal(3);
});
it('Second result should have id 2', function () {
@@ -78,7 +79,6 @@ describe('Simple usage', function () {
});
});
-
describe('Simple usage: Local characters', function () {
var thinker = Thinker({characters: /([a-zA-Z0-9åäöÅÄÖ]*)/g});
@@ -96,8 +96,8 @@ describe('Simple usage: Local characters', function () {
result.expressions.length.should.equal(1);
});
- it('Expression interpretation should equal "ånglok"', function () {
- result.expressions[0].interpretation.should.equal("ånglok");
+ it('Expression processed should equal "ånglok"', function () {
+ result.expressions[0].interpretation.processed.should.equal("ånglok");
});
});
@@ -120,8 +120,8 @@ describe('Simple usage: Exact mode', function () {
result.expressions.length.should.equal(1);
});
- it('Expression interpretation should equal "ånglok"', function () {
- result.expressions[0].interpretation.should.equal("ånglok");
+ it('Expression processed should equal "ånglok"', function () {
+ result.expressions[0].interpretation.processed.should.equal("ånglok");
});
});
@@ -152,10 +152,9 @@ describe('Simple usage: Modifiers', function () {
result.expressions[2].modifier.should.equal("-");
});
- it('Expression interpretation two should equal "lars"', function () {
- result.expressions[1].interpretation.should.equal("lars");
+ it('Expression processed two should equal "lars"', function () {
+ result.expressions[1].interpretation.processed.should.equal("lars");
});
-
it('Should return one result', function () {
result.documents.length.should.equal(1);
});
@@ -185,8 +184,8 @@ describe('Partial match', function () {
result.expressions.length.should.equal(1);
});
- it('Expression interpretation should equal "emöt"', function () {
- result.expressions[0].interpretation.should.equal("emöt");
+ it('Expression processed should equal "emöt"', function () {
+ result.expressions[0].interpretation.processed.should.equal("emöt");
});
it('Should return three results (bemötandet, bemötande, bemött)', function () {
@@ -219,8 +218,8 @@ describe('Partial match with minimum word length match 5', function () {
result.expressions.length.should.equal(1);
});
- it('Expression interpretation should equal "emöt"', function () {
- result.expressions[0].interpretation.should.equal("emöt");
+ it('Expression processed should equal "emöt"', function () {
+ result.expressions[0].interpretation.processed.should.equal("emöt");
});
it('Should return zero results', function () {
@@ -234,13 +233,12 @@ describe('Ranker', function () {
characters: /([a-zA-Z0-9åäöÅÄÖ]*)/g
}),
ranker = Thinker.rankers.standard({
- directHit: 1,
+ exactHit: 1,
+ processedHit: 0.75,
partialHit: 0.5,
- eachPartialExpressionFactor: 1.5,
- eachDirectExpressionFactor: 2,
fields: {
- 1: { weight: 4},
- 2: { weight: 2}
+ 0: { weight: 4},
+ 1: { weight: 2}
}
});
@@ -272,7 +270,7 @@ describe('Ranker', function () {
describe('Result type', function () {
it('First result should be direct', function () {
- result.documents[0].expressions[0].should.equal(2);
+ result.documents[0].expressions[0].should.equal(3);
});
@@ -282,12 +280,12 @@ describe('Ranker', function () {
});
describe('Result weight', function () {
- it('First result should have a weight of 4*1*2', function () {
- result.documents[0].weight.should.equal(8);
+ it('First result should have a weight of 4*1', function () {
+ result.documents[0].weight.should.equal(4);
});
- it('Second result should have a weight of 2*0.5*1.5', function () {
- result.documents[1].weight.should.equal(1.5);
+ it('Second result should have a weight of 2*0.5', function () {
+ result.documents[1].weight.should.equal(1);
});
});
});
@@ -297,13 +295,12 @@ describe('Ranker', function () {
describe('Ranker: Boost percentage', function () {
var thinker = Thinker({ characters: /([a-zA-Z0-9åäöÅÄÖ]*)/g }),
ranker = Thinker.rankers.standard({
- directHit: 1,
+ exactHit: 1,
+ processedHit: 0.75,
partialHit: 0.5,
- eachPartialExpressionFactor: 1.5,
- eachDirectExpressionFactor: 2,
fields: {
- 1: { weight: 4, boostPercentage: true},
- 2: { weight: 2}
+ 0: { weight: 4, boostPercentage: true},
+ 1: { weight: 2}
}
});
@@ -315,6 +312,7 @@ describe('Ranker: Boost percentage', function () {
thinker.feed(exampleTextsCopy);
describe('Basic search "artikel"', function () {
+
var result = thinker.find("artikel");
it('Should return two results', function () {
@@ -322,12 +320,12 @@ describe('Ranker: Boost percentage', function () {
});
describe('Result weight', function () {
- it('First result should have a weight of 4*1*2*1.3333', function () {
- result.documents[0].weight.toFixed(4).should.equal('15.4667');
+ it('First result should have a weight of 4*1*1.9333', function () {
+ result.documents[0].weight.toFixed(4).should.equal('7.7333');
});
- it('Second result should have a weight of 2*0.5*1.5', function () {
- result.documents[1].weight.should.equal(1.5);
+ it('Second result should have a weight of 2*0.5', function () {
+ result.documents[1].weight.should.equal(1);
});
});
});
@@ -338,13 +336,12 @@ describe('Advanced ranker', function () {
characters: /([a-zA-Z0-9åäöÅÄÖ]*)/g
});
var ranker = Thinker.rankers.standard({
- directHit: 1,
- partialHit: 0.5,
- eachPartialExpressionFactor: 1.5,
- eachDirectExpressionFactor: 2,
+ exactHit: 1,
+ processedHit: 0.75,
+ partialHit: 0.5,
fields: {
- 1: {weight: 4 },
- 2: {weight: 2 }
+ 0: {weight: 4 },
+ 1: {weight: 2 }
}
});
@@ -376,26 +373,26 @@ describe('Advanced ranker', function () {
describe('Result type', function () {
it('First result should be 3 direct matches', function () {
- result.documents[0].expressions[0].should.equal(2);
- result.documents[0].expressions[1].should.equal(2);
- result.documents[0].expressions[2].should.equal(2);
+ result.documents[0].expressions[0].should.equal(3);
+ result.documents[0].expressions[1].should.equal(3);
+ result.documents[0].expressions[2].should.equal(3);
});
it('Second result should be 2 partial and one direct', function () {
result.documents[1].expressions[0].should.equal(1);
result.documents[1].expressions[1].should.equal(1);
- result.documents[1].expressions[2].should.equal(2);
+ result.documents[1].expressions[2].should.equal(3);
});
});
describe('Result weight', function () {
- it('First result should have a weight of (((4*1)+(2*1)+(2*1)))*2*2*2', function () {
- result.documents[0].weight.should.equal(64);
+ it('First result should have a weight of (((4*1)+(2*1)+(2*1)))', function () {
+ result.documents[0].weight.should.equal((((4*1)+(2*1)+(2*1))));
});
- it('Second result should have a weight of (((2*0.5)+(2*0.5)+(2*1)))*2*1.5*1.5', function () {
- result.documents[1].weight.should.equal(18);
+ it('Second result should have a weight of (((2*0.5)+(2*0.5)+(2*1)))', function () {
+ result.documents[1].weight.should.equal((((2*0.5)+(2*0.5)+(2*1))));
});
});
});
@@ -492,17 +489,16 @@ describe('Word-processor: Multiples', function () {
describe('Search "k000aaaallle"', function () {
var result = thinker.find("k000aaaallle");
- it('Expression interpretation should equal "k000ale"', function () {
- result.expressions[0].interpretation.should.equal("k000ale");
+ it('Expression processed should equal "k000ale"', function () {
+ result.expressions[0].interpretation.processed.should.equal("k000ale");
});
});
});
-
describe('Word processor: Swedish stemmer', function () {
var stemmerStopwords = {
"anders": true,
- "jonas": true
+ "stavros": true
};
var thinker = Thinker({characters: /([a-zA-Z0-9åäöÅÄÖ]*)/g});
@@ -522,8 +518,8 @@ describe('Word processor: Swedish stemmer', function () {
result.expressions.length.should.equal(1);
});
- it('Expression interpretation be unchanged("anders")', function () {
- result.expressions[0].interpretation.should.equal("anders");
+ it('Expression processed be unchanged("anders")', function () {
+ result.expressions[0].interpretation.processed.should.equal("anders");
});
it('Should return two results', function () {
@@ -531,7 +527,7 @@ describe('Word processor: Swedish stemmer', function () {
});
it('First result should be a direct match (anders)', function () {
- result.documents[0].expressions[0].should.equal(2);
+ result.documents[0].expressions[0].should.equal(3);
});
it('Second result should be a partial match (andersson)', function () {
@@ -546,8 +542,8 @@ describe('Word processor: Swedish stemmer', function () {
result.expressions.length.should.equal(1);
});
- it('Expression interpretation be unchanged("bemötandet")', function () {
- result.expressions[0].interpretation.should.equal("bemötandet");
+ it('Expression processed be unchanged("bemötandet")', function () {
+ result.expressions[0].interpretation.preprocessed.should.equal("bemötandet");
});
it('Expression should be in exact mode', function () {
@@ -559,7 +555,7 @@ describe('Word processor: Swedish stemmer', function () {
});
it('First result should be a direct match (anders)', function () {
- result.documents[0].expressions[0].should.equal(2);
+ result.documents[0].expressions[0].should.equal(3);
});
it('First result should have document id 0', function () {
@@ -578,22 +574,112 @@ describe('Word processor: Swedish stemmer', function () {
result.expressions.length.should.equal(1);
});
- it('Expression interpretation should equal "bemöt"', function () {
- result.expressions[0].interpretation.should.equal("bemöt");
+ it('Expression processed should equal "bemöt"', function () {
+ result.expressions[0].interpretation.processed.should.equal("bemöt");
});
it('Should return three results (bemötandet, bemötande, bemött)', function () {
result.documents.length.should.equal(3);
});
- it('All results should be a direct match', function () {
+ it('All results should be a processed match', function () {
result.documents[0].expressions[0].should.equal(2);
result.documents[1].expressions[0].should.equal(2);
result.documents[2].expressions[0].should.equal(2);
});
});
-
+
+ describe('Search for "lyssningarna"', function () {
+
+ var result = thinker.find("lyssningarna");
+
+ it('Expression processed should equal "lyssn"', function () {
+ result.expressions[0].interpretation.processed.should.equal("lyssn");
+ });
+
+ });
+
+ describe('Search for "lyssna"', function () {
+
+ var result = thinker.find("lyssning");
+
+ it('Expression processed should equal "lyssn"', function () {
+ result.expressions[0].interpretation.processed.should.equal("lyssn");
+ });
+
+ });
+
+ describe('Search for "lyssning"', function () {
+
+ var result = thinker.find("lyssning");
+
+ it('Expression processed should equal "lyssn"', function () {
+ result.expressions[0].interpretation.processed.should.equal("lyssn");
+ });
+
+ });
+
+ describe('Search for "lyssnarens"', function () {
+
+ var result = thinker.find("lyssnarens");
+
+ it('Expression processed should equal "lyssn"', function () {
+ result.expressions[0].interpretation.processed.should.equal("lyssn");
+ });
+
+ });
+
+ describe('Search for "lyssningens"', function () {
+
+ var result = thinker.find("lyssningens");
+
+ it('Expression processed should equal "lyssn"', function () {
+ result.expressions[0].interpretation.processed.should.equal("lyssn");
+ });
+
+ });
+
+ describe('Search for "lyssningen"', function () {
+
+ var result = thinker.find("lyssningen");
+
+ it('Expression processed should equal "lyssn"', function () {
+ result.expressions[0].interpretation.processed.should.equal("lyssn");
+ });
+
+ });
+
+ describe('Search for "lyssnandet"', function () {
+
+ var result = thinker.find("lyssnandet");
+
+ it('Expression processed should equal "lyssn"', function () {
+ result.expressions[0].interpretation.processed.should.equal("lyssn");
+ });
+
+ });
+
+ describe('Search for "lyssnare"', function () {
+
+ var result = thinker.find("lyssnare");
+
+ it('Expression processed should equal "lyssn"', function () {
+ result.expressions[0].interpretation.processed.should.equal("lyssn");
+ });
+
+ });
+
+ describe('Search for "lyssna"', function () {
+
+ var result = thinker.find("lyssna");
+
+ it('Expression processed should equal "lyssn"', function () {
+ result.expressions[0].interpretation.processed.should.equal("lyssn");
+ });
+
+ });
+
describe('Search for "nyheternas"', function () {
var result = thinker.find("nyheternas");
@@ -602,20 +688,21 @@ describe('Word processor: Swedish stemmer', function () {
result.expressions.length.should.equal(1);
});
- it('Expression interpretation should equal "ny"', function () {
- result.expressions[0].interpretation.should.equal("ny");
+ it('Expression processed should equal "ny"', function () {
+ result.expressions[0].interpretation.processed.should.equal("ny");
});
it('Should return 1 document', function () {
result.documents.length.should.equal(1);
});
- it('All four (nyhet, nyheter, nyheten, nyhetens)results should be a direct match on the first result', function () {
+ it('All four (nyhet, nyheter, nyheten, nyhetens)results should be a processed match on the first result', function () {
result.documents[0].expressions[0].should.equal(2);
});
});
+
describe('Search for "nya"', function () {
var result = thinker.find("nya");
@@ -624,8 +711,8 @@ describe('Word processor: Swedish stemmer', function () {
result.expressions.length.should.equal(1);
});
- it('Expression interpretation should equal "ny"', function () {
- result.expressions[0].interpretation.should.equal("ny");
+ it('Expression processed should equal "ny"', function () {
+ result.expressions[0].interpretation.processed.should.equal("ny");
});
it('Should return one document', function () {
@@ -633,7 +720,7 @@ describe('Word processor: Swedish stemmer', function () {
});
it('The result should be a direct match on the first result', function () {
- result.documents[0].expressions[0].should.equal(2);
+ result.documents[0].expressions[0].should.equal(3);
});
});
@@ -642,8 +729,8 @@ describe('Word processor: Swedish stemmer', function () {
var result = thinker.find("radioar");
- it('Expression interpretation should equal "radio"', function () {
- result.expressions[0].interpretation.should.equal("radio");
+ it('Expression processed should equal "radio"', function () {
+ result.expressions[0].interpretation.processed.should.equal("radio");
});
});
@@ -652,8 +739,8 @@ describe('Word processor: Swedish stemmer', function () {
var result = thinker.find("sprit");
- it('Expression interpretation should equal "sprit"', function () {
- result.expressions[0].interpretation.should.equal("sprit");
+ it('Expression processed should equal "sprit"', function () {
+ result.expressions[0].interpretation.processed.should.equal("sprit");
});
});
@@ -662,8 +749,8 @@ describe('Word processor: Swedish stemmer', function () {
var result = thinker.find("produktutveckling");
- it('Expression interpretation should equal "produktutveckl"', function () {
- result.expressions[0].interpretation.should.equal("produktutveckl");
+ it('Expression processed should equal "produktutveckl"', function () {
+ result.expressions[0].interpretation.processed.should.equal("produktutveckl");
});
});
@@ -672,8 +759,8 @@ describe('Word processor: Swedish stemmer', function () {
var result = thinker.find("produktutvecklare");
- it('Expression interpretation should equal "produktutveckl"', function () {
- result.expressions[0].interpretation.should.equal("produktutveckl");
+ it('Expression processed should equal "produktutveckl"', function () {
+ result.expressions[0].interpretation.processed.should.equal("produktutveckl");
});
});
@@ -682,8 +769,8 @@ describe('Word processor: Swedish stemmer', function () {
var result = thinker.find("produktutvecklarens");
- it('Expression interpretation should equal "produktutveckl"', function () {
- result.expressions[0].interpretation.should.equal("produktutveckl");
+ it('Expression processed should equal "produktutveckl"', function () {
+ result.expressions[0].interpretation.processed.should.equal("produktutveckl");
});
});
@@ -692,8 +779,8 @@ describe('Word processor: Swedish stemmer', function () {
var result = thinker.find("skrotverktyget");
- it('Expression interpretation should equal "skrotverktyg"', function () {
- result.expressions[0].interpretation.should.equal("skrotverktyg");
+ it('Expression processed should equal "skrotverktyg"', function () {
+ result.expressions[0].interpretation.processed.should.equal("skrotverktyg");
});
});
@@ -704,49 +791,170 @@ describe('Word processor: Swedish stemmer', function () {
var result = thinker.find("skrotverktygets");
- it('Expression interpretation should equal "skrotverktyg"', function () {
- result.expressions[0].interpretation.should.equal("skrotverktyg");
+ it('Expression processed should equal "skrotverktyg"', function () {
+ result.expressions[0].interpretation.processed.should.equal("skrotverktyg");
});
});
-
- describe('Search for "sandning"', function () {
+
+ describe('Search for "sand"', function () {
- var result = thinker.find("sandning");
+ var result = thinker.find("sand");
- it('Expression interpretation should equal "sand"', function () {
- result.expressions[0].interpretation.should.equal("sand");
+ it('Expression processed should equal "sand"', function () {
+ result.expressions[0].interpretation.processed.should.equal("sand");
});
});
- describe('Search for "sand"', function () {
+ describe('Search for "sandarens"', function () {
- var result = thinker.find("sand");
+ var result = thinker.find("sandarens");
- it('Expression interpretation should equal "sand"', function () {
- result.expressions[0].interpretation.should.equal("sand");
+ it('Expression processed should equal "sand"', function () {
+ result.expressions[0].interpretation.processed.should.equal("sand");
});
});
+
+ describe('Search for "faktura"', function () {
+
+ var result = thinker.find("faktura");
- describe('Search for "sandarens"', function () {
+ it('Expression processed should equal "faktur"', function () {
+ result.expressions[0].interpretation.processed.should.equal("faktur");
+ });
- var result = thinker.find("sandarens");
+ });
+
+ describe('Search for "fakturan"', function () {
+
+ var result = thinker.find("fakturan");
+
+ it('Expression processed should equal "faktur"', function () {
+ result.expressions[0].interpretation.processed.should.equal("faktur");
+ });
+
+ });
+
+ describe('Search for "fakturans"', function () {
+
+ var result = thinker.find("fakturans");
- it('Expression interpretation should equal "sand"', function () {
- result.expressions[0].interpretation.should.equal("sand");
+ it('Expression processed should equal "faktur"', function () {
+ result.expressions[0].interpretation.processed.should.equal("faktur");
});
- });
+ });
+
+ describe('Search for "fakturor"', function () {
+
+ var result = thinker.find("fakturor");
+
+ it('Expression processed should equal "faktur"', function () {
+ result.expressions[0].interpretation.processed.should.equal("faktur");
+ });
+
+ });
+
+ describe('Search for "fakturorna"', function () {
+
+ var result = thinker.find("fakturorna");
+
+ it('Expression processed should equal "faktur"', function () {
+ result.expressions[0].interpretation.processed.should.equal("faktur");
+ });
+
+ });
+
+ describe('Search for "fakturornas"', function () {
+
+ var result = thinker.find("fakturornas");
+
+ it('Expression processed should equal "faktur"', function () {
+ result.expressions[0].interpretation.processed.should.equal("faktur");
+ });
+
+ });
+
+ describe('Search for "fakturors"', function () {
+
+ var result = thinker.find("fakturors");
+
+ it('Expression processed should equal "faktur"', function () {
+ result.expressions[0].interpretation.processed.should.equal("faktur");
+ });
+
+ });
+
+ describe('Search for "kampanj"', function () {
+
+ var result = thinker.find("kampanj");
+
+ it('Expression processed should equal "kampanj"', function () {
+ result.expressions[0].interpretation.processed.should.equal("kampanj");
+ });
+
+ });
+
+ describe('Search for "kampanjer"', function () {
+
+ var result = thinker.find("kampanjer");
+ it('Expression processed should equal "kampanj"', function () {
+ result.expressions[0].interpretation.processed.should.equal("kampanj");
+ });
+
+ });
+
+ describe('Search for "kampanjen"', function () {
+
+ var result = thinker.find("kampanjen");
+ it('Expression processed should equal "kampanj"', function () {
+ result.expressions[0].interpretation.processed.should.equal("kampanj");
+ });
+
+ });
+
+ describe('Search for "kampanjens"', function () {
+
+ var result = thinker.find("kampanjens");
+
+ it('Expression processed should equal "kampanj"', function () {
+ result.expressions[0].interpretation.processed.should.equal("kampanj");
+ });
+
+ });
+
+ describe('Search for "kampanjernas"', function () {
+
+ var result = thinker.find("kampanjernas");
+
+ it('Expression processed should equal "kampanj"', function () {
+ result.expressions[0].interpretation.processed.should.equal("kampanj");
+ });
+
+ });
+
+
+ describe('Search for "kampanjerna"', function () {
+
+ var result = thinker.find("kampanjerna");
+
+ it('Expression processed should equal "kampanj"', function () {
+ result.expressions[0].interpretation.processed.should.equal("kampanj");
+ });
+
+ });
+
+
describe('Search for "skrotverktyg"', function () {
var result = thinker.find("skrotverktyg");
- it('Expression interpretation should equal "skrotverktyg"', function () {
- result.expressions[0].interpretation.should.equal("skrotverktyg");
+ it('Expression processed should equal "skrotverktyg"', function () {
+ result.expressions[0].interpretation.processed.should.equal("skrotverktyg");
});
});
@@ -755,8 +963,8 @@ describe('Word processor: Swedish stemmer', function () {
var result = thinker.find("inbyggda");
- it('Expression interpretation should equal "inbygg"', function () {
- result.expressions[0].interpretation.should.equal("inbygg");
+ it('Expression processed should equal "inbygg"', function () {
+ result.expressions[0].interpretation.processed.should.equal("inbygg");
});
});
@@ -765,31 +973,163 @@ describe('Word processor: Swedish stemmer', function () {
var result = thinker.find("inbyggd");
- it('Expression interpretation should equal "inbygg"', function () {
- result.expressions[0].interpretation.should.equal("inbygg");
+ it('Expression processed should equal "inbygg"', function () {
+ result.expressions[0].interpretation.processed.should.equal("inbygg");
});
});
-
+
+ describe('Search for "inbyggda"', function () {
+
+ var result = thinker.find("inbyggda");
+
+ it('Expression processed should equal "inbygg"', function () {
+ result.expressions[0].interpretation.processed.should.equal("inbygg");
+ });
+
+ });
+
+ describe('Search for "hastighet"', function () {
+
+ var result = thinker.find("hastighet");
+
+ it('Expression processed should equal "hast"', function () {
+ result.expressions[0].interpretation.processed.should.equal("hast");
+ });
+
+ });
+
+ describe('Search for "hastighetens"', function () {
+
+ var result = thinker.find("hastighetens");
+
+ it('Expression processed should equal "hast"', function () {
+ result.expressions[0].interpretation.processed.should.equal("hast");
+ });
+
+ });
+
+ describe('Search for "hastigheter"', function () {
+
+ var result = thinker.find("hastigheter");
+
+ it('Expression processed should equal "hast"', function () {
+ result.expressions[0].interpretation.processed.should.equal("hast");
+ });
+
+ });
+
+ describe('Search for "hastigheternas"', function () {
+
+ var result = thinker.find("hastigheternas");
+
+ it('Expression processed should equal "hast"', function () {
+ result.expressions[0].interpretation.processed.should.equal("hast");
+ });
+
+ });
+
+
+ describe('Search for "hastigheterna"', function () {
+
+ var result = thinker.find("hastigheterna");
+
+ it('Expression processed should equal "hast"', function () {
+ result.expressions[0].interpretation.processed.should.equal("hast");
+ });
+
+ });
+
+
+ describe('Search for "bredband"', function () {
+
+ var result = thinker.find("bredband");
+
+ it('Expression processed should equal "bredb"', function () {
+ result.expressions[0].interpretation.processed.should.equal("bredb");
+ });
+
+ });
+
+ describe('Search for "bredbandet"', function () {
+
+ var result = thinker.find("bredbandet");
+
+ it('Expression processed should equal "bredb"', function () {
+ result.expressions[0].interpretation.processed.should.equal("bredb");
+ });
+
+ });
+
+ describe('Search for "bredbandens"', function () {
+
+ var result = thinker.find("bredbandens");
+
+ it('Expression processed should equal "bredb"', function () {
+ result.expressions[0].interpretation.processed.should.equal("bredb");
+ });
+
+ });
+
+ describe('Search for "bredbandets"', function () {
+
+ var result = thinker.find("bredbandets");
+
+ it('Expression processed should equal "bredb"', function () {
+ result.expressions[0].interpretation.processed.should.equal("bredb");
+ });
+
+ });
+
+ describe('Search for "sökmotorn"', function () {
+
+ var result = thinker.find("sökmotorn");
+
+ it('Expression processed should equal "sökmot"', function () {
+ result.expressions[0].interpretation.processed.should.equal("sökmot");
+ });
+ });
+
+ describe('Search for "sökmotor"', function () {
+
+ var result = thinker.find("sökmotor");
+
+ it('Expression processed should equal "sökmot"', function () {
+ result.expressions[0].interpretation.processed.should.equal("sökmot");
+ });
+
+ });
+
+ describe('Search for "sökmotorer"', function () {
+
+ var result = thinker.find("sökmotorer");
+
+ it('Expression processed should equal "sökmot"', function () {
+ result.expressions[0].interpretation.processed.should.equal("sökmot");
+ });
+
+ });
+
describe('Search for "antikviteten"', function () {
-
+
var result = thinker.find("antikviteten");
-
+
it('Should return one expression', function () {
result.expressions.length.should.equal(1);
});
-
- it('Expression interpretation should equal "antikv"', function () {
- result.expressions[0].interpretation.should.equal("antikv");
+
+ it('Expression processed should equal "antikv"', function () {
+ result.expressions[0].interpretation.processed.should.equal("antikv");
});
-
+
it('Should return one result (antikviteten, antivitet, antikvitets)', function () {
result.documents.length.should.equal(3);
});
-
+
it('All results should be a direct match', function () {
- result.documents[0].expressions[0].should.equal(2);
+ result.documents[0].expressions[0].should.equal(3);
});
+
});
});
@@ -813,7 +1153,7 @@ describe('Word processor: English stemmer', function () {
var result = thinker.find("considerable");
it('Should be interpreted as "considerable"', function () {
- result.expressions[0].interpretation.should.equal("considerable");
+ result.expressions[0].interpretation.processed.should.equal("considerable");
});
it('Should give one result"', function () {
@@ -826,7 +1166,7 @@ describe('Word processor: English stemmer', function () {
var result = thinker.find("considering");
it('Should be interpreted as "consid"', function () {
- result.expressions[0].interpretation.should.equal("consid");
+ result.expressions[0].interpretation.processed.should.equal("consid");
});
it('Should give one PARTIAL result"', function () {
@@ -840,7 +1180,7 @@ describe('Word processor: English stemmer', function () {
var result = thinker.find("consider");
it('Should be interpreted as "consid"', function () {
- result.expressions[0].interpretation.should.equal("consid");
+ result.expressions[0].interpretation.processed.should.equal("consid");
});
it('Should give one PARTIAL result"', function () {
@@ -854,7 +1194,7 @@ describe('Word processor: English stemmer', function () {
var result = thinker.find("triplicate");
it('Should be interpreted as "triplic"', function () {
- result.expressions[0].interpretation.should.equal("triplic");
+ result.expressions[0].interpretation.processed.should.equal("triplic");
});
});
@@ -863,7 +1203,7 @@ describe('Word processor: English stemmer', function () {
var result = thinker.find("dependent");
it('Should be interpreted as "depend"', function () {
- result.expressions[0].interpretation.should.equal("depend");
+ result.expressions[0].interpretation.processed.should.equal("depend");
});
});
@@ -872,7 +1212,7 @@ describe('Word processor: English stemmer', function () {
var result = thinker.find("probate");
it('Should be interpreted as "probat"', function () {
- result.expressions[0].interpretation.should.equal("probat");
+ result.expressions[0].interpretation.processed.should.equal("probat");
});
});
@@ -881,7 +1221,7 @@ describe('Word processor: English stemmer', function () {
var result = thinker.find("controllable");
it('Should be interpreted as "control"', function () {
- result.expressions[0].interpretation.should.equal("control");
+ result.expressions[0].interpretation.processed.should.equal("control");
});
});
@@ -890,7 +1230,7 @@ describe('Word processor: English stemmer', function () {
var result = thinker.find("rolling");
it('Should be interpreted as "roll"', function () {
- result.expressions[0].interpretation.should.equal("roll");
+ result.expressions[0].interpretation.processed.should.equal("roll");
});
});
@@ -908,16 +1248,15 @@ describe('Word processor: English soundex', function () {
thinker.ranker = ranker;
thinker.feed([
- [0,"This is a tile","This is a textual"],
- [1,"This is a tilly","This is a sexual"],
-
+ { id: 0, fields: ["This is a tile","This is a textual"] },
+ { id: 1, fields: ["This is a tilly","This is a sexual"] }
]);
describe('Search for "tile"', function () {
var result = thinker.find("tile");
it('Should be interpreted as "T400"', function () {
- result.expressions[0].interpretation.should.equal("T400");
+ result.expressions[0].interpretation.processed.should.equal("T400");
});
it('Should give two results', function () {
@@ -937,6 +1276,89 @@ describe('Word processor: English soundex', function () {
});
+describe('coalesceWords option', function () {
+
+ var thinker = Thinker({
+ coalesceWords: 3
+ });
+ var ranker = Thinker.rankers.standard();
+
+ thinker.ranker = ranker;
+
+ thinker.feed([
+ { id: 0, fields: ["This is a tile","This is a textual"] },
+ { id: 1, fields: ["This is a tilly","This is a sexual"] }
+ ]);
+
+ describe('Search for "isatextual"', function () {
+ var result = thinker.find("isatextual");
+
+ it('Should be interpreted as "isatextual"', function () {
+ result.expressions[0].interpretation.processed.should.equal("isatextual");
+ });
+
+ it('Should give one result', function () {
+ result.documents.length.should.equal(1);
+ });
+
+ });
+
+ describe('Search for "thisisatextual"', function () {
+ var result = thinker.find("thisisatextual");
+
+ it('Should give zero result', function () {
+ result.documents.length.should.equal(0);
+ });
+
+ });
+
+ describe('Search for "thisisa"', function () {
+ var result = thinker.find("thisisa");
+ // Both fed documents contain the word run "This is a" that this query joins together.
+ it('Should give two result', function () {
+ result.documents.length.should.equal(2);
+ });
+
+ });
+
+ describe('Search for "thisis"', function () {
+ var result = thinker.find("thisis");
+ // Both fed documents contain the adjacent words "This is".
+ it('Should give two results', function () {
+ result.documents.length.should.equal(2);
+ });
+
+ });
+
+ describe('Search for "isa"', function () {
+ var result = thinker.find("isa");
+ // Both fed documents contain the adjacent words "is a".
+ it('Should give two results', function () {
+ result.documents.length.should.equal(2);
+ });
+
+ });
+
+ describe('Search for "atextual"', function () {
+ var result = thinker.find("atextual");
+ // Only document 0 contains the adjacent words "a textual".
+ it('Should give one result', function () {
+ result.documents.length.should.equal(1);
+ });
+
+ });
+
+ describe('Search for "isa"', function () {
+ var result = thinker.find("isa");
+
+ it('Should give two result', function () {
+ result.documents.length.should.equal(2);
+ });
+
+ });
+
+});
+
describe('Field processor: HTML-Stripper', function () {
var thinker = Thinker({characters: /([a-zA-Z0-9åäöÅÄÖ]*)/g});
@@ -948,7 +1370,7 @@ describe('Field processor: HTML-Stripper', function () {
// We need to make a copy of exampletexts, as feed consumes the object
var exampleHtml = [
- [0,"title","atitle
linktext
awordÅrsringar <innanför>
"]
+ { id: 0, fields: [ "title","atitle
linktext
awordÅrsringar <innanför> "] }
];
thinker.feed(exampleHtml);
@@ -1040,4 +1462,150 @@ describe('Field processor: HTML-Stripper', function () {
result.documents.length.should.equal(1);
});
});
+});
+
+describe('Filters', function () {
+ var thinker = Thinker({characters: /([a-zA-Z0-9åäöÅÄÖ]*)/g});
+ var ranker = Thinker.rankers.standard();
+ var stripHtml = Thinker.processors.stripHtml();
+
+ thinker.addFieldProcessor(stripHtml);
+ thinker.ranker = ranker;
+
+ // We need to make a copy of exampletexts, as feed consumes the object
+ var exampleHtml = [
+ { id: 0, metadata: { testfilterbool: true, testfilterstring: "adfa", testfilterarr: [1,4,5] }, fields: ["Detta är en text som innehåller apa"] },
+ { id: 1, metadata: { testfilterbool: false, testfilterstring: "asdf", testfilterarr: [2,5] }, fields: [ "Detta är en text som innehåller kamel"] },
+ { id: 2, metadata: { testfilterbool: false, testfilterstring: "asdf", testfilterarr: [] }, fields: [ "Detta är en text som innehåller kanel"] },
+ { id: 3, metadata: { testfilterbool: false, testfilterstring: "asd", testfilterarr: [5] }, fields: [ "Detta är en text som innehåller kanel"] },
+ ];
+
+ thinker.feed(exampleHtml);
+
+ describe('Search "apa"', function () {
+ var result = thinker.find( { expression: "apa", filter: () => true });
+
+ it('Should return one result', function () {
+ result.documents.length.should.equal(1);
+ });
+ });
+
+ describe('Search "text"', function () {
+ var result = thinker.find( { expression: "text", filter: () => true} );
+ // All four fed documents contain "text" and the filter accepts everything.
+ it('Should return four results', function () {
+ result.documents.length.should.equal(4);
+ });
+ });
+
+ describe('Search "text" with filter "testfilterbool: true"', function () {
+ var result = thinker.find( { expression: "text", filter: (filterData) => filterData.testfilterbool} );
+
+ it('Should return one result', function () {
+ result.documents.length.should.equal(1);
+ });
+ });
+
+ describe('Search "text" with filter "testfilterbool: false"', function () {
+ var result = thinker.find( { expression: "text", filter: (filterData) => !filterData.testfilterbool} );
+
+ it('Should return three result', function () {
+ result.documents.length.should.equal(3);
+ });
+ });
+
+ describe('Search "text" with filter "testfilterstring: asdf"', function () {
+ var result = thinker.find( { expression: "text", filter: (filterData) => filterData.testfilterstring === "asdf" } );
+ it('Should return two result', function () {
+ result.documents.length.should.equal(2);
+ });
+ });
+
+ describe('Search "text" with filter "testfilterstring: adfa"', function () {
+ var result = thinker.find( { expression: "text", filter: (filterData) => filterData.testfilterstring === "adfa" } );
+ it('Should return one result', function () {
+ result.documents.length.should.equal(1);
+ });
+ });
+
+ describe('Search "text" with filter "testfilterstring: fafa"', function () {
+ var result = thinker.find( { expression: "text", filter: (filterData) => filterData.testfilterstring === "fafa" } );
+ it('Should return zero result', function () {
+ result.documents.length.should.equal(0);
+ });
+ });
+
+ describe('Search "text" with filter "testfilterarr has 5"', function () {
+ var result = thinker.find( { expression: "text", filter: (filterData) => ~filterData.testfilterarr.indexOf(5) } );
+ it('Should return three result', function () {
+ result.documents.length.should.equal(3);
+ });
+ });
+
+ describe('Search "text" with filter "testfilterarr has 5 && not testfilterbool"', function () {
+ var result = thinker.find( { expression: "text", filter: (filterData) => ~filterData.testfilterarr.indexOf(5) && !filterData.testfilterbool } );
+ it('Should return two result', function () {
+ result.documents.length.should.equal(2);
+ });
+ });
+
+ describe('Search exact ""text"" with filter "testfilterbool"', function () {
+ var result = thinker.find( { expression: "\"text\"", filter: (filterData) => filterData.testfilterbool } );
+ it('Should return one result', function () {
+ result.documents.length.should.equal(1);
+ });
+ });
+
+ describe('Search exact ""text"" with filter "!testfilterbool"', function () {
+ var result = thinker.find( { expression: "\"text\"", filter: (filterData) => !filterData.testfilterbool } );
+ it('Should return three results', function () {
+ result.documents.length.should.equal(3);
+ });
+ });
+
+ describe('Ranker: Sort by metadata parameter', function () {
+
+ var thinker = Thinker();
+
+ thinker.feed([
+ { id: 0, metadata: {a:2}, fields: ["This is a tile","This is a textual"] },
+ { id: 1, metadata: {a:1}, fields: ["This is a tilly","This is a sexual"] },
+ { id: 2, metadata: {a:3}, fields: ["This is a tilly","This is a usual"] },
+ { id: 3, metadata: {a:0}, fields: ["This is a tilly","This is a muse"] }
+ ]);
+
+ describe('Search for "this"', function () {
+ // Every fed document contains "this"; the ids asserted below follow metadata.a from highest (3) to lowest (0).
+ var result = thinker.find({
+ expression: "this",
+ sortBy: "a",
+ direction: true
+ });
+
+ it('Should be interpreted as "this"', function () {
+ result.expressions[0].interpretation.processed.should.equal("this");
+ });
+
+ it('Should give four results', function () {
+ result.documents.length.should.equal(4);
+ });
+
+ it('First result should have id 2', function () {
+ result.documents[0].id.should.equal(2);
+ });
+
+ it('Second result should have id 0', function () {
+ result.documents[1].id.should.equal(0);
+ });
+
+ it('Third result should have id 1', function () {
+ result.documents[2].id.should.equal(1);
+ });
+
+ it('Fourth result should have id 3', function () {
+ result.documents[3].id.should.equal(3);
+ });
+ });
+
+ });
});
\ No newline at end of file