diff --git a/lib/classifier-reborn/extensions/hasher.rb b/lib/classifier-reborn/extensions/hasher.rb index c1bf1de..a6b4c5f 100644 --- a/lib/classifier-reborn/extensions/hasher.rb +++ b/lib/classifier-reborn/extensions/hasher.rb @@ -27,7 +27,7 @@ def clean_word_hash(str, language = 'en', enable_stemmer = true) def word_hash_for_words(words, language = 'en', enable_stemmer = true) d = Hash.new(0) words.each do |word| - next unless word.length > 2 && !STOPWORDS[language].include?(word) + next unless word.length > 0 && !STOPWORDS[language].include?(word) if enable_stemmer d[word.stem.intern] += 1 else diff --git a/test/bayes/bayesian_common_tests.rb b/test/bayes/bayesian_common_tests.rb index 902ba92..c007e3f 100644 --- a/test/bayes/bayesian_common_tests.rb +++ b/test/bayes/bayesian_common_tests.rb @@ -139,10 +139,10 @@ def test_skip_empty_training_and_classification classifier.train('Ruby', '') assert classifier.categories.empty? classifier.train('Ruby', 'To be or not to be') - assert classifier.categories.empty? + refute classifier.categories.empty? classifier.train('Ruby', 'A really sweet language') refute classifier.categories.empty? - assert_equal Float::INFINITY, classifier.classify_with_score('To be or not to be')[1] + assert_equal Float::INFINITY, classifier.classify_with_score('')[1] end def test_empty_string_stopwords diff --git a/test/extensions/hasher_test.rb b/test/extensions/hasher_test.rb index 336a8b7..c362d41 100644 --- a/test/extensions/hasher_test.rb +++ b/test/extensions/hasher_test.rb @@ -56,7 +56,7 @@ def test_add_custom_stopword_path temp_stopwords_name = File.basename(temp_stopwords.path) Hasher.add_custom_stopword_path(temp_stopwords_path) - hash = { list: 1, cool: 1 } + hash = {:is=>1, :a=>1, :list=>1, :of=>1, :cool=>1} assert_equal hash, Hasher.clean_word_hash("this is a list of cool words!", temp_stopwords_name) end