From 2ca9dfd8bc7e13ba65199e8de87853747bcd9bb8 Mon Sep 17 00:00:00 2001 From: Chase Date: Thu, 1 Mar 2018 21:04:17 -0500 Subject: [PATCH 1/5] first pass on import/export --- .gitignore | 2 + .rubocop.yml | 9 +-- lib/classifier-reborn/bayes.rb | 14 ++++ test/bayes/bayesian_test.rb | 141 +++++++++++++++++++++++++++++++++ test/fixtures/reference.yml | 47 +++++++++++ 5 files changed, 207 insertions(+), 6 deletions(-) create mode 100755 test/bayes/bayesian_test.rb create mode 100644 test/fixtures/reference.yml diff --git a/.gitignore b/.gitignore index 50bc8eb..640353c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,8 @@ Gemfile.lock pkg + *.rdb docs/_site/ docs/.sass-cache/ docs/.jekyll-metadata +test/fixtures/export.yml diff --git a/.rubocop.yml b/.rubocop.yml index 1169d3d..f0aafa5 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -1,6 +1,3 @@ -Style/IfUnlessModifier: - MaxLineLength: 150 - Metrics/LineLength: Max: 146 @@ -28,7 +25,7 @@ Metrics/ClassLength: SingleLineBlockParams: Enabled: false -Lint/Eval: +Security/Eval: Enabled: false Lint/AssignmentInCondition: @@ -37,10 +34,10 @@ Lint/AssignmentInCondition: SignalException: Enabled: false -Style/FileName: +Naming/FileName: Enabled: false -Style/MethodName: +Naming/MethodName: Enabled: false Lint/UnusedBlockArgument: diff --git a/lib/classifier-reborn/bayes.rb b/lib/classifier-reborn/bayes.rb index 157b40f..c87e5f3 100644 --- a/lib/classifier-reborn/bayes.rb +++ b/lib/classifier-reborn/bayes.rb @@ -3,6 +3,7 @@ # License:: LGPL require 'set' +require 'yaml' require_relative 'extensions/tokenizer/whitespace' require_relative 'extensions/token_filter/stopword' @@ -261,6 +262,19 @@ def reset populate_initial_categories end + # Read the yaml_data_file and populate the classifier + def self.import!(file_path) + data = File.read(file_path) + YAML.load(data) + end + + # Writes a file by calling #to_yaml on self + def export(file_path) + yaml = to_yaml + File.open(file_path, 'w') { |f| f.write(yaml) } + yaml + end + private def populate_initial_categories diff --git a/test/bayes/bayesian_test.rb b/test/bayes/bayesian_test.rb new file mode 100755 index 0000000..1b8c832 --- /dev/null +++ b/test/bayes/bayesian_test.rb @@ -0,0 +1,141 @@ +# encoding: utf-8 + +require File.dirname(__FILE__) + '/../test_helper' +class BayesianTest < Minitest::Test + def setup + @classifier = ClassifierReborn::Bayes.new 'Interesting', 'Uninteresting' + end + + def test_good_training + assert_equal ['love'], @classifier.train_interesting('love') + end + + def test_training_with_utf8 + assert_equal ['Água'], @classifier.train_interesting('Água') + end + + def test_stemming_enabled_by_default + assert @classifier.stemmer_enabled? + end + + def test_bad_training + assert_raises(StandardError) { @classifier.train_no_category 'words' } + end + + def test_bad_method + assert_raises(NoMethodError) { @classifier.forget_everything_you_know '' } + end + + def test_categories + assert_equal %w(Interesting Uninteresting).sort, @classifier.categories.sort + end + + def test_categories_from_array + another_classifier = ClassifierReborn::Bayes.new %w(Interesting Uninteresting) + assert_equal another_classifier.categories.sort, @classifier.categories.sort + end + + def test_add_category + @classifier.add_category 'Test' + assert_equal %w(Test Interesting Uninteresting).sort, @classifier.categories.sort + end + + def test_dynamic_category_succeeds_with_auto_categorize + classifier = ClassifierReborn::Bayes.new 'Interesting', 'Uninteresting', auto_categorize: true + classifier.train('Ruby', 'I really sweet language') + assert classifier.categories.include?('Ruby') + end + + def test_dynamic_category_fails_without_auto_categorize + assert_raises(ClassifierReborn::Bayes::CategoryNotFoundError) do + @classifier.train('Ruby', 'A really sweet language') + end + refute @classifier.categories.include?('Ruby') + end + + def test_classification + @classifier.train_interesting 'here are some good words. I hope you love them' + @classifier.train_uninteresting 'here are some bad words, I hate you' + assert_equal 'Uninteresting', @classifier.classify('I hate bad words and you') + end + + def test_classification_with_threshold + b = ClassifierReborn::Bayes.new 'Digit' + assert_equal 1, b.categories.size + + refute b.threshold_enabled? + b.enable_threshold + assert b.threshold_enabled? + assert_equal 0.0, b.threshold # default + + b.threshold = -7.0 + + 10.times do |a_number| + b.train_digit(a_number.to_s) + b.train_digit(a_number.to_s) + end + + 10.times do |a_number| + assert_equal 'Digit', b.classify(a_number.to_s) + end + + refute b.classify('xyzzy') + end + + def test_classification_with_threshold_again + b = ClassifierReborn::Bayes.new 'Normal' + assert_equal 1, b.categories.size + + refute b.threshold_enabled? + b.enable_threshold + assert b.threshold_enabled? + assert_equal 0.0, b.threshold # default + + %w( + http://example.com/about + http://example.com/contact + http://example.com/download + http://example.com/login + http://example.com/logout + http://example.com/blog/2015-04-01 + ).each do |url| + b.train_normal(url) + end + + assert 'Normal', b.classify('http://example.com') + refute b.classify("http://example.com/login/?user='select * from users;'") + end + + def test_classification_with_score + @classifier.train_interesting 'here are some good words. I hope you love them' + @classifier.train_uninteresting 'here are some bad words, I hate you' + assert_in_delta(-4.85, @classifier.classify_with_score('I hate bad words and you')[1], 0.1) + end + + def test_untrain + @classifier.train_interesting 'here are some good words. I hope you love them' + @classifier.train_uninteresting 'here are some bad words, I hate you' + @classifier.add_category 'colors' + @classifier.train_colors 'red orange green blue seven' + classification_of_bad_data = @classifier.classify 'seven' + @classifier.untrain_colors 'seven' + classification_after_untrain = @classifier.classify 'seven' + refute_equal classification_of_bad_data, classification_after_untrain + end + + def test_export + @classifier.train_interesting %"Dutch painting of the Golden Age is included in the general European + period of Baroque painting, and often shows many of its characteristics + most lacks the idealization" + @classifier.train_uninteresting %"Grasslands such as savannah and prairie where grasses are dominant are + estimated to constitute forty percent of the land area of the Earth" + yaml = @classifier.export('test/fixtures/export.yml') + reference_file = File.read('test/fixtures/reference.yml') + assert_equal(yaml, reference_file) + end + + def test_import + classifier = ClassifierReborn::Bayes.import!('test/fixtures/reference.yml') + assert_equal('Interesting', classifier.classify('Dutch painting of the Golden Age')) + end +end diff --git a/test/fixtures/reference.yml b/test/fixtures/reference.yml new file mode 100644 index 0000000..1519826 --- /dev/null +++ b/test/fixtures/reference.yml @@ -0,0 +1,47 @@ +--- !ruby/object:ClassifierReborn::Bayes +categories: + :Interesting: + :dutch: 1 + :paint: 2 + :golden: 1 + :ag: 1 + :includ: 1 + :gener: 1 + :european: 1 + :period: 1 + :baroqu: 1 + :often: 1 + :show: 1 + :mani: 1 + :it: 1 + :characterist: 1 + :lack: 1 + :ideal: 1 + :,: 1 + :Uninteresting: + :grassland: 1 + :such: 1 + :savannah: 1 + :prairi: 1 + :where: 1 + :grass: 1 + :domin: 1 + :estim: 1 + :constitut: 1 + :forti: 1 + :percent: 1 + :land: 1 + :area: 1 + :earth: 1 +total_words: 32 +category_counts: + :Interesting: 1 + :Uninteresting: 1 +category_word_count: + :Interesting: 18 + :Uninteresting: 14 +language: en +auto_categorize: false +enable_threshold: false +threshold: 0.0 +enable_stemmer: true From 74fd30ddb59c1ebccf16d1f5f5b9ffd437290206 Mon Sep 17 00:00:00 2001 From: Chase Date: Thu, 1 Mar 2018 21:34:06 -0500 Subject: [PATCH 2/5] make import/export work with simple hashes --- lib/classifier-reborn/bayes.rb | 33 +++++++++++++++++++++++---------- test/bayes/bayesian_test.rb | 10 ++++++---- test/fixtures/reference.yml | 20 ++++++++++---------- 3 files changed, 39 insertions(+), 24 deletions(-) diff --git a/lib/classifier-reborn/bayes.rb b/lib/classifier-reborn/bayes.rb index c87e5f3..93cd3cd 100644 --- a/lib/classifier-reborn/bayes.rb +++ b/lib/classifier-reborn/bayes.rb @@ -3,7 +3,6 @@ # License:: LGPL require 'set' -require 'yaml' require_relative 'extensions/tokenizer/whitespace' require_relative 'extensions/token_filter/stopword' @@ -262,17 +261,31 @@ def reset populate_initial_categories end - # Read the yaml_data_file and populate the classifier - def self.import!(file_path) - data = File.read(file_path) - YAML.load(data) + # Read the data and populate the backend in use + def import!(data) + @auto_categorize = data[:auto_categorize] + @categories = data[:categories] + @category_counts = data[:category_counts] + @category_word_count = data[:category_word_count] + @enable_stemmer = data[:enable_stemmer] + @enable_threshold = data[:enable_threshold] + @language = data[:language] + @threshold = data[:threshold] + @total_words = data[:total_words] end - # Writes a file by calling #to_yaml on self - def export(file_path) - yaml = to_yaml - File.open(file_path, 'w') { |f| f.write(yaml) } - yaml + def export + { + auto_categorize: @auto_categorize, + categories: @categories, + category_counts: @category_counts, + category_word_count: @category_word_count, + enable_stemmer: @enable_stemmer, + enable_threshold: @enable_threshold, + language: @language, + threshold: @threshold, + total_words: @total_words + } end private diff --git a/test/bayes/bayesian_test.rb b/test/bayes/bayesian_test.rb index 1b8c832..ab09236 100755 --- a/test/bayes/bayesian_test.rb +++ b/test/bayes/bayesian_test.rb @@ -129,13 +129,15 @@ def test_export most lacks the idealization" @classifier.train_uninteresting %"Grasslands such as savannah and prairie where grasses are dominant are estimated to constitute forty percent of the land area of the Earth" - yaml = @classifier.export('test/fixtures/export.yml') - reference_file = File.read('test/fixtures/reference.yml') - assert_equal(yaml, reference_file) + exported_data = @classifier.export + reference_data = YAML.load(File.read('test/fixtures/reference.yml')) + assert_equal(exported_data, reference_data) end def test_import - classifier = ClassifierReborn::Bayes.import!('test/fixtures/reference.yml') + classifier = ClassifierReborn::Bayes.new + reference_data = YAML.load(File.read('test/fixtures/reference.yml')) + classifier.import!(reference_data) assert_equal('Interesting', classifier.classify('Dutch painting of the Golden Age')) end end diff --git a/test/fixtures/reference.yml b/test/fixtures/reference.yml index 1519826..9ed3c40 100644 --- a/test/fixtures/reference.yml +++ b/test/fixtures/reference.yml @@ -1,5 +1,6 @@ ---- !ruby/object:ClassifierReborn::Bayes -categories: +--- +:auto_categorize: false +:categories: :Interesting: :dutch: 1 :paint: 2 @@ -33,15 +34,14 @@ categories: :land: 1 :area: 1 :earth: 1 -total_words: 32 -category_counts: +:category_counts: :Interesting: 1 :Uninteresting: 1 -category_word_count: +:category_word_count: :Interesting: 18 :Uninteresting: 14 -language: en -auto_categorize: false -enable_threshold: false -threshold: 0.0 -enable_stemmer: true +:enable_stemmer: true +:enable_threshold: false +:language: en +:threshold: 0.0 +:total_words: 32 From 9524dc6b055e49c05c28b131b198d3f2de48bde5 Mon Sep 17 00:00:00 2001 From: Chase Date: Mon, 5 Mar 2018 23:03:35 -0500 Subject: [PATCH 3/5] move tests --- test/bayes/bayesian_common_tests.rb | 18 ++++ test/bayes/bayesian_test.rb | 143 ---------------------------- 2 files changed, 18 insertions(+), 143 deletions(-) delete mode 100755 test/bayes/bayesian_test.rb diff --git a/test/bayes/bayesian_common_tests.rb b/test/bayes/bayesian_common_tests.rb index 902ba92..09919dd 100644 --- a/test/bayes/bayesian_common_tests.rb +++ b/test/bayes/bayesian_common_tests.rb @@ -191,6 +191,24 @@ def test_reset assert classifier.categories.empty? end + def test_export + @classifier.train_interesting %"Dutch painting of the Golden Age is included in the general European + period of Baroque painting, and often shows many of its characteristics + most lacks the idealization" + @classifier.train_uninteresting %"Grasslands such as savannah and prairie where grasses are dominant are + estimated to constitute forty percent of the land area of the Earth" + exported_data = @classifier.export + reference_data = YAML.load(File.read('test/fixtures/reference.yml')) + assert_equal(exported_data, reference_data) + end + + def test_import + classifier = ClassifierReborn::Bayes.new + reference_data = YAML.load(File.read('test/fixtures/reference.yml')) + classifier.import!(reference_data) + assert_equal('Interesting', classifier.classify('Dutch painting of the Golden Age')) + end + private def another_classifier diff --git a/test/bayes/bayesian_test.rb b/test/bayes/bayesian_test.rb deleted file mode 100755 index ab09236..0000000 --- a/test/bayes/bayesian_test.rb +++ /dev/null @@ -1,143 +0,0 @@ -# encoding: utf-8 - -require File.dirname(__FILE__) + '/../test_helper' -class BayesianTest < Minitest::Test - def setup - @classifier = ClassifierReborn::Bayes.new 'Interesting', 'Uninteresting' - end - - def test_good_training - assert_equal ['love'], @classifier.train_interesting('love') - end - - def test_training_with_utf8 - assert_equal ['Água'], @classifier.train_interesting('Água') - end - - def test_stemming_enabled_by_default - assert @classifier.stemmer_enabled? - end - - def test_bad_training - assert_raises(StandardError) { @classifier.train_no_category 'words' } - end - - def test_bad_method - assert_raises(NoMethodError) { @classifier.forget_everything_you_know '' } - end - - def test_categories - assert_equal %w(Interesting Uninteresting).sort, @classifier.categories.sort - end - - def test_categories_from_array - another_classifier = ClassifierReborn::Bayes.new %w(Interesting Uninteresting) - assert_equal another_classifier.categories.sort, @classifier.categories.sort - end - - def test_add_category - @classifier.add_category 'Test' - assert_equal %w(Test Interesting Uninteresting).sort, @classifier.categories.sort - end - - def test_dynamic_category_succeeds_with_auto_categorize - classifier = ClassifierReborn::Bayes.new 'Interesting', 'Uninteresting', auto_categorize: true - classifier.train('Ruby', 'I really sweet language') - assert classifier.categories.include?('Ruby') - end - - def test_dynamic_category_fails_without_auto_categorize - assert_raises(ClassifierReborn::Bayes::CategoryNotFoundError) do - @classifier.train('Ruby', 'A really sweet language') - end - refute @classifier.categories.include?('Ruby') - end - - def test_classification - @classifier.train_interesting 'here are some good words. I hope you love them' - @classifier.train_uninteresting 'here are some bad words, I hate you' - assert_equal 'Uninteresting', @classifier.classify('I hate bad words and you') - end - - def test_classification_with_threshold - b = ClassifierReborn::Bayes.new 'Digit' - assert_equal 1, b.categories.size - - refute b.threshold_enabled? - b.enable_threshold - assert b.threshold_enabled? - assert_equal 0.0, b.threshold # default - - b.threshold = -7.0 - - 10.times do |a_number| - b.train_digit(a_number.to_s) - b.train_digit(a_number.to_s) - end - - 10.times do |a_number| - assert_equal 'Digit', b.classify(a_number.to_s) - end - - refute b.classify('xyzzy') - end - - def test_classification_with_threshold_again - b = ClassifierReborn::Bayes.new 'Normal' - assert_equal 1, b.categories.size - - refute b.threshold_enabled? - b.enable_threshold - assert b.threshold_enabled? - assert_equal 0.0, b.threshold # default - - %w( - http://example.com/about - http://example.com/contact - http://example.com/download - http://example.com/login - http://example.com/logout - http://example.com/blog/2015-04-01 - ).each do |url| - b.train_normal(url) - end - - assert 'Normal', b.classify('http://example.com') - refute b.classify("http://example.com/login/?user='select * from users;'") - end - - def test_classification_with_score - @classifier.train_interesting 'here are some good words. I hope you love them' - @classifier.train_uninteresting 'here are some bad words, I hate you' - assert_in_delta(-4.85, @classifier.classify_with_score('I hate bad words and you')[1], 0.1) - end - - def test_untrain - @classifier.train_interesting 'here are some good words. I hope you love them' - @classifier.train_uninteresting 'here are some bad words, I hate you' - @classifier.add_category 'colors' - @classifier.train_colors 'red orange green blue seven' - classification_of_bad_data = @classifier.classify 'seven' - @classifier.untrain_colors 'seven' - classification_after_untrain = @classifier.classify 'seven' - refute_equal classification_of_bad_data, classification_after_untrain - end - - def test_export - @classifier.train_interesting %"Dutch painting of the Golden Age is included in the general European - period of Baroque painting, and often shows many of its characteristics - most lacks the idealization" - @classifier.train_uninteresting %"Grasslands such as savannah and prairie where grasses are dominant are - estimated to constitute forty percent of the land area of the Earth" - exported_data = @classifier.export - reference_data = YAML.load(File.read('test/fixtures/reference.yml')) - assert_equal(exported_data, reference_data) - end - - def test_import - classifier = ClassifierReborn::Bayes.new - reference_data = YAML.load(File.read('test/fixtures/reference.yml')) - classifier.import!(reference_data) - assert_equal('Interesting', classifier.classify('Dutch painting of the Golden Age')) - end -end From da7beb9c9ebf6ee324dfedcabbbd5f7d49c22853 Mon Sep 17 00:00:00 2001 From: Chase Date: Mon, 5 Mar 2018 23:05:12 -0500 Subject: [PATCH 4/5] missing import --- test/bayes/bayesian_common_tests.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/test/bayes/bayesian_common_tests.rb b/test/bayes/bayesian_common_tests.rb index 09919dd..3373f17 100644 --- a/test/bayes/bayesian_common_tests.rb +++ b/test/bayes/bayesian_common_tests.rb @@ -1,4 +1,5 @@ # encoding: utf-8 +require 'yaml' module BayesianCommonTests def test_good_training From 36d0532497a1a1c9b5b721740643e49e39c3c639 Mon Sep 17 00:00:00 2001 From: Chase Date: Tue, 6 Mar 2018 00:25:54 -0500 Subject: [PATCH 5/5] working backend import/exp for in mem only --- .../backends/bayes_memory_backend.rb | 3 +++ .../backends/bayes_redis_backend.rb | 2 ++ .../backends/data_handler.rb | 22 +++++++++++++++++++ lib/classifier-reborn/bayes.rb | 17 +++++--------- test/bayes/bayesian_common_tests.rb | 9 ++++---- test/fixtures/reference.yml | 20 +++++++++-------- 6 files changed, 48 insertions(+), 25 deletions(-) create mode 100644 lib/classifier-reborn/backends/data_handler.rb diff --git a/lib/classifier-reborn/backends/bayes_memory_backend.rb b/lib/classifier-reborn/backends/bayes_memory_backend.rb index 3fba151..6f516ed 100644 --- a/lib/classifier-reborn/backends/bayes_memory_backend.rb +++ b/lib/classifier-reborn/backends/bayes_memory_backend.rb @@ -1,6 +1,9 @@ +require_relative 'data_handler' + module ClassifierReborn class BayesMemoryBackend attr_reader :total_words, :total_trainings + include DataHandler # This class provides Memory as the storage backend for the classifier data structures def initialize diff --git a/lib/classifier-reborn/backends/bayes_redis_backend.rb b/lib/classifier-reborn/backends/bayes_redis_backend.rb index 758c3f7..7c66940 100644 --- a/lib/classifier-reborn/backends/bayes_redis_backend.rb +++ b/lib/classifier-reborn/backends/bayes_redis_backend.rb @@ -1,3 +1,4 @@ +require_relative 'data_handler' require_relative 'no_redis_error' # require redis when we run #intialize. This way only people using this backend # will need to install and load the backend without having to @@ -6,6 +7,7 @@ module ClassifierReborn # This class provides Redis as the storage backend for the classifier data structures class BayesRedisBackend + include DataHandler # The class can be created with the same arguments that the redis gem accepts # E.g., # b = ClassifierReborn::BayesRedisBackend.new diff --git a/lib/classifier-reborn/backends/data_handler.rb b/lib/classifier-reborn/backends/data_handler.rb new file mode 100644 index 0000000..01b589c --- /dev/null +++ b/lib/classifier-reborn/backends/data_handler.rb @@ -0,0 +1,22 @@ +module DataHandler + # Read the data and populate the backend in use + def import!(data) + data[:categories].keys.each { |category| add_category(category) } + categories = data[:categories] + categories.each_key do |category| + categories[category].each do |word, diff| + update_category_word_frequency(category, word, diff) + end + end + update_total_words(data[:total_words]) + end + + def export + { + categories: @categories, + category_counts: @category_counts, + category_word_count: @category_word_count, + total_words: @total_words + } + end +end diff --git a/lib/classifier-reborn/bayes.rb b/lib/classifier-reborn/bayes.rb index 93cd3cd..9ce78bf 100644 --- a/lib/classifier-reborn/bayes.rb +++ b/lib/classifier-reborn/bayes.rb @@ -1,7 +1,6 @@ # Author:: Lucas Carlson (mailto:lucas@rufy.com) # Copyright:: Copyright (c) 2005 Lucas Carlson # License:: LGPL - require 'set' require_relative 'extensions/tokenizer/whitespace' @@ -261,31 +260,25 @@ def reset populate_initial_categories end - # Read the data and populate the backend in use def import!(data) @auto_categorize = data[:auto_categorize] - @categories = data[:categories] - @category_counts = data[:category_counts] - @category_word_count = data[:category_word_count] @enable_stemmer = data[:enable_stemmer] @enable_threshold = data[:enable_threshold] + @initial_categories = data[:categories].keys.map(&:to_s) @language = data[:language] @threshold = data[:threshold] - @total_words = data[:total_words] + @backend.import!(data) end def export + backend_data = @backend.export { auto_categorize: @auto_categorize, - categories: @categories, - category_counts: @category_counts, - category_word_count: @category_word_count, enable_stemmer: @enable_stemmer, enable_threshold: @enable_threshold, language: @language, - threshold: @threshold, - total_words: @total_words - } + threshold: @threshold + }.merge(backend_data) end private diff --git a/test/bayes/bayesian_common_tests.rb b/test/bayes/bayesian_common_tests.rb index 3373f17..5777ec5 100644 --- a/test/bayes/bayesian_common_tests.rb +++ b/test/bayes/bayesian_common_tests.rb @@ -193,18 +193,19 @@ def test_reset end def test_export - @classifier.train_interesting %"Dutch painting of the Golden Age is included in the general European + classifier = another_classifier + classifier.train_interesting %"Dutch painting of the Golden Age is included in the general European period of Baroque painting, and often shows many of its characteristics most lacks the idealization" - @classifier.train_uninteresting %"Grasslands such as savannah and prairie where grasses are dominant are + classifier.train_uninteresting %"Grasslands such as savannah and prairie where grasses are dominant are estimated to constitute forty percent of the land area of the Earth" - exported_data = @classifier.export + exported_data = classifier.export reference_data = YAML.load(File.read('test/fixtures/reference.yml')) assert_equal(exported_data, reference_data) end def test_import - classifier = ClassifierReborn::Bayes.new + classifier = ClassifierReborn::Bayes.new backend: @alternate_backend reference_data = YAML.load(File.read('test/fixtures/reference.yml')) classifier.import!(reference_data) assert_equal('Interesting', classifier.classify('Dutch painting of the Golden Age')) diff --git a/test/fixtures/reference.yml b/test/fixtures/reference.yml index 9ed3c40..44ffe4b 100644 --- a/test/fixtures/reference.yml +++ b/test/fixtures/reference.yml @@ -1,5 +1,9 @@ --- :auto_categorize: false +:enable_stemmer: true +:enable_threshold: false +:language: en +:threshold: 0.0 :categories: :Interesting: :dutch: 1 @@ -35,13 +39,11 @@ :area: 1 :earth: 1 :category_counts: - :Interesting: 1 - :Uninteresting: 1 -:category_word_count: - :Interesting: 18 - :Uninteresting: 14 -:enable_stemmer: true -:enable_threshold: false -:language: en -:threshold: 0.0 + :Interesting: + :training: 1 + :word: 18 + :Uninteresting: + :training: 1 + :word: 14 +:category_word_count: :total_words: 32