Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Skip of newline for single-examples #4636

Merged
merged 4 commits into from
Sep 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion cs/unittest/RunTests.tt
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ var skipList = new[] { 13, 32, 39, 258, 40, 259, 41, 260, 59, 60, 61, 66, 68, 90
256, 299, 300, 306, 310, 311, 327, 328, 329, 330, 331, 367, 368, 396, 397, 398, // DSJSON not supported
383, 389, 390, 391, 392, 393, // no data file
400, 404, // positional args
405, 406, 407, 411, 415, 417, 456, 457, 458, 459, 460, 461, 462 // DSJSON not supported
405, 406, 407, 411, 415, 417, 456, 457, 458, 459, 460, 461, 462, // DSJSON not supported
464 // Empty lines not supported
};

var outputModels = new Dictionary<string, TestCase>();
Expand Down
13 changes: 13 additions & 0 deletions test/core.vwtest.json
Original file line number Diff line number Diff line change
Expand Up @@ -6004,5 +6004,18 @@
"input_files": [
"train-sets/automl_spin_off.txt"
]
},
{
"id": 464,
"desc": "Ignore empty lines on single-examples",
"vw_command": "-d train-sets/single_empty_lines.txt -p single_empty_lines.predict",
"diff_files": {
"stderr": "test-sets/ref/single_empty_lines.stderr",
"single_empty_lines.predict": "pred-sets/ref/single_empty_lines.predict",
"stdout": "test-sets/ref/single_empty_lines.stdout"
},
"input_files": [
"train-sets/single_empty_lines.txt"
]
}
]
1 change: 1 addition & 0 deletions test/pred-sets/ref/single_empty_lines.predict
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0
23 changes: 23 additions & 0 deletions test/test-sets/ref/single_empty_lines.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
predictions = single_empty_lines.predict
using no cache
Reading datafile = train-sets/single_empty_lines.txt
num sources = 1
Num weight bits = 18
learning rate = 0.5
initial_t = 0
power_t = 0.5
Enabled learners: gd, scorer-identity, count_label
Input label = SIMPLE
Output pred = SCALAR
average since example example current current current
loss last counter weight label predict features
1.000000 1.000000 1 1.0 1.0000 0.0000 2

finished run
number of examples = 1
weighted example sum = 1.000000
weighted label sum = 1.000000
average loss = 1.000000
best constant = 1.000000
best constant's loss = 0.000000
total feature number = 2
Empty file.
7 changes: 3 additions & 4 deletions test/train-sets/ref/empty-set.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,10 @@ Input label = SIMPLE
Output pred = SCALAR
average since example example current current current
loss last counter weight label predict features
n.a. n.a. 1 1.0 unknown 0.0000 1

finished run
number of examples = 1
weighted example sum = 1.000000
number of examples = 0
weighted example sum = 0.000000
weighted label sum = 0.000000
average loss = n.a.
total feature number = 1
total feature number = 0
23 changes: 11 additions & 12 deletions test/train-sets/ref/topk-train.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,20 @@ average since example example current current cur
loss last counter weight label predict features
9.000000 9.000000 1 1.0 3.0000 0.0000 4
4.590362 0.180723 2 2.0 0.0000 0.4251 4
3.928039 2.603395 4 4.0 unknown 0.2876 1
3.523584 3.119128 8 8.0 unknown 0.4184 1
2.610412 1.697241 16 16.0 unknown 0.6151 1
1.917275 1.224138 32 32.0 unknown 0.7335 1
1.246961 0.576646 64 64.0 unknown 0.8100 1
0.784439 0.321916 128 128.0 unknown 0.8650 1
0.439552 0.094665 256 256.0 unknown 0.9058 1
0.226776 0.014000 512 512.0 unknown 0.9328 1
0.113599 0.000422 1024 1024.0 unknown 0.9396 1
3.008577 1.426792 4 4.0 0.0000 0.5002 4
2.893238 2.777898 8 8.0 1.0000 0.7497 4
2.321989 1.750740 16 16.0 2.0000 1.5635 4
1.640977 0.959966 32 32.0 3.0000 1.4030 4
1.041363 0.441749 64 64.0 3.0000 2.2510 4
0.623755 0.206147 128 128.0 0.0000 0.4018 4
0.336533 0.049310 256 256.0 0.0000 0.1610 4
0.170349 0.004165 512 512.0 1.0000 1.0024 4

finished run
number of examples per pass = 12
number of examples per pass = 9
passes used = 100
weighted example sum = 1200.000000
weighted example sum = 900.000000
weighted label sum = 1500.000000
average loss = 0.096938
best constant = 1.666667
total feature number = 3900
total feature number = 3600
9 changes: 9 additions & 0 deletions test/train-sets/single_empty_lines.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
1 | x:1








1 change: 1 addition & 0 deletions vowpalwabbit/core/src/learner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ class single_example_handler
}
else if (ec->end_pass) { _context.template process<example, end_pass>(*ec); }
else if (is_save_cmd(ec)) { _context.template process<example, save>(*ec); }
else if (ec->is_newline) { VW::finish_example(_context.get_master(), *ec); }
else { _context.template process<example, learn_ex>(*ec); }
}

Expand Down
Loading