VowpalWabbit · bassmang · Sep 8, 2023 · Aug 31, 2023 · Aug 31, 2023 · Sep 1, 2023
diff --git a/cs/unittest/RunTests.tt b/cs/unittest/RunTests.tt
@@ -40,7 +40,8 @@ var skipList = new[] { 13, 32, 39, 258, 40, 259, 41, 260, 59, 60, 61, 66, 68, 90
     256, 299, 300, 306, 310, 311, 327, 328, 329, 330, 331, 367, 368, 396, 397, 398, // DSJSON not supported
     383, 389, 390, 391, 392, 393, // no data file
     400, 404, // positional args
-    405, 406, 407, 411, 415, 417, 456, 457, 458, 459, 460, 461, 462 // DSJSON not supported
+    405, 406, 407, 411, 415, 417, 456, 457, 458, 459, 460, 461, 462, // DSJSON not supported
+    464 // Empty lines not supported
     };
 
 var outputModels = new Dictionary<string, TestCase>();

diff --git a/test/core.vwtest.json b/test/core.vwtest.json
@@ -6004,5 +6004,18 @@
     "input_files": [
       "train-sets/automl_spin_off.txt"
     ]
+  },
+  {
+    "id": 464,
+    "desc": "Ignore empty lines on single-examples",
+    "vw_command": "-d train-sets/single_empty_lines.txt -p single_empty_lines.predict",
+    "diff_files": {
+      "stderr": "test-sets/ref/single_empty_lines.stderr",
+      "single_empty_lines.predict": "pred-sets/ref/single_empty_lines.predict",
+      "stdout": "test-sets/ref/single_empty_lines.stdout"
+    },
+    "input_files": [
+      "train-sets/single_empty_lines.txt"
+    ]
   }
 ]
diff --git a/test/pred-sets/ref/single_empty_lines.predict b/test/pred-sets/ref/single_empty_lines.predict
@@ -0,0 +1 @@
+0
diff --git a/test/test-sets/ref/single_empty_lines.stderr b/test/test-sets/ref/single_empty_lines.stderr
@@ -0,0 +1,23 @@
+predictions = single_empty_lines.predict
+using no cache
+Reading datafile = train-sets/single_empty_lines.txt
+num sources = 1
+Num weight bits = 18
+learning rate = 0.5
+initial_t = 0
+power_t = 0.5
+Enabled learners: gd, scorer-identity, count_label
+Input label = SIMPLE
+Output pred = SCALAR
+average  since         example        example        current        current  current
+loss     last          counter         weight          label        predict features
+1.000000 1.000000            1            1.0         1.0000         0.0000        2
+
+finished run
+number of examples = 1
+weighted example sum = 1.000000
+weighted label sum = 1.000000
+average loss = 1.000000
+best constant = 1.000000
+best constant's loss = 0.000000
+total feature number = 2
diff --git a/test/test-sets/ref/single_empty_lines.stdout b/test/test-sets/ref/single_empty_lines.stdout
diff --git a/test/train-sets/ref/empty-set.stderr b/test/train-sets/ref/empty-set.stderr
@@ -10,11 +10,10 @@ Input label = SIMPLE
 Output pred = SCALAR
 average  since         example        example        current        current  current
 loss     last          counter         weight          label        predict features
-n.a.     n.a.                1            1.0        unknown         0.0000        1
 
 finished run
-number of examples = 1
-weighted example sum = 1.000000
+number of examples = 0
+weighted example sum = 0.000000
 weighted label sum = 0.000000
 average loss = n.a.
-total feature number = 1
+total feature number = 0
diff --git a/test/train-sets/ref/topk-train.stderr b/test/train-sets/ref/topk-train.stderr
@@ -15,21 +15,20 @@ average  since         example        example        current        current  cur
 loss     last          counter         weight          label        predict features
 9.000000 9.000000            1            1.0         3.0000         0.0000        4
 4.590362 0.180723            2            2.0         0.0000         0.4251        4
-3.928039 2.603395            4            4.0        unknown         0.2876        1
-3.523584 3.119128            8            8.0        unknown         0.4184        1
-2.610412 1.697241           16           16.0        unknown         0.6151        1
-1.917275 1.224138           32           32.0        unknown         0.7335        1
-1.246961 0.576646           64           64.0        unknown         0.8100        1
-0.784439 0.321916          128          128.0        unknown         0.8650        1
-0.439552 0.094665          256          256.0        unknown         0.9058        1
-0.226776 0.014000          512          512.0        unknown         0.9328        1
-0.113599 0.000422         1024         1024.0        unknown         0.9396        1
+3.008577 1.426792            4            4.0         0.0000         0.5002        4
+2.893238 2.777898            8            8.0         1.0000         0.7497        4
+2.321989 1.750740           16           16.0         2.0000         1.5635        4
+1.640977 0.959966           32           32.0         3.0000         1.4030        4
+1.041363 0.441749           64           64.0         3.0000         2.2510        4
+0.623755 0.206147          128          128.0         0.0000         0.4018        4
+0.336533 0.049310          256          256.0         0.0000         0.1610        4
+0.170349 0.004165          512          512.0         1.0000         1.0024        4
 
 finished run
-number of examples per pass = 12
+number of examples per pass = 9
 passes used = 100
-weighted example sum = 1200.000000
+weighted example sum = 900.000000
 weighted label sum = 1500.000000
 average loss = 0.096938
 best constant = 1.666667
-total feature number = 3900
+total feature number = 3600
diff --git a/test/train-sets/single_empty_lines.txt b/test/train-sets/single_empty_lines.txt
@@ -0,0 +1,9 @@
+1 | x:1
+
+
+
+
+
+
+
+
diff --git a/vowpalwabbit/core/src/learner.cc b/vowpalwabbit/core/src/learner.cc
@@ -145,6 +145,7 @@ class single_example_handler
     }
     else if (ec->end_pass) { _context.template process<example, end_pass>(*ec); }
     else if (is_save_cmd(ec)) { _context.template process<example, save>(*ec); }
+    else if (ec->is_newline) { VW::finish_example(_context.get_master(), *ec); }
     else { _context.template process<example, learn_ex>(*ec); }
   }