Add some doc on the early_dropout
AngledLuffa committed Dec 10, 2024
1 parent 3fb4f68 commit 8ba68db
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions stanza/models/constituency_parser.py
@@ -562,6 +562,13 @@ def build_argparse():
parser.add_argument('--loss', default='cross', help='cross, large_margin, or focal. Focal requires `pip install focal_loss_torch`')
parser.add_argument('--loss_focal_gamma', default=2, type=float, help='gamma value for a focal loss')

# turn off dropout for word_dropout, predict_dropout, and lstm_input_dropout
# this mechanism doesn't actually turn off lstm_layer_dropout (yet)
# but that is set to a default of 0 anyway
# this is reusing the idea presented in
# https://arxiv.org/pdf/2303.01500v2
# "Dropout Reduces Underfitting"
# Zhuang Liu, Zhiqiu Xu, Joseph Jin, Zhiqiang Shen, Trevor Darrell
parser.add_argument('--early_dropout', default=-1, type=int, help='When to turn off dropout')
# When using word_dropout and predict_dropout in conjunction with relu, one particular experiment produced the following dev scores after 300 iterations:
# 0.0: 0.9085
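The comment block in the diff describes "early dropout" from Liu et al.'s "Dropout Reduces Underfitting": dropout stays active for an initial phase of training and is then switched off. A minimal PyTorch sketch of that idea is below; the function names, the training-loop shape, and the use of `nn.Dropout` submodules are illustrative assumptions, not Stanza's actual implementation (which targets its own `word_dropout`, `predict_dropout`, and `lstm_input_dropout` parameters).

```python
import torch
import torch.nn as nn

def set_dropout(model: nn.Module, p: float) -> None:
    """Set the dropout probability on every nn.Dropout submodule."""
    for module in model.modules():
        if isinstance(module, nn.Dropout):
            module.p = p

def train(model, optimizer, loss_fn, batches, epochs, early_dropout=-1):
    """Train, zeroing dropout once `early_dropout` epochs have elapsed.

    early_dropout < 0 means dropout stays on for the whole run,
    mirroring the parser flag's default of -1.
    """
    for epoch in range(epochs):
        if early_dropout >= 0 and epoch == early_dropout:
            set_dropout(model, 0.0)  # dropout off for the rest of training
        model.train()
        for x, y in batches:
            optimizer.zero_grad()
            loss = loss_fn(model(x), y)
            loss.backward()
            optimizer.step()
```

With `early_dropout=-1` the loop behaves like ordinary training; any non-negative value picks the epoch at which regularization is removed, trading early-phase regularization for a better late-phase fit.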
