-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathnlpwdl-bibliography.bib
835 lines (718 loc) · 31.6 KB
/
nlpwdl-bibliography.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
% SuperGLUE benchmark paper (NeurIPS 2019 proceedings).
@inproceedings{Wang.et.al.2019.NeurIPS,
  author    = {Wang, Alex and Pruksachatkun, Yada and Nangia, Nikita and Singh, Amanpreet and Michael, Julian and Hill, Felix and Levy, Omer and Bowman, Samuel R.},
  title     = {{SuperGLUE: A Stickier Benchmark for General-Purpose Language Understanding Systems}},
  booktitle = {Proceedings of the 33rd International Conference on Neural Information Processing Systems},
  pages     = {3266--3280},
  publisher = {Curran Associates, Inc.},
  address   = {Vancouver, Canada},
  year      = {2019},
}
% The Winograd Schema Challenge (KR 2012).
@inproceedings{Levesque.et.al.2012,
  author    = {Levesque, Hector J. and Davis, Ernest and Morgenstern, Leora},
  title     = {{The Winograd Schema Challenge}},
  booktitle = {Proceedings of the Thirteenth International Conference on Principles of Knowledge Representation and Reasoning},
  pages     = {552--561},
  publisher = {Association for the Advancement of Artificial Intelligence},
  address   = {Rome, Italy},
  year      = {2012},
}
% Overview article on recognizing textual entailment (Natural Language Engineering, 2009).
% Fixed the second author's given name ("BIll" -> "Bill").
% NOTE(review): the title word "Rational" is kept exactly as entered; confirm against the publisher record.
@article{Dagan.et.al.2009.NLE,
  author  = {Dagan, Ido and Dolan, Bill and Magnini, Bernardo and Roth, Dan},
  title   = {{Recognizing textual entailment: Rational, evaluation and approaches}},
  journal = {Natural Language Engineering},
  volume  = {15},
  number  = {4},
  pages   = {1--27},
  year    = {2009},
  doi     = {10.1017/S1351324909990209},
}
% Word vectors for sentiment analysis (ACL-HLT 2011).
@inproceedings{Maas.et.al.2011,
  author    = {Maas, Andrew L. and Daly, Raymond E. and Pham, Peter T. and Huang, Dan and Ng, Andrew Y. and Potts, Christopher},
  title     = {{Learning Word Vectors for Sentiment Analysis}},
  booktitle = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies},
  pages     = {142--150},
  publisher = {Association for Computational Linguistics},
  address   = {Portland, Oregon},
  year      = {2011},
  url       = {https://aclanthology.org/P11-1015},
}
% Large annotated corpus for natural language inference (EMNLP 2015).
@inproceedings{Bowman.et.al.2015,
  author    = {Bowman, Samuel R. and Angeli, Gabor and Potts, Christopher and Manning, Christopher D.},
  title     = {{A large annotated corpus for learning natural language inference}},
  booktitle = {Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing},
  pages     = {632--642},
  publisher = {Association for Computational Linguistics},
  address   = {Lisbon, Portugal},
  year      = {2015},
  doi       = {10.18653/v1/D15-1075},
  url       = {http://aclweb.org/anthology/D15-1075},
}
% Legal argument mining in court decisions (Artificial Intelligence and Law, 2023).
% Changes: removed hard-coded \textbf from the first author's name (formatting belongs in the
% bibliography style; the macro also pollutes name parsing and sort keys), braced the compound
% surname "Spiecker genannt Doehmann" so "genannt" is not parsed as a von-particle, and wrote
% the umlaut as a LaTeX escape like other entries in this file.
@article{Habernal.et.al.2023.AILaw,
  author  = {Habernal, Ivan and Faber, Daniel and Recchia, Nicola and Bretthauer, Sebastian and Gurevych, Iryna and {Spiecker genannt D{\"o}hmann}, Indra and Burchard, Christoph},
  title   = {{Mining Legal Arguments in Court Decisions}},
  journal = {Artificial Intelligence and Law},
  year    = {2023},
  doi     = {10.1007/s10506-023-09361-y},
}
% Survey of inter-coder agreement measures (Computational Linguistics, 2008).
@article{Artstein.Poesio.2008.CoLi,
  author  = {Artstein, Ron and Poesio, Massimo},
  title   = {{Inter-Coder Agreement for Computational Linguistics}},
  journal = {Computational Linguistics},
  volume  = {34},
  number  = {4},
  pages   = {555--596},
  year    = {2008},
  doi     = {10.1162/coli.07-034-R2},
}
% CoNLL-2003 shared task description (named entity recognition).
% The ACL Anthology link was stored in `publisher`; it is moved to `url`, where styles expect it.
@inproceedings{TjongKimSang.DeMeulder.2003,
  author    = {{Tjong Kim Sang}, Erik F. and {De Meulder}, Fien},
  title     = {{Introduction to the CoNLL-2003 Shared Task: Language-Independent Named Entity Recognition}},
  booktitle = {Proceedings of the Seventh Conference on Natural Language Learning at HLT-NAACL 2003},
  pages     = {142--147},
  year      = {2003},
  url       = {https://aclanthology.org/W03-0419},
}
% BoolQ yes/no question answering dataset (NAACL 2019).
% Given name normalised to "Ming-Wei", matching the spelling of the same author in
% the Devlin.et.al.2019.NAACL entry of this file.
@inproceedings{Clark.et.al.2019.NAACL,
  author    = {Clark, Christopher and Lee, Kenton and Chang, Ming-Wei and Kwiatkowski, Tom and Collins, Michael and Toutanova, Kristina},
  title     = {{BoolQ: Exploring the Surprising Difficulty of Natural Yes/No Questions}},
  booktitle = {Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)},
  pages     = {2924--2936},
  publisher = {Association for Computational Linguistics},
  address   = {Minneapolis, Minnesota},
  year      = {2019},
  doi       = {10.18653/v1/N19-1300},
}
% Multi-sentence reading comprehension challenge set (NAACL 2018).
@inproceedings{Khashabi.et.al.2018.NAACL,
  author    = {Khashabi, Daniel and Chaturvedi, Snigdha and Roth, Michael and Upadhyay, Shyam and Roth, Dan},
  title     = {{Looking Beyond the Surface: A Challenge Set for Reading Comprehension over Multiple Sentences}},
  booktitle = {Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)},
  pages     = {252--262},
  publisher = {Association for Computational Linguistics},
  address   = {New Orleans, LA},
  year      = {2018},
  doi       = {10.18653/v1/N18-1023},
}
% Findings of the WMT18 shared tasks (Third Conference on Machine Translation).
% The first author's given name now uses a LaTeX accent escape instead of a raw Unicode
% character, so the entry also works with 8-bit BibTeX and matches the escapes used elsewhere
% in this file.
% NOTE(review): the author list is kept as entered; verify it against the DOI record.
@inproceedings{Bojar.et.al.2018.WMT,
  author    = {Bojar, Ond{\v{r}}ej and Federmann, Christian and Fishel, Mark and Graham, Yvette and Haddow, Barry and Koehn, Philipp and Monz, Christof},
  title     = {{Findings of the 2018 Conference on Machine Translation (WMT18)}},
  booktitle = {Proceedings of the Third Conference on Machine Translation: Shared Task Papers},
  volume    = {2},
  pages     = {272--303},
  publisher = {Association for Computational Linguistics},
  address   = {Brussels, Belgium},
  year      = {2018},
  doi       = {10.18653/v1/W18-6401},
}
% Textbook on neural machine translation (Cambridge University Press).
@book{Koehn.2020,
  author    = {Philipp Koehn},
  title     = {Neural Machine Translation},
  publisher = {Cambridge University Press},
  year      = {2020},
  note      = {(not freely available)},
}
% Machine reading comprehension paper (NeurIPS 2015).
@inproceedings{Hermann.et.al.2015.NeurIPS,
  author    = {Hermann, Karl Moritz and Kocisky, Tomas and Grefenstette, Edward and Espeholt, Lasse and Kay, Will and Suleyman, Mustafa and Blunsom, Phil},
  title     = {{Teaching Machines to Read and Comprehend}},
  booktitle = {Proceedings of NeurIPS},
  pages     = {1--9},
  publisher = {Curran Associates, Inc.},
  year      = {2015},
}
% Text-to-text transfer learning study (JMLR, 2020).
@article{Raffel.et.al.2020.JMLR,
  author   = {Raffel, Colin and Shazeer, Noam and Roberts, Adam and Lee, Katherine and Narang, Sharan and Matena, Michael and Zhou, Yanqi and Li, Wei and Liu, Peter J.},
  title    = {{Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer}},
  journal  = {Journal of Machine Learning Research},
  volume   = {21},
  number   = {140},
  pages    = {1--67},
  year     = {2020},
  keywords = {attention-,multi-task learning,natural language processing,transfer learning},
}
% Book on evaluating learning algorithms (Cambridge University Press).
@book{Japkowicz.Shah.2011,
  author    = {Nathalie Japkowicz and Mohak Shah},
  title     = {{Evaluating Learning Algorithms: A Classification Perspective}},
  publisher = {Cambridge University Press},
  year      = {2011},
  note      = {(not freely available)},
}
% BLEU metric for machine translation evaluation (ACL 2002).
@inproceedings{Papineni.et.al.2002.ACL,
  author    = {Papineni, Kishore and Roukos, Salim and Ward, Todd and Zhu, Wei-Jing},
  title     = {{BLEU: a Method for Automatic Evaluation of Machine Translation}},
  booktitle = {Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics},
  pages     = {311--318},
  publisher = {Association for Computational Linguistics},
  address   = {Philadelphia, PA},
  year      = {2002},
  doi       = {10.3115/1073083.1073135},
}
% ROUGE package for automatic summary evaluation (Text Summarization Branches Out, 2004).
@inproceedings{Lin.2004,
  author    = {Lin, Chin-Yew},
  title     = {{ROUGE}: A Package for Automatic Evaluation of Summaries},
  booktitle = {Text Summarization Branches Out},
  pages     = {74--81},
  publisher = {Association for Computational Linguistics},
  address   = {Barcelona, Spain},
  year      = {2004},
  url       = {https://aclanthology.org/W04-1013},
}
% Position paper on human label variation (EMNLP 2022).
% The Unicode curly quotes in the title are replaced by TeX quote ligatures (``...''),
% which typeset identically and are also safe under 8-bit BibTeX.
@inproceedings{Plank.2022.EMNLP,
  author    = {Plank, Barbara},
  title     = {{The ``Problem'' of Human Label Variation: On Ground Truth in Data, Modeling and Evaluation}},
  booktitle = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing},
  pages     = {10671--10682},
  publisher = {Association for Computational Linguistics},
  address   = {Abu Dhabi, United Arab Emirates},
  year      = {2022},
  url       = {https://aclanthology.org/2022.emnlp-main.731},
}
% Annotator bias in natural language understanding datasets (EMNLP-IJCNLP 2019).
@inproceedings{Geva.et.al.2019.EMNLP,
  author    = {Geva, Mor and Goldberg, Yoav and Berant, Jonathan},
  title     = {{Are We Modeling the Task or the Annotator? An Investigation of Annotator Bias in Natural Language Understanding Datasets}},
  booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)},
  pages     = {1161--1166},
  publisher = {Association for Computational Linguistics},
  address   = {Hong Kong, China},
  year      = {2019},
  doi       = {10.18653/v1/D19-1107},
}
% Survey of evaluation metrics for NLG systems (ACM Computing Surveys).
@article{Sai.et.al.2023.CSUR,
  author  = {Sai, Ananya B. and Mohankumar, Akash Kumar and Khapra, Mitesh M.},
  title   = {{A Survey of Evaluation Metrics Used for NLG Systems}},
  journal = {ACM Computing Surveys},
  volume  = {55},
  number  = {2},
  pages   = {1--39},
  year    = {2023},
  doi     = {10.1145/3485766},
}
% Argument Reasoning Comprehension Task (NAACL 2018).
% Removed hard-coded \textbf from the first author's name: formatting belongs in the
% bibliography style, and the macro pollutes name parsing and sort keys. If the name
% should be highlighted, configure that in the style instead.
@inproceedings{Habernal.et.al.2018.NAACL.ARCT,
  author    = {Habernal, Ivan and Wachsmuth, Henning and Gurevych, Iryna and Stein, Benno},
  title     = {{The Argument Reasoning Comprehension Task: Identification and Reconstruction of Implicit Warrants}},
  booktitle = {Proceedings of NAACL},
  pages     = {1930--1940},
  address   = {New Orleans, LA},
  year      = {2018},
  url       = {http://aclweb.org/anthology/N18-1175},
}
% Probing neural comprehension of natural language arguments (ACL 2019).
@inproceedings{Niven.Kao.2019.ACL,
  author    = {Niven, Timothy and Kao, Hung-Yu},
  title     = {{Probing Neural Network Comprehension of Natural Language Arguments}},
  booktitle = {Proceedings of ACL},
  pages     = {4658--4664},
  address   = {Florence, Italy},
  year      = {2019},
  url       = {https://www.aclweb.org/anthology/P19-1459},
}
% Pitfalls in measuring classifier performance under cross-validation (SIGKDD Explorations).
% NOTE(review): the citation key says 2009 while the year field is 2010; the key is kept
% so existing citations continue to resolve.
@article{Forman.Scholz.2009.SIGKDD,
  author          = {Forman, George and Scholz, Martin},
  title           = {{Apples-to-Apples in Cross-Validation Studies: Pitfalls in Classifier Performance Measurement}},
  journal         = {ACM SIGKDD Explorations Newsletter},
  volume          = {12},
  number          = {1},
  pages           = {49--57},
  year            = {2010},
  annote          = {fundamental article for reporting f-measure},
  file            = {:home/habi/Dokumenty/Mendeley Desktop/Forman, Scholz - 2010 - Apples-to-Apples in Cross-Validation Studies Pitfalls in Classifier Performance Measurement.pdf:pdf},
  mendeley-groups = {evaluation},
}
% Systematic analysis of classification performance measures (Information Processing and Management).
@article{Sokolova.Lapalme.2009,
  author    = {Sokolova, Marina and Lapalme, Guy},
  title     = {{A systematic analysis of performance measures for classification tasks}},
  journal   = {Information Processing and Management},
  volume    = {45},
  number    = {4},
  pages     = {427--437},
  publisher = {Elsevier Ltd},
  year      = {2009},
  doi       = {10.1016/j.ipm.2009.03.002},
}
% Cautionary study of language generation evaluation metrics (COLING 2020).
@inproceedings{caglayan-etal-2020-curious,
  author    = {Caglayan, Ozan and Madhyastha, Pranava and Specia, Lucia},
  title     = {Curious Case of Language Generation Evaluation Metrics: A Cautionary Tale},
  booktitle = {Proceedings of COLING},
  pages     = {2322--2328},
  year      = {2020},
  doi       = {10.18653/v1/2020.coling-main.210},
}
% Unanswerable questions for SQuAD (ACL 2018, short papers).
@inproceedings{Rajpurkar.et.al.2018.ACL,
  author    = {Rajpurkar, Pranav and Jia, Robin and Liang, Percy},
  title     = {{Know What You Don't Know: Unanswerable Questions for SQuAD}},
  booktitle = {Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  pages     = {784--789},
  publisher = {Association for Computational Linguistics},
  address   = {Melbourne, Australia},
  year      = {2018},
  doi       = {10.18653/v1/P18-2124},
}
% Personalizing dialogue agents (ACL 2018, long papers).
@inproceedings{Zhang.et.al.2018.ACL,
  author    = {Zhang, Saizheng and Dinan, Emily and Urbanek, Jack and Szlam, Arthur and Kiela, Douwe and Weston, Jason},
  title     = {{Personalizing Dialogue Agents: I have a dog, do you have pets too?}},
  booktitle = {Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  pages     = {2204--2213},
  publisher = {Association for Computational Linguistics},
  address   = {Melbourne, Australia},
  year      = {2018},
  doi       = {10.18653/v1/P18-1205},
}
% Textbook on mathematics for machine learning (Cambridge University Press).
% The url now carries an explicit scheme; without one, \url/hyperref treats the value as a
% relative link rather than a web address.
@book{Deisenroth.et.al.2021.book,
  author    = {Deisenroth, Marc Peter and Faisal, Aldo and Ong, Cheng Soon},
  title     = {Mathematics for Machine Learning},
  publisher = {Cambridge University Press},
  year      = {2021},
  url       = {https://mml-book.com},
}
% Textbook on probabilistic graphical models (MIT Press).
@book{Koller.Friedman.2009.book,
  author    = {Koller, Daphne and Friedman, Nir},
  title     = {Probabilistic Graphical Models: Principles and Techniques},
  publisher = {MIT Press},
  year      = {2009},
}
% Deep Learning textbook (MIT Press).
% The url now carries an explicit scheme; without one, \url/hyperref treats the value as a
% relative link rather than a web address.
@book{Goodfellow.et.al.2016.book,
  author    = {Ian Goodfellow and Yoshua Bengio and Aaron Courville},
  title     = {Deep Learning},
  publisher = {MIT Press},
  year      = {2016},
  url       = {https://www.deeplearningbook.org},
}
% SensEmbed sense embeddings (ACL 2015).
@inproceedings{Iacobacci.et.al.2015.ACL,
  author    = {Iacobacci, Ignacio and Pilehvar, Mohammad Taher and Navigli, Roberto},
  title     = {{SensEmbed: Learning Sense Embeddings for Word and Relational Similarity}},
  booktitle = {Proceedings of ACL},
  pages     = {95--105},
  publisher = {Association for Computational Linguistics},
  address   = {Beijing, China},
  year      = {2015},
  doi       = {10.3115/v1/P15-1010},
}
% Empirical comparison of cross-lingual word embedding models (ACL 2016).
@inproceedings{Upadhyay.et.al.2016.ACL,
  author    = {Upadhyay, Shyam and Faruqui, Manaal and Dyer, Chris and Roth, Dan},
  title     = {{Cross-lingual Models of Word Embeddings: An Empirical Comparison}},
  booktitle = {Proceedings of ACL},
  pages     = {1661--1670},
  address   = {Berlin, Germany},
  year      = {2016},
  doi       = {10.18653/v1/P16-1157},
}
% Evaluation methodology for cross-lingual word embeddings (ACL 2019).
@inproceedings{Glavas.et.al.2019.ACL,
  author    = {Glava{\v{s}}, Goran and Litschko, Robert and Ruder, Sebastian and Vuli{\'{c}}, Ivan},
  title     = {{How to (Properly) Evaluate Cross-Lingual Word Embeddings: On Strong Baselines, Comparative Analyses, and Some Misconceptions}},
  booktitle = {Proceedings of ACL},
  pages     = {710--721},
  address   = {Florence, Italy},
  year      = {2019},
  doi       = {10.18653/v1/P19-1070},
}
% Bilingual word embeddings from non-parallel data (ACL 2015, short papers).
@inproceedings{Vulic.Moens.2015.ACL,
  author    = {Vuli{\'{c}}, Ivan and Moens, Marie-Francine},
  title     = {{Bilingual Word Embeddings from Non-Parallel Document-Aligned Data Applied to Bilingual Lexicon Induction}},
  booktitle = {Proceedings of ACL (Volume 2: Short Papers)},
  pages     = {719--725},
  address   = {Beijing, China},
  year      = {2015},
  doi       = {10.3115/v1/P15-2118},
}
% Bilingual word embeddings with (almost) no bilingual data (ACL 2017).
@inproceedings{Artetxe.et.al.2017.ACL,
  author    = {Artetxe, Mikel and Labaka, Gorka and Agirre, Eneko},
  title     = {{Learning bilingual word embeddings with (almost) no bilingual data}},
  booktitle = {Proceedings of ACL},
  pages     = {451--462},
  address   = {Vancouver, Canada},
  year      = {2017},
  doi       = {10.18653/v1/P17-1042},
}
% Word2Vec adaptations for syntax problems (NAACL 2015).
@inproceedings{Ling.et.al.2015.NAACL,
  author    = {Ling, Wang and Dyer, Chris and Black, Alan W and Trancoso, Isabel},
  title     = {{Two/Too Simple Adaptations of Word2Vec for Syntax Problems}},
  booktitle = {Proceedings of NAACL},
  pages     = {1299--1304},
  address   = {Denver, Colorado},
  year      = {2015},
  doi       = {10.3115/v1/N15-1142},
}
% Dependency-based word embeddings (ACL 2014).
@inproceedings{Levy.Goldberg.2014.ACL,
  author    = {Levy, Omer and Goldberg, Yoav},
  title     = {{Dependency-Based Word Embeddings}},
  booktitle = {Proceedings of ACL},
  pages     = {302--308},
  address   = {Baltimore, MD, USA},
  year      = {2014},
  doi       = {10.3115/v1/P14-2050},
}
% Word vectors enriched with subword information (TACL, 2017).
@article{Bojanowski.et.al.2017.TACL,
  author  = {Bojanowski, Piotr and Grave, Edouard and Joulin, Armand and Mikolov, Tomas},
  title   = {{Enriching Word Vectors with Subword Information}},
  journal = {Transactions of the ACL},
  volume  = {5},
  pages   = {135--146},
  year    = {2017},
  doi     = {10.1162/tacl_a_00051},
}
% Sequential learning of convolutional features for text classification (EMNLP-IJCNLP 2019).
@inproceedings{Madasu.AnveshRao.2019.EMNLP,
  author    = {Madasu, Avinash and {Anvesh Rao}, Vijjini},
  title     = {{Sequential Learning of Convolutional Features for Effective Text Classification}},
  booktitle = {Proceedings of EMNLP-IJCNLP},
  pages     = {5657--5666},
  publisher = {Association for Computational Linguistics},
  address   = {Hong Kong, China},
  year      = {2019},
  doi       = {10.18653/v1/D19-1567},
}
% Convolutional neural networks for sentence classification (EMNLP 2014).
@inproceedings{Kim.2014.EMNLP,
  author    = {Kim, Yoon},
  title     = {{Convolutional Neural Networks for Sentence Classification}},
  booktitle = {Proceedings of EMNLP},
  pages     = {1746--1751},
  publisher = {Association for Computational Linguistics},
  address   = {Doha, Qatar},
  year      = {2014},
  doi       = {10.3115/v1/D14-1181},
}
% BERT pre-training paper (NAACL 2019).
@inproceedings{Devlin.et.al.2019.NAACL,
  author    = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
  title     = {{BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding}},
  booktitle = {Proceedings of NAACL},
  pages     = {4171--4186},
  publisher = {Association for Computational Linguistics},
  address   = {Minneapolis, Minnesota},
  year      = {2019},
  doi       = {10.18653/v1/N19-1423},
}
% Annotation artifacts in natural language inference data (NAACL 2018).
@inproceedings{Gururangan.et.al.2018.NAACL.short,
  author    = {Gururangan, Suchin and Swayamdipta, Swabha and Levy, Omer and Schwartz, Roy and Bowman, Samuel and Smith, Noah A.},
  title     = {{Annotation Artifacts in Natural Language Inference Data}},
  booktitle = {Proceedings of NAACL},
  pages     = {107--112},
  publisher = {Association for Computational Linguistics},
  address   = {New Orleans, LA},
  year      = {2018},
  doi       = {10.18653/v1/N18-2017},
}
% Primer on neural network models for NLP (JAIR, 2016).
@article{Goldberg.2016,
  author  = {Goldberg, Yoav},
  title   = {{A Primer on Neural Network Models for Natural Language Processing}},
  journal = {Journal of Artificial Intelligence Research},
  volume  = {57},
  pages   = {345--420},
  year    = {2016},
  doi     = {10.1613/jair.4992},
}
% Convolutional sequence-to-sequence learning (ICML 2017).
@inproceedings{Gehring.et.al.2017a.ICML,
  author    = {Gehring, Jonas and Auli, Michael and Grangier, David and Yarats, Denis and Dauphin, Yann N.},
  editor    = {Precup, Doina and Teh, Yee Whye},
  title     = {{Convolutional Sequence to Sequence Learning}},
  booktitle = {Proceedings of the 34th International Conference on Machine Learning},
  pages     = {1243--1252},
  publisher = {PMLR},
  address   = {Sydney, Australia},
  year      = {2017},
}
% Two-stage model for non-local dependencies in named entity recognition (ACL 2006).
@inproceedings{Krishnan.Manning.2006,
  author    = {Krishnan, Vijay and Manning, Christopher D.},
  title     = {{An Effective Two-Stage Model for Exploiting Non-Local Dependencies in Named Entity Recognition}},
  booktitle = {Proceedings of ACL},
  pages     = {1121--1128},
  publisher = {Association for Computational Linguistics},
  address   = {Sydney, Australia},
  year      = {2006},
  doi       = {10.3115/1220175.1220316},
}
% Report on teaching an NLP MOOC (Fifth Workshop on Teaching NLP, 2021).
@inproceedings{artemova-etal-2021-teaching,
  author    = {Artemova, Ekaterina and Apishev, Murat and Kirianov, Denis and Sarkisyan, Veronica and Aksenov, Sergey and Serikov, Oleg},
  title     = {Teaching a Massive Open Online Course on Natural Language Processing},
  booktitle = {Proceedings of the Fifth Workshop on Teaching NLP},
  pages     = {13--27},
  publisher = {Association for Computational Linguistics},
  address   = {Online},
  year      = {2021},
  url       = {https://www.aclweb.org/anthology/2021.teachingnlp-1.2},
}
% Transformer architecture paper (Advances in Neural Information Processing Systems 30).
% The given name of author Kaiser is written with the {\L} escape, matching the spelling of
% the same author in the Wu.et.al.2016.GoogleMT entry of this file.
@inproceedings{Vaswani.et.al.2017,
  author    = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and Kaiser, {\L}ukasz and Polosukhin, Illia},
  title     = {{Attention Is All You Need}},
  booktitle = {Advances in Neural Information Processing Systems 30},
  pages     = {5998--6008},
  publisher = {Curran Associates, Inc.},
  address   = {Long Beach, CA, USA},
  year      = {2017},
}
% arXiv preprint on neural machine translation.
% The publication year was stored in the biblatex-only `date` field; it is now in `year`
% like the other entries of this file, so classic BibTeX styles also see it.
@article{Koehn.2017,
  author  = {Koehn, Philipp},
  title   = {Neural Machine Translation},
  journal = {arXiv preprint},
  year    = {2017},
  url     = {http://arxiv.org/abs/1709.07809},
}
% Japanese and Korean voice search (ICASSP 2012).
@inproceedings{Schuster.Nakajima.2012,
  author    = {Schuster, Mike and Nakajima, Kaisuke},
  title     = {{Japanese and Korean voice search}},
  booktitle = {2012 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages     = {5149--5152},
  publisher = {IEEE},
  address   = {Kyoto, Japan},
  year      = {2012},
  doi       = {10.1109/ICASSP.2012.6289079},
}
% Google's neural machine translation system (arXiv preprint, 2016).
% Fixed the journal typo "arXive"; the value now matches the "arXiv preprint" wording used by
% other preprint entries in this file.
@article{Wu.et.al.2016.GoogleMT,
  author  = {Wu, Yonghui and Schuster, Mike and Chen, Zhifeng and Le, Quoc V. and Norouzi, Mohammad and Macherey, Wolfgang and Krikun, Maxim and Cao, Yuan and Gao, Qin and Macherey, Klaus and Klingner, Jeff and Shah, Apurva and Johnson, Melvin and Liu, Xiaobing and Kaiser, {\L}ukasz and Gouws, Stephan and Kato, Yoshikiyo and Kudo, Taku and Kazawa, Hideto and Stevens, Keith and Kurian, George and Patil, Nishant and Wang, Wei and Young, Cliff and Smith, Jason and Riesa, Jason and Rudnick, Alex and Vinyals, Oriol and Corrado, Greg and Hughes, Macduff and Dean, Jeffrey},
  title   = {{Google's Neural Machine Translation System: Bridging the Gap between Human and Machine Translation}},
  journal = {arXiv preprint},
  pages   = {1--23},
  year    = {2016},
  url     = {http://arxiv.org/abs/1609.08144},
}
% Subword units for rare words in neural machine translation (ACL 2016).
@inproceedings{Sennrich.et.al.2016.ACL,
  author    = {Sennrich, Rico and Haddow, Barry and Birch, Alexandra},
  title     = {{Neural Machine Translation of Rare Words with Subword Units}},
  booktitle = {Proceedings of ACL},
  pages     = {1715--1725},
  publisher = {Association for Computational Linguistics},
  address   = {Berlin, Germany},
  year      = {2016},
  doi       = {10.18653/v1/P16-1162},
}
% Multi-task learning article (Machine Learning, 1997).
@article{Caruana.1997,
  author  = {Caruana, Rich},
  title   = {{Multi-task Learning}},
  journal = {Machine Learning},
  volume  = {28},
  number  = {1},
  pages   = {41--75},
  year    = {1997},
  doi     = {10.1023/A:1007379606734},
}
% Deep multi-task learning with low-level task supervision (ACL 2016).
@inproceedings{Sogaard.Goldberg.2016,
  author    = {S{\o}gaard, Anders and Goldberg, Yoav},
  title     = {{Deep multi-task learning with low level tasks supervised at lower layers}},
  booktitle = {Proceedings of ACL},
  pages     = {231--235},
  publisher = {Association for Computational Linguistics},
  address   = {Berlin, Germany},
  year      = {2016},
  doi       = {10.18653/v1/P16-2038},
}
% Universal sentence representations learned from NLI data (EMNLP 2017).
@inproceedings{Conneau.et.al.2017.EMNLP,
  author    = {Conneau, Alexis and Kiela, Douwe and Schwenk, Holger and Barrault, Lo{\"{i}}c and Bordes, Antoine},
  title     = {{Supervised Learning of Universal Sentence Representations from Natural Language Inference Data}},
  booktitle = {Proceedings of EMNLP},
  pages     = {670--680},
  address   = {Copenhagen, Denmark},
  year      = {2017},
}
% Survey of what is known about how BERT works (TACL, 2020).
@article{Rogers.et.al.2020.BERT,
  author  = {Rogers, Anna and Kovaleva, Olga and Rumshisky, Anna},
  title   = {{A Primer in BERTology: What We Know About How BERT Works}},
  journal = {Transactions of the Association for Computational Linguistics},
  volume  = {8},
  pages   = {842--866},
  year    = {2020},
  doi     = {10.1162/tacl_a_00349},
}
% Adam stochastic optimization method (ICLR 2015).
@inproceedings{Kingma.Ba.2015,
  author    = {Kingma, Diederik P. and Ba, Jimmy Lei},
  editor    = {Bengio, Yoshua and LeCun, Yann},
  title     = {{Adam: A Method for Stochastic Optimization}},
  booktitle = {3rd International Conference on Learning Representations, ICLR 2015},
  pages     = {1--15},
  address   = {San Diego, CA, USA},
  year      = {2015},
  url       = {https://arxiv.org/abs/1412.6980},
}
% Neural probabilistic language model (JMLR, 2003).
@article{Bengio.et.al.2003.JMLR,
  author  = {Bengio, Yoshua and Ducharme, R{\'{e}}jean and Vincent, Pascal and Jauvin, Christian},
  title   = {{A Neural Probabilistic Language Model}},
  journal = {Journal of Machine Learning Research},
  volume  = {3},
  pages   = {1137--1155},
  year    = {2003},
  url     = {https://research.jmlr.org/papers/v3/bengio03a.html},
}
% Book: a programmer's introduction to mathematics (second edition).
% The Unicode typographic apostrophe in the title is replaced by an ASCII apostrophe, which
% LaTeX typesets as the same glyph and which is also safe under 8-bit BibTeX.
@book{Kun.2020,
  author  = {Jeremy Kun},
  title   = {A Programmer's Introduction to Mathematics},
  edition = {2},
  year    = {2020},
  url     = {https://pimbook.org},
}
% Book on neural network methods for NLP (Morgan & Claypool).
@book{Goldberg.2017,
  author    = {Goldberg, Yoav},
  title     = {Neural Network Methods for Natural Language Processing},
  publisher = {Morgan \& Claypool},
  year      = {2017},
}
% SentencePiece subword tokenizer (EMNLP 2018 system demonstrations).
@inproceedings{Kudo.Richardson.2018.EMNLP,
  author    = {Kudo, Taku and Richardson, John},
  title     = {{S}entence{P}iece: A simple and language independent subword tokenizer and detokenizer for Neural Text Processing},
  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing: System Demonstrations},
  pages     = {66--71},
  publisher = {Association for Computational Linguistics},
  address   = {Brussels, Belgium},
  year      = {2018},
  doi       = {10.18653/v1/D18-2012},
}
% Subword regularization for neural translation models (arXiv preprint, 2018).
@article{kudo2018subword,
  author  = {Kudo, Taku},
  title   = {Subword regularization: Improving neural network translation models with multiple subword candidates},
  journal = {arXiv preprint arXiv:1804.10959},
  year    = {2018},
}
% Neural machine translation with joint alignment and translation (ICLR 2015).
@inproceedings{bahdanau2014neural,
  author    = {Dzmitry Bahdanau and Kyunghyun Cho and Yoshua Bengio},
  editor    = {Yoshua Bengio and Yann LeCun},
  title     = {{Neural Machine Translation by Jointly Learning to Align and Translate}},
  booktitle = {3rd International Conference on Learning Representations (ICLR)},
  address   = {San Diego, CA, USA},
  year      = {2015},
}
% Machine learning textbook with a probabilistic perspective (MIT Press).
@book{Murphy.2012,
  author    = {Murphy, Kevin},
  title     = {Machine Learning: a Probabilistic Perspective},
  publisher = {MIT Press},
  year      = {2012},
}
% Efficient estimation of word representations (ICLR 2013 workshop track).
% Changes: the location was stored in a non-standard `city` field, which styles ignore; it is
% now in `address` like the other entries. The page range uses the `--` en-dash convention.
@inproceedings{Mikolov.et.al.2013.ICLR,
  author    = {Tomas Mikolov and Kai Chen and Greg Corrado and Jeffrey Dean},
  editor    = {Yoshua Bengio and Yann LeCun},
  title     = {{Efficient estimation of word representations in vector space}},
  booktitle = {1st International Conference on Learning Representations ICLR, Workshop Track Proceedings},
  pages     = {1--12},
  address   = {Scottsdale, Arizona, USA},
  year      = {2013},
}
% Human-like biases in semantics derived from language corpora (Science, 2017).
% Changes: the numeric issue is moved from `issue` to `number`, the field styles print;
% the month uses the standard three-letter macro; the page range uses `--`.
@article{Caliskan.et.al.2017.science,
  author  = {Aylin Caliskan and Bryson, Joanna J. and Arvind Narayanan},
  title   = {Semantics derived automatically from language corpora contain human-like biases},
  journal = {Science},
  volume  = {356},
  number  = {6334},
  pages   = {183--186},
  month   = apr,
  year    = {2017},
  doi     = {10.1126/science.aal4230},
}
% Query expansion using word embeddings (CIKM 2016).
% Changes: the location was stored in a non-standard `city` field, which styles ignore; it is
% now in `address` like the other entries. The page range uses the `--` en-dash convention.
@inproceedings{Kuzi.et.al.2016.CIKM,
  author    = {Saar Kuzi and Anna Shtok and Oren Kurland},
  title     = {{Query Expansion Using Word Embeddings}},
  booktitle = {Proceedings of the 25th ACM International on Conference on Information and Knowledge Management},
  pages     = {1929--1932},
  publisher = {ACM},
  address   = {Indianapolis, IN},
  year      = {2016},
  doi       = {10.1145/2983323.2983876},
}
% Formal algorithms for Transformers (arXiv eprint 2207.09238).
@misc{Phuong.Hutter.2022,
  author        = {Mary Phuong and Marcus Hutter},
  title         = {Formal Algorithms for Transformers},
  year          = {2022},
  eprint        = {2207.09238},
  archiveprefix = {arXiv},
}
% Training BERT with an academic budget (EMNLP 2021).
@inproceedings{izsak-etal-2021-train,
  author    = {Izsak, Peter and Berchansky, Moshe and Levy, Omer},
  title     = {How to Train {BERT} with an Academic Budget},
  booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing},
  pages     = {10644--10652},
  publisher = {Association for Computational Linguistics},
  address   = {Online and Punta Cana, Dominican Republic},
  year      = {2021},
  doi       = {10.18653/v1/2021.emnlp-main.831},
}
% Gaussian Error Linear Units (arXiv eprint 1606.08415).
@misc{Hendrycks.Gimpel.2016.arXiv,
  author        = {Hendrycks, Dan and Gimpel, Kevin},
  title         = {{Gaussian Error Linear Units (GELUs)}},
  journal       = {arXiv preprint},
  pages         = {1--10},
  year          = {2016},
  eprint        = {1606.08415},
  archivePrefix = {arXiv},
  arxivId       = {1606.08415},
  url           = {http://arxiv.org/abs/1606.08415},
}
% OpenAI technical report on generative pre-training (2018).
@report{Radford.et.al.2018.GPT1.report,
  author      = {Alec Radford and Karthik Narasimhan and Tim Salimans and Ilya Sutskever},
  title       = {{Improving Language Understanding by Generative Pre-Training}},
  type        = {Technical report},
  institution = {OpenAI},
  year        = {2018},
  url         = {https://cdn.openai.com/research-covers/language-unsupervised/language_understanding_paper.pdf},
}
% OpenAI technical report on language models as unsupervised multitask learners (2019).
@report{Radford.et.al.2019.GPT2.report,
  author      = {Alec Radford and Jeffrey Wu and Rewon Child and David Luan and Dario Amodei and Ilya Sutskever},
  title       = {{Language Models are Unsupervised Multitask Learners}},
  type        = {Technical report},
  institution = {OpenAI},
  year        = {2019},
  url         = {https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf},
}
% Generating Wikipedia by summarizing long sequences (ICLR 2018).
% The given name of author Kaiser is written with the {\L} escape instead of a raw Unicode
% character, matching the Wu.et.al.2016.GoogleMT entry and keeping the entry usable with
% 8-bit BibTeX.
@inproceedings{Liu.et.al.2018.ICLR,
  author    = {Peter J Liu and Mohammad Saleh and Etienne Pot and Ben Goodrich and Ryan Sepassi and {\L}ukasz Kaiser and Noam Shazeer},
  title     = {Generating Wikipedia by Summarizing Long Sequences},
  booktitle = {Proceedings of the 6th International Conference on Learning Representations},
  address   = {Vancouver, BC, Canada},
  year      = {2018},
  url       = {https://openreview.net/forum?id=Hyg0vbWC-},
}
% Language models as few-shot learners (arXiv preprint, 2020).
@article{Brown.et.al.2020.GPT3,
  author  = {Tom B. Brown and Benjamin Mann and Nick Ryder and Melanie Subbiah and Jared Kaplan and Prafulla Dhariwal and Arvind Neelakantan and Pranav Shyam and Girish Sastry and Amanda Askell and Sandhini Agarwal and Ariel Herbert-Voss and Gretchen Krueger and Tom Henighan and Rewon Child and Aditya Ramesh and Daniel M. Ziegler and Jeffrey Wu and Clemens Winter and Christopher Hesse and Mark Chen and Eric Sigler and Mateusz Litwin and Scott Gray and Benjamin Chess and Jack Clark and Christopher Berner and Sam McCandlish and Alec Radford and Ilya Sutskever and Dario Amodei},
  title   = {Language Models are Few-Shot Learners},
  journal = {arXiv preprint},
  year    = {2020},
  url     = {http://arxiv.org/abs/2005.14165},
}
% Llama 2 foundation and chat models (arXiv, 2023).
@article{Touvron.et.al.2023.llama2,
  author  = {Touvron, Hugo and Martin, Louis and Stone, Kevin and Albert, Peter and Almahairi, Amjad and Babaei, Yasmine and Bashlykov, Nikolay and Batra, Soumya and Bhargava, Prajjwal and Bhosale, Shruti and Bikel, Dan and Blecher, Lukas and Ferrer, Cristian Canton and Chen, Moya and Cucurull, Guillem and Esiobu, David and Fernandes, Jude and Fu, Jeremy and Fu, Wenyin and Fuller, Brian and Gao, Cynthia and Goswami, Vedanuj and Goyal, Naman and Hartshorn, Anthony and Hosseini, Saghar and Hou, Rui and Inan, Hakan and Kardas, Marcin and Kerkez, Viktor and Khabsa, Madian and Kloumann, Isabel and Korenev, Artem and Koura, Punit Singh and Lachaux, Marie-Anne and Lavril, Thibaut and Lee, Jenya and Liskovich, Diana and Lu, Yinghai and Mao, Yuning and Martinet, Xavier and Mihaylov, Todor and Mishra, Pushkar and Molybog, Igor and Nie, Yixin and Poulton, Andrew and Reizenstein, Jeremy and Rungta, Rashi and Saladi, Kalyan and Schelten, Alan and Silva, Ruan and Smith, Eric Michael and Subramanian, Ranjan and Tan, Xiaoqing Ellen and Tang, Binh and Taylor, Ross and Williams, Adina and Kuan, Jian Xiang and Xu, Puxin and Yan, Zheng and Zarov, Iliyan and Zhang, Yuchen and Fan, Angela and Kambadur, Melanie and Narang, Sharan and Rodriguez, Aurelien and Stojnic, Robert and Edunov, Sergey and Scialom, Thomas},
  title   = {{Llama 2: Open Foundation and Fine-Tuned Chat Models}},
  journal = {arXiv},
  year    = {2023},
  url     = {http://arxiv.org/abs/2307.09288},
}
% Instruction following with human feedback (Advances in Neural Information Processing Systems 35).
% The page range contained a literal Unicode en dash; it now uses the `--` convention that
% BibTeX styles expect and that the other entries in this file use.
@inproceedings{Ouyang.et.al.2022.NeurIPS,
  author    = {Ouyang, Long and Wu, Jeff and Jiang, Xu and Almeida, Diogo and Wainwright, Carroll L and Mishkin, Pamela and Zhang, Chong and Agarwal, Sandhini and Slama, Katarina and Ray, Alex and Schulman, John and Hilton, Jacob and Kelton, Fraser and Miller, Luke and Simens, Maddie and Askell, Amanda and Welinder, Peter and Christiano, Paul and Leike, Jan and Lowe, Ryan},
  title     = {Training language models to follow instructions with human feedback},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {35},
  pages     = {27730--27744},
  publisher = {Curran Associates, Inc.},
  year      = {2022},
}
% Role of demonstrations in in-context learning (EMNLP 2022).
@inproceedings{Min.et.al.2022.EMNLP,
  author    = {Min, Sewon and Lyu, Xinxi and Holtzman, Ari and Artetxe, Mikel and Lewis, Mike and Hajishirzi, Hannaneh and Zettlemoyer, Luke},
  editor    = {Goldberg, Yoav and Kozareva, Zornitsa and Zhang, Yue},
  title     = {Rethinking the Role of Demonstrations: What Makes In-Context Learning Work?},
  booktitle = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing},
  pages     = {11048--11064},
  publisher = {Association for Computational Linguistics},
  address   = {Abu Dhabi, United Arab Emirates},
  month     = dec,
  year      = {2022},
  doi       = {10.18653/v1/2022.emnlp-main.759},
  url       = {https://aclanthology.org/2022.emnlp-main.759},
}
% In-context learning viewed as implicit gradient descent (Findings of ACL 2023).
@inproceedings{Dai.et.al.2023.ACLFindings,
  author    = {Dai, Damai and Sun, Yutao and Dong, Li and Hao, Yaru and Ma, Shuming and Sui, Zhifang and Wei, Furu},
  editor    = {Rogers, Anna and Boyd-Graber, Jordan and Okazaki, Naoaki},
  title     = {Why Can {GPT} Learn In-Context? Language Models Secretly Perform Gradient Descent as Meta-Optimizers},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2023},
  pages     = {4005--4019},
  publisher = {Association for Computational Linguistics},
  address   = {Toronto, Canada},
  year      = {2023},
  doi       = {10.18653/v1/2023.findings-acl.247},
}
% Sentence-BERT sentence embeddings (EMNLP-IJCNLP 2019).
% The entry used the biblatex-only `date` and `location` fields; they are now `year` and
% `address`, matching the other entries of this file and working with both classic BibTeX
% and biblatex.
@inproceedings{Reimers.Gurevych.2019.EMNLP,
  author    = {Reimers, Nils and Gurevych, Iryna},
  title     = {{Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks}},
  booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)},
  pages     = {3980--3990},
  publisher = {Association for Computational Linguistics},
  address   = {Hong Kong, China},
  year      = {2019},
  doi       = {10.18653/v1/D19-1410},
}