-
Notifications
You must be signed in to change notification settings - Fork 3
/
fundamentals.tdl
5144 lines (4445 loc) · 192 KB
/
fundamentals.tdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
;;; -*- Mode: tdl; Coding: utf-8; -*-
;;;
;;; Copyright (c) 1994-2018
;;; Dan Flickinger, Rob Malouf, Emily M. Bender
;;; see LICENSE for conditions
;;;
;;; fundamentals.tdl
;;;
;;; Basic definitions of types for English grammar
;;;
;;; Rob Malouf, 3-Nov-1994
;;;
;;; $Id: fundamentals.tdl 7479 2010-02-21 23:11:30Z danf $
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Types for Sign, Word, Phrase, and Lex-Entry
;; Minimal sign: an *avm* that introduces just two features, SYNSEM (typed
;; synsem_min) and the boolean KEY-ARG.
sign_min := *avm* &
  [ SYNSEM  synsem_min,
    KEY-ARG bool ].
;; GENRE used to control variations in grammaticality, e.g. for edited
;; vs informal text, but subsuming earlier boolean ROBUST feature so we
;; can accommodate some ill-formed input such as missing det for sg-noun.
;; IDIOM marks whether a sign contains an idiomatic element.
;; --SIND is re-entrant with HOOK.INDEX, to help modularize path specs,
;; and to delay expansion of subcategorizations until necessary.
;; DPF 2020-02-11 - To avoid dependence on sign type unif failure for phrases
;; (such as avoiding N-N compound as input to punctuation rule), make
;; [DERIVED luk] an attribute of sign, and make phrase be [DERIVED na]. Also
;; stamp na value on most lexical entries with orthography "/" to block prefix
;; punctuation, so as to force fw-sequence analysis for e.g. |"/Desktop/blue"|
;;
;; The general sign type.  Inside SYNSEM, --MIN is re-entrant with
;; LOCAL.CAT.HEAD.MINORS.MIN and --SIND is re-entrant with
;; LOCAL.CONT.HOOK.INDEX.  The remaining features (ORTH, ARGS, INFLECTD,
;; GENRE, DIALECT, IDIOM, DERIVED, RNAME, CONCEPTS) are introduced here with
;; their most general values.
sign := sign_min &
  [ SYNSEM synsem &
      [ LOCAL.CAT.HEAD.MINORS.MIN #min,
        LOCAL.CONT.HOOK.INDEX #index,
        --MIN #min,
        --SIND #index ],
    ORTH orthog,
    ARGS *list*,
    INFLECTD luk,
    GENRE genre,
    DIALECT dialect,
    IDIOM bool,
    DERIVED luk,
    RNAME basic_ctype,
    CONCEPTS *list* ].
;; DPF 2012-10-30 - Changed value of RNAME to ctype rather than string, to
;; support better interaction with tokenization
;; Common supertype of phrases and lexical rules.  The sign's semantic HOOK
;; (SYNSEM.LOCAL.CONT.HOOK) is identified with the HOOK of its C-CONT, and its
;; ORTH.FROM is identified with the ORTH.FROM of the first element of ARGS.
;; SYNSEM is restricted to canonical_synsem.
phrase_or_lexrule := sign &
  [ SYNSEM canonical_synsem & [ LOCAL [ CONT [ HOOK #hook ] ] ],
    ORTH [ FROM #from ],
    C-CONT mrs_min & [ HOOK #hook ],
    ARGS [ FIRST [ ORTH [ FROM #from ] ] ] ].
;; Bare subtype of sign_min with no features of its own; word_or_lexrule lists
;; it as one of its supertypes.
word_or_lexrule_min := sign_min.
;; DERIVED distinguishes basic from derived words, so the foreign-word-sequence
;; rules can avoid some spurious ambig.
;;
;; Bare subtype of sign; it introduces no features or constraints here and
;; serves as the parent of the two types defined below.
word_or_punct_or_bracketed_phrase := sign.
;; Needed to enable punctuation tokens like dashes (which normally attach
;; only to lexical items) to attach to bracketed phrases formed by the syntactic
;; rule combining hyphenated tokens such as |read- only|
;;
;; Both types below are bare subtypes; word_or_lexrule inherits from each.
word_or_bracketed_phrase := word_or_punct_or_bracketed_phrase.
word_or_punct_phrase := word_or_punct_or_bracketed_phrase.
; ALTS allow lexical entries to block lexical rule application
;
;; Supertype shared by words and lexical rules.  Introduces ALTS and TOKENS
;; and fixes SYNSEM.LOCAL.CAT.--SLASHED.BOOL to -.
;; Note that non_unary-rule is declared as a subtype of phrase_or_lexrule, so
;; this type also inherits whatever phrase_or_lexrule constrains.
word_or_lexrule := word_or_lexrule_min &
                   word_or_bracketed_phrase &
                   non_unary-rule &
                   word_or_punct_phrase &
  [ ALTS alts_min,
    TOKENS tokens,
    SYNSEM [ LOCAL [ CAT [ --SLASHED [ BOOL - ] ] ] ] ].
; Group 'word' together with punctuation rules to distinguish basic words from
; derived or inflected forms - used at least in the rule for italicized or
; foreign word sequences.
;; Four bare subtypes below word_or_lexrule; none adds features of its own.
;; word_or_infl_rule joins the infl-or-punct branch with the nonpunct branch,
;; and basic_word in turn inherits from both word_or_infl_rule and
;; word_or_punct_rule.
word_or_infl_or_punct_rule := word_or_lexrule.
word_or_punct_rule := word_or_infl_or_punct_rule.
word_or_nonpunct_rule := word_or_lexrule.
word_or_infl_rule := word_or_infl_or_punct_rule & word_or_nonpunct_rule.
;; Ties a word's orthography and key relation to its token list:
;;  - ORTH.CLASS, ORTH.FROM and ORTH.FORM are shared with +CLASS, +FROM and
;;    +FORM of the first item on TOKENS.+LIST;
;;  - ORTH.TO is shared with TOKENS.+LAST.+TO;
;;  - LKEYS.KEYREL's CFROM / CTO are shared with that same FROM / TO;
;;  - the token list itself is also exposed as SYNSEM.PHON.ONSET.--TL.
basic_word := word_or_infl_rule & word_or_punct_rule &
  [ SYNSEM [ PHON [ ONSET [ --TL #tl ] ],
             LKEYS [ KEYREL [ CFROM #from,
                              CTO #to ] ] ],
    ORTH [ CLASS #class,
           FROM #from,
           TO #to,
           FORM #form ],
    TOKENS [ +LIST #tl & < [ +CLASS #class,
                             +FROM #from,
                             +FORM #form ], ... >,
             +LAST [ +TO #to ] ] ].
;; A basic_word whose SYNSEM.PUNCT.PNCTPR value is constrained to ppair.
word := basic_word &
  [ SYNSEM [ PUNCT [ PNCTPR ppair ] ] ].
;; Value type of ORTH on sign.  Declared as a *cons* that carries three
;; string-valued features: FORM, FROM and TO.
orthog := *cons* &
  [ FORM string,
    FROM string,
    TO   string ].

;; orthog extended with a CLASS feature of type token_class.
orthography := orthog &
  [ CLASS token_class ].
; Not all phrases have SYNSEM phr_synsem, since we need to allow the head-comp
; rules to build signs which are still [ SYNSEM lex_synsem ], for constructions
; like "twenty-two" and "five fifteen p.m.". So most phrases will assign the
; type phr_synsem to the value of SYNSEM, but not all.
; DPF 02-Sept-00 - Removed [ HC-LEX - ] because it blocks conjoined lexical
; somethings.
;; DPF 2020-05-13 - Moved identity of ONSET on mother and first dtr down to
;; subtypes, since for the n-num measure-NP rule for |an $800 phone|, the ONSET
;; comes from the right dtr.
;; DPF 2022-08-18 - For now, limit to at most one gap.
;;
;; Phrasal signs: ARG-S is empty, the SLASH list is a 0-1-list (at most one
;; gap), and DERIVED is na.
phrase := phrase_or_lexrule &
  [ SYNSEM [ LOCAL [ ARG-S < > ],
             NONLOC [ SLASH [ LIST 0-1-list ] ] ],
    DERIVED na ].
;; Used to disambiguate bracketed phrases, whose daughter can either be a
;; non_unary_phrase or a lexical rule.
;;
;; Bare subtype of phrase_or_lexrule with no constraints of its own;
;; word_or_lexrule lists it among its supertypes.
non_unary-rule := phrase_or_lexrule.
; DPF (09-Jan-99) - Added boolean LEX feature to be used for determining the
; head-complement type of the projections of lexical heads - most produce
; phrases, but see discussion of HC-LEX feature below.
;; The synsem_min chain introduces the SYNSEM features in layers, so that
;; other types can refer to a synsem carrying only as many features as needed.

;; Layer 0: no features at all.
synsem_min := *avm*.

;; Layer 1: LOCAL and NONLOC.
synsem_min0 := synsem_min &
  [ LOCAL  mod_local,
    NONLOC non-local_min ].

;; Layer 2: OPT plus the --MIN and --SIND shortcuts.
synsem_min1 := synsem_min0 &
  [ OPT    bool,
    --MIN  predsort,
    --SIND *top* ].

;; Layer 3: LEX, MODIFD, PHON and PUNCT.
synsem_min2 := synsem_min1 &
  [ LEX    luk,
    MODIFD xmod_min,
    PHON   phon_min,
    PUNCT  punctuation_min ].
;; Bare subtype of synsem_min2; canonical_lex_or_phrase_synsem inherits from it.
lex_or_phrase := synsem_min2.
;; The general synsem type; it adds nothing to synsem_min2.
synsem := synsem_min2.
;; Bare subtype of synsem; parent of canonical_synsem and of
;; expressed_non_canonical.
expressed_synsem := synsem.
; No gaps
;; Sits under synsem_min0 (LOCAL and NONLOC only); shared parent of
;; canonical_synsem and unexpressed_min.
canonical_or_unexpressed := synsem_min0.
;; Joins expressed_synsem with canonical_or_unexpressed.
canonical_synsem := expressed_synsem & canonical_or_unexpressed.
;; DPF 2015-07-07 - Added --BRDG to record whether a bridging rule has applied,
;; to enable blocking of iteration of bridging rules, for efficiency
;;
;; Canonical synsem that is also a lex_or_phrase; introduces the luk-valued
;; feature --BRDG.
canonical_lex_or_phrase_synsem := canonical_synsem &
                                  lex_or_phrase &
  [ --BRDG luk ].
; Type which enables exclusion of phrases like n-n compounds which are built
; by rule but have similar distribution to lexical signs.
;; Bare subtype with no features of its own; both nonlex_synsem and
;; basic_lex_synsem inherit from it.
lex_or_nonlex_synsem := canonical_lex_or_phrase_synsem.
; To distinguish syntactically constructed constituents marked LEX + from
; simple lexical entries - used e.g. to avoid making nominal gerund NPs
; directly from lexical gerunds, to avoid spurious ambiguity for
; 'Singing is fun' given that there is already a verbal gerund 'singing'.
;; Bare subtype; parent of both nonlex_synsem and lex_phr_synsem.
nonlex_or_lex_phr_synsem := canonical_lex_or_phrase_synsem.
;; Joins lex_or_nonlex_synsem with nonlex_or_lex_phr_synsem.
nonlex_synsem := lex_or_nonlex_synsem & nonlex_or_lex_phr_synsem.
;; Used for passive lexical rule to exclude |*looked by Kim up|
;; phr_synsem is the nonlex_synsem subtype that fixes LEX to -.
phr_synsem := nonlex_synsem &
[ LEX - ].
;; DPF 2017-08-20 - We need yet another level of abstraction here, to enable
;; derived attrib adjectives to constrain their synsem type to be incompatible
;; with abstr_lex_synsem, to ensure that prefix-adj rules apply first, while
;; still letting attr_adj rule constrain LKEYS.
;;
;; LKEYS provide shortcuts to relations in RELS or a lex-entry's complements
;;
;; Introduces LKEYS (typed lexkeys) on canonical lexical-or-phrasal synsems.
abstr_lex_or_deriv_synsem := canonical_lex_or_phrase_synsem &
  [ LKEYS lexkeys ].

;; Bare subtype; parent of basic_lex_synsem and lex_phr_synsem.
abstr_lex_synsem := abstr_lex_or_deriv_synsem.

;; Joins abstr_lex_synsem with lex_or_nonlex_synsem.
basic_lex_synsem := abstr_lex_synsem & lex_or_nonlex_synsem.

;; The basic_lex_synsem subtype that fixes LEX to +.
lex_synsem := basic_lex_synsem &
  [ LEX + ].
; For signs that are constructed by syntactic rule but have similar
; (though not identical) distribution to lexical items - e.g. n_n_cmpnds.
;; LEX + synsem that inherits from abstr_lex_synsem and from
;; nonlex_or_lex_phr_synsem rather than from basic_lex_synsem.
lex_phr_synsem := abstr_lex_synsem &
                  nonlex_or_lex_phr_synsem &
  [ LEX + ].
; For signs that have combined with a punctuation mark, and hence are no longer
; eligible to undergo most derivational rules, to avoid spurious ambiguity.
;; Bare subtype of basic_lex_synsem (a sibling of lex_synsem); unlike
;; lex_synsem it does not itself constrain LEX.
lex_punct_synsem := basic_lex_synsem.
; DPF 29-Nov-02 - Removed [INDEX non_expl] to allow "There was known to be a
; problem" where pred "was" requires complement to be SUBJ *olist*.
;; Bare subtype of synsem; parent of expressed_non_canonical and
;; unexpressed_reg.
non_canonical := synsem.

;; Non-canonical but expressed: REL and QUE lists are empty.
expressed_non_canonical := non_canonical & expressed_synsem &
  [ NONLOC.REL.LIST < >,
    NONLOC.QUE.LIST < > ].

;; A gap: its own LOCAL value is the single element of its SLASH list.
gap := expressed_non_canonical &
  [ LOCAL #local,
    NONLOC [ SLASH [ LIST < #local > ] ] ].
;; Bare subtype of canonical_or_unexpressed; parent of unexpressed and
;; anti_synsem_min.
unexpressed_min := canonical_or_unexpressed.

;; Unexpressed synsem with the full synsem_min2 feature set: SPCMPS is empty
;; and all three NONLOC lists (SLASH, REL, QUE) are empty.
unexpressed := unexpressed_min & synsem_min2 &
  [ LOCAL [ CAT [ VAL [ SPCMPS < > ] ] ],
    NONLOC.SLASH.LIST < >,
    NONLOC.REL.LIST < >,
    NONLOC.QUE.LIST < > ].

;; Joins unexpressed with non_canonical.
unexpressed_reg := unexpressed & non_canonical.
; DPF 28-Aug-99 - Removed OPT + from anti_synsem since it blocked
; "you can have anything you want"
;; anti_synsem_min sits directly under unexpressed_min and adds no features;
;; anti_synsem combines it with the constraints of unexpressed.
anti_synsem_min := unexpressed_min.
anti_synsem := anti_synsem_min & unexpressed.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; CAT type
;; The feature MC has three possible values: +, -, and na. Non-clauses are
;; always MC na, since they can't really be said to be root or non-root.
;; All clauses are MC bool, and if they have a restricted distribution then
;; they are MC + or MC -.
;; local_min plus the CONJ feature.
local_basic := local_min &
  [ CONJ conj_min ].

;; The full LOCAL type: adds CTXT and ARG-S to local_basic.
local := local_basic &
  [ CTXT  ctxt_min,
    ARG-S *list* ].

; Distinguish arg gaps from modifier gaps, to constrain ATB extraction to args
arg-local := local.
mod-local := local.
;; SLASH list-wrapper & [ LIST *locallist* ],
;; The full NONLOC type: SLASH, QUE and REL are each a list-wrapper.
non-local := non-local_min &
  [ SLASH list-wrapper,
    QUE   list-wrapper,
    REL   list-wrapper ].

;; NONLOC with empty REL and QUE lists; SLASH is left unconstrained.
non-local_norel := non-local &
  [ REL [ LIST < > ],
    QUE [ LIST < > ] ].

;; NONLOC with all three lists empty (adds the empty SLASH list).
non-local_none := non-local_norel &
  [ SLASH [ LIST < > ] ].
;; DPF 2020-05-09 - We need to distinguish between non-local_none for use in
;; lexical type definition, where due to lexical threading of NONLOC, we can't
;; stamp LIST < > on each 0-dlist, and the stronger subtype we want for
;; constructions, where we really want an assertion of 0-dlist to entail
;; LIST < >.
;; Subtype of non-local_none that states empty SLASH, REL and QUE lists.
;; NOTE(review): as defined in this file, non-local_none already states
;; SLASH.LIST < > and inherits REL.LIST < > and QUE.LIST < > from
;; non-local_norel, so the three constraints below add nothing beyond the
;; distinct type name -- confirm whether the supertypes were meant to be weaker.
non-local_none_phr := non-local_none &
[ SLASH.LIST < >,
REL.LIST < >,
QUE.LIST < > ].
;; For exceptions to lexical rules
;;
;; Featureless base for ALTS values (word_or_lexrule types ALTS as alts_min).
alts_min := *avm*.

;; One boolean feature per entry below; each is introduced with the
;; underspecified value bool.
alts := alts_min &
  [ PASSIVE  bool,
    VPELLIP  bool,
    ADVADD   bool,
    ADVNGADD bool,
    NPPART   bool,
    VADJ     bool,
    NGER     bool,
    SQPNCT   bool,
    VPREF    bool,
    DATIVE   bool,
    ATTRJ    bool,
    SAI      bool,
    CSAI     bool,
    TOUGH    bool,
    JTOR     bool,
    INVQ     bool,
    CPFRAG   bool,
    ENOUGH   bool,
    DETPART  bool,
    MWEWB    bool,
    UNADJ    bool ].

;; Sibling of alts under alts_min; introduces none of the features above.
no_alts := alts_min.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Synsem values
; The feature AGR is introduced on LOCAL rather than on CAT (or on HEAD) since
; coordination schemata unify the CAT value of the daughters with that of the
; mother, but need to be able to change AGR on the mother (to get plural
; agreement on verb when subject is a coordinated NP with "and" vs. "or").
;; Featureless root of the CAT types: local_min1 types CAT as cat_min, and
;; cat_min1 refines it with HEAD and VAL.
cat_min := *avm*.
;; DPF (22-Oct-98) Added feature HC-LEX ("is head-complement type lexical?")
;; on CAT, to identify the type of synsem that results from combining the word
;; with its complements. We recognize at least the head-complement
;; structures "thirty-two" and "two o'clock" as still being lexical signs, not
;; phrasal ones, since they can appear as prenominal adjectives and in noun-
;; noun compound constructions, respectively. The head-complement rule
;; determines its SYNSEM type by unifying its LEX value with the HC-LEX of
;; its head dtr - this works because the HCOMP rule requires its SYNSEM value to
;; be of type canonical_synsem, and the subtypes of canonical_synsem are the
;; only types that introduce the feature LEX, namely lex_synsem and phr_synsem.
; DPF 2-Jun-02 - Similarly, words differ in whether the head-specifier
; construction they head is lexical or phrasal: det+noun is phrasal to avoid
; allowing e.g. "*my the sony laptop arrived" but deg+adj is still lexical to
; avoid "*the children very small arrived" (given other constraints for each
; of these two constructions). So introduce HS-LEX value.
;;
;; cat_min plus the two core features HEAD and VAL, each at its minimal type.
cat_min1 := cat_min &
  [ HEAD head_min,
    VAL  valence_min ].
; DPF 24-apr-08 - Added NEGPOL to distinguish negative polarity effects,
; especially to license declarative subj-aux inversion as in "Not until
; today did Kim see the point."
;; DPF 2020-04-30 - It seems that we need to allow a complement to constrain
;; the LEX value of the head-comp phrase, for example to block
;; |a tall enough to see building| while keeping |a tall enough building to see|
;; where |tall| in either NP picks up |enough|, staying underspecified for LEX,
;; but if the AP picks up the VP marked with NH-LEX -, it becomes LEX - and
;; hence disallowed as prenominal; while if instead the Nbar picks up the VP,
;; the prenominal AP remains compatible with LEX +.
;; DPF 2022-04-26 - After relaxing constraints on the copula's subject in order
;; to analyze "In the corner was standing a coatrack" with a raised PP subject,
;; we need additional machinery to still constrain ordinary copula constructions
;; with APs and PPs, which have an empty SUBJ value but appear with an NP for
;; the subject of the clause. And we want to preserve using the same copula
;; with VPs and with APs and PPs, to analyze "They were singing and happy".
;; So to give the copula a consistent source of the constraints on subject
;; consistent with the copula's complement, we add a HEAD feature RSUBJHD which
;; will be identified with SUBJ..HEAD for verbs, and stipulated to be nominal
;; for adjectives and non-scopal prepositions.
;; DPF 2022-11-12 - Added --SLASHED to record whether a phrase contains a gap,
;; to help with packing, since the use of ET computation types for SLASH makes
;; it harder using subsumption to avoid packing a slashed and a non-slashed
;; constituent, given lexical threading for SLASH. Also added boolean --SLPASS
;; to enable marking of tough-adjectives to block passing of --SLASHED, so
;; head-comp phrase "easy to please" gets [--SLASHED -] even though the non-hd
;; daughter is slashed. It proved necessary to make sure --SLPASS is given a
;; specific value on every element of every COMPS list in lextypes.tdl, since
;; the computation of --SLASHED on the mother of the head-comp rule assigns the
;; value of --SLPASS from the complement in case neither head nor comp is
;; --SLASHED.
;;
;; The full CAT type.  On top of HEAD and VAL (from cat_min1) it introduces:
;;   MC, HC-LEX, HS-LEX, NH-LEX, NEGPOL   -- luk-valued
;;   POSTHD, --SLPASS                     -- bool-valued
;;   RSUBJHD                              -- a head_min
;;   --SLASHED                            -- a bool-wrapper
cat := cat_min1 &
  [ MC        luk,
    POSTHD    bool,
    HC-LEX    luk,
    HS-LEX    luk,
    NH-LEX    luk,
    NEGPOL    luk,
    RSUBJHD   head_min,
    --SLASHED bool-wrapper,
    --SLPASS  bool ].
;; CAT and CONT are introduced on separate subtypes of mod_local;
;; local_min joins the two and adds AGR.
local_min1 := mod_local & [ CAT cat_min ].
local_min2 := mod_local & [ CONT mrs_min ].
local_min := local_min1 & local_min2 & [ AGR *top* ].
;; Featureless roots for NONLOC and PHON values.
non-local_min := *avm*.
phon_min := *avm*.

;; PHON value carrying an ONSET.
phon := phon_min & [ ONSET onset ].

;; ONSET value: a *sort* that carries the list-valued feature --TL.
onset := *sort* & [ --TL *list* ].
;; Onset subtypes.  con and voc each join con_or_voc with one of the two
;; *_or_unk types; con_or_voc restricts --TL to native_token_list.
con_or_unk := onset.
voc_or_unk := onset.

con_or_voc := onset &
  [ --TL native_token_list ].

con := con_or_unk & con_or_voc.
voc := voc_or_unk & con_or_voc.

; For unknown words
unk_onset := con_or_unk & voc_or_unk &
  [ --TL generic_token_list ].
;; DPF 2020-04-25 - Added subtype no_onset used in specialized rule mnp-vp_jp_c
;; for |10% owned by IBM|, restricted to just meas-NPs with `%', which has
;; [ONSET onset], so will unify with no_onset, but other measure nouns won't.
;;
;; Bare subtype of onset with no constraints of its own.
no_onset := onset.

;; phon extended with the boolean feature DIGIT.
phon_num := phon & [ DIGIT bool ].
;; Value types for RNAME on sign (typed basic_ctype there).  Only ctype
;; carries a feature: the string-valued -CTYPE-.
basic_ctype := *sort*.
unary_ctype := basic_ctype.

ctype := basic_ctype &
  [ -CTYPE- string ].

anti_ctype := basic_ctype.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Head and valence
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Featureless root of the LOCAL types: synsem_min0 types its LOCAL value as
;; mod_local, and local_min1 / local_min2 both inherit from it.
mod_local := *avm*.
; The MOD value specifies the type of phrase that a sign can modify, and the
; POSTHD value stipulates the relative position of the modifier with
; respect to the head. (An earlier approach employing a generalization about
; a head being final in its phrase or not might still be considered as an
; alternate to this stipulative one.)
; The POSTHD feature should stay with MOD, since only relevant when MOD is
; non-empty.
; DPF 09-Jan-99 - Changed value of MOD to be of type synsem rather than type
; local, since some modifiers (like vocative NPs) only modify SLASH-empty
; phrases, which means they have to have access to the NONLOC feature of
; the phrase they modify.
;; Featureless root of the HEAD types; head and poss-able both inherit
;; directly from it.
head_min := *avm*.
; DPF 16-Aug-99 - Added INV to head type, since need to distinguish root
; from non-root phrases as output from parser, and since inverted S's now have
; MSG empty, we have to require all non-root fragments to be INV -.
; DPF 09-sept-07 - With the departure of messages, we can now push INV back
; down to verb_or_frag.
; TAM likewise can be pushed down to non-noun subtype.
; AUX has to be here as long as we want PPs to avoid attaching to aux-headed
; phrases, since they can also modify nouns.
;; DPF 2013-06-14 - Need to pull INV back up to `head' since the hd-cmp_2_c
;; rule restricts the head to be non-inverted if a verb, but we also want to
;; use this rule for other head types, as in |further than Browne from Abrams|
;; DPF 2020-03-25 - Re 09-sept-07: In extracted-adjunct type, TAM is constrained
;; for a head of type `mobile', whose subtypes include `noun'. So pulling TAM
;; back up to `head'.
;;
;; The general head type.  Introduces the features shared by all of its
;; subtypes: MOD (a list), PRD and INV (bool), AUX (luk), MINORS, TAM and CASE.
head := head_min &
  [ MOD    *list*,
    PRD    bool,
    MINORS minors_basic,
    AUX    luk,
    INV    bool,
    TAM    tam_min,
    CASE   case ].
;; Featureless root of the VAL types (cat_min1 types VAL as valence_min).
valence_min := *avm*.

;; Basic valence: three list-valued features SUBJ, SPR and COMPS.
valence := valence_min &
  [ SUBJ  *list*,
    SPR   *list*,
    COMPS *list* ].
;; DPF 2016-11-09 - Constrain SPCMPS to be 0-1-list, to avoid endless optcomp
;; spinning in the cases where the grammar neglects to constrain SPCMPS.
;; valence plus SPEC (a list) and SPCMPS (limited to a 0-1-list).
valence_sp := valence &
  [ SPEC   *list*,
    SPCMPS 0-1-list ].

; KCMP is a pointer to a distinguished ('key') complement synsem, used in
; generalizations within the lexical type hierarchy, but not present in the
; syntax.
valence_lex := valence_sp &
  [ KCMP *avm* ].

;; DPF 2015-05-06 - KCMP2 is a second-complement specification, used in the
;; definition of day-of-month lexical types.
;;
valence2_lex := valence_lex &
  [ KCMP2 *avm* ].
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Head types
; POSS is akin to CASE, distinguishing possessive-marked NPs from ordinary NPs,
; a contrast needed for e.g. the adjective "own" which can only appear in
; phrases with a possessive NP specifier. Also used in building partitive
; phrases such as in "Kim's is good."
;
;; Introduces the boolean POSS feature directly under head_min.
poss-able := head_min & [ POSS bool ].

;; Bare subtypes of head forming the chain
;; subst_or_func > non_frag > func; none adds features of its own.
disc_adverbee := head.
subst_or_func := head.
non_frag := subst_or_func.
func := non_frag.
; For small clauses in gapping
;; n_p_v_adv_adj_comp sits directly under head.  The two four-way types below
;; are its immediate subtypes, and n_or_p_or_v inherits from both of them and
;; from subst.
n_p_v_adv_adj_comp := head.
n_or_p_or_v_or_adv := n_p_v_adv_adj_comp.
n_or_p_or_v_or_comp := n_p_v_adv_adj_comp.
n_or_p_or_v := n_or_p_or_v_or_adv & n_or_p_or_v_or_comp & subst.
;; Separate branch under head; adv_or_dadv narrows p_or_adv_or_dadv.
p_or_adv_or_dadv := head.
adv_or_dadv := p_or_adv_or_dadv.
;; DPF 2022-05-28 - Let's try reducing the massive ambiguity in measure phrases
;; by adding a marker for modnp so we can block degsp-H analysis of
;; "[21 cents] [a share]"
;; The marker is the boolean attribute --MODNP declared here.
n_or_a_or_p_or_adv := head & [ --MODNP bool ].
;; Further disjunctive head supertypes.  Each statement adds one link (or, for
;; p_or_adv and subst, several links) to the hierarchy under head.
v_or_g_or_dadv := head.
p_adj_adv_or_comp := n_p_v_adv_adj_comp.
p_or_adv_or_comp := p_adj_adv_or_comp.
p_or_v_or_adv := n_or_p_or_v_or_adv.
p_or_basic_adv := head.
;; p_or_adv is a common subtype of the five disjunctive types listed.
p_or_adv := p_or_v_or_adv & p_or_adv_or_dadv &
n_or_p_or_adv & p_or_adv_or_comp & p_or_basic_adv.
adv_or_comp := p_or_adv_or_comp.
subst_or_adv_or_frag := head.
subst_or_adv := subst_or_adv_or_frag.
;; subst is both non_frag and subst_or_adv.
subst := non_frag & subst_or_adv.
;; v_or_g_or_a_or_p is defined further down in this file.
v_or_g_or_p := v_or_g_or_a_or_p.
;; non_noun_or_frag is the type on which VFORM is declared.
;; DPF 2022-05-16 - Pulled introduction of VFORM up from basic_verb_or_frag
;; so we can have lexical passives such as "muddled" that constrain VFORM pas
;; but can still be HEAD v_or_a.
;;
non_noun_or_frag := head & [ VFORM vform ].
;; non_noun_or_adv narrows non_noun_or_frag (and so carries VFORM); non_noun is
;; its subtype that is also subst.
non_noun_or_adv := non_noun_or_frag.
non_noun := subst & non_noun_or_adv.
v_or_a_or_p := non_noun.
; Used for reordering first and second complements
verbal_or_a_or_p := non_noun.
; Used by copula in "it is that Kim arrived" or "it is because Kim arrived"
;; Also inherits from vc_add, which is defined later in this file.
verbal_or_p := verbal_or_a_or_p & vc_add & n_or_p_or_v_or_comp.
; To differentiate |our idea is [that] he should stay|
comp_or_p := verbal_or_p.
a_or_p_or_adv := n_or_a_or_p_or_adv.
;; n_or_a_or_adv_or_det is defined further down in this file.
a_or_adv_or_det := n_or_a_or_adv_or_det.
a_or_adv := a_or_adv_or_det & n_p_v_adv_adj_comp & a_or_p_or_adv.
n_or_p_or_frag := head.
;; Carries the attributes --BARE (luk) and --BARED (bool).
bare_able := head &
  [ --BARE  luk,
    --BARED bool ].
;; n_or_p_or_a and n_or_p_or_adv both narrow n_or_a_or_p_or_adv (and so carry
;; --MODNP); n_or_p_or_a additionally inherits from bare_able.
n_or_p_or_a := n_or_a_or_p_or_adv & bare_able.
n_or_p_or_adv := n_or_a_or_p_or_adv.
;; The two commented-out lines below are an earlier, deeper chain above
;; n_or_v_or_a; the live definition hangs n_or_v_or_a directly off subst.
;v_or_n_or_g_or_a := subst.
;n_or_v_or_a := v_or_n_or_g_or_a.
n_or_v_or_a := subst.
n_or_p := n_or_p_or_v & n_or_p_or_frag & n_or_p_or_a & n_or_p_or_adv.
;n_or_a := n_or_v_or_a.
; DPF 3-apr-05 - Added poss-able to allow for locative inversion using sb-hd_mc_c.
a_or_p := v_or_a_or_p & n_or_p_or_a & poss-able & a_or_p_or_adv.
v_or_p := v_or_a_or_p & n_or_p_or_v & v_or_g_or_p & p_or_v_or_adv.
n_or_v := n_or_p_or_v & n_or_v_or_a.
; For heads that can take comparative degree specifiers
comp_spec_able := head.
; Exclude prepositions for target of most comp. deg. specifiers
n_or_a_or_adv_or_det := comp_spec_able.
n_or_adv := n_or_a_or_adv_or_det.
;; poss-able derives from head_min rather than head, so head is listed
;; explicitly as a second supertype here.
basic_nom_or_ttl := poss-able & head.
nom_or_ttl := basic_nom_or_ttl.
noun_or_ttl := nom_or_ttl.
; For constraint on head-spec rule.
; DPF 8-aug-07 - The hack feature --BARE is used to prevent underspecified
; mass-count nouns from undergoing the robust bare-pl-sg rule, since they
; can already undergo the ordinary bare-plural rule.
;; --BARE reaches n_or_a_or_det through its bare_able supertype.
;
n_or_a_or_det := n_or_a_or_adv_or_det & bare_able.
;; det_or_adv allows supertype to include numerals and measure NPs, which
;; both take e.g. "over" as a degree specifier
det_or_adv := func & a_or_adv_or_det.
det_or_partn := non_frag.
adj_or_det := n_or_a_or_det.
det := det_or_adv & poss-able & adj_or_det & det_or_partn.
adverbee_or_modnp := head.
; Adverbs modify adverbees.
adverbee := adverbee_or_modnp.
;; Referenced earlier in this file by v_or_g_or_p.
v_or_g_or_a_or_p := subst.
;; v_or_g_or_a_or_n declares the boolean attribute CNTRCTD.
;; DPF 2022-05-23 - Added CNTRCTD to distinguish contracted auxiliaries, so
;; rules can choose to avoid them, such as cl_adv_c to block spurious
;; "'s clothing proved adequate".  The feature needs to be this high in the
;; subst hierarchy since the v_mod rules can apply to a range of head types
;; but we still want to exclude contracted verb heads.
;;
v_or_g_or_a_or_n := subst & [ CNTRCTD bool ].
;; v_or_g_or_a inherits CNTRCTD from v_or_g_or_a_or_n.
v_or_g_or_a := v_or_g_or_a_or_p & v_or_g_or_a_or_n.
;; DPF 2022-12-26 - Pushed verb_or_frag down to subtype verb, to avoid having
;; vocatives modify CPs: "we admire madness -- that we arise"
;; (The commented-out line is the previous definition, which still listed
;; verb_or_frag as a supertype.)
;v_or_g := v_or_g_or_a & v_or_g_or_p & verb_or_frag & v_or_g_or_dadv.
v_or_g := v_or_g_or_a & v_or_g_or_p & v_or_g_or_dadv.
nominal_or_verbal := subst.
nominal := nominal_or_verbal & poss-able.
;; verbal_or_conj-s is defined later in this file.
verbal := v_or_g & nominal_or_verbal & adverbee & disc_adverbee &
verbal_or_conj-s.
; mobile things can be extracted in the complement-extraction rule, from which
; we exclude nominative-case NPs by not making all NPs mobile.
mobile := head.
;; NOTE(review): the dated notes below appear to describe adj_or_intadj (they
;; mention its nominal_or_verbal, a_or_adv and v_or_a parents) although they
;; sit directly above v_or_a - confirm intended placement.
; DPF 7-mar-05 - Make subtype of nominal_or_verbal so PPs can modify APs,
; in order to admit "towns not obvious on the map"
; DPF 12-feb-08 - Add a_or_adv to adj_or_intadj so we can coordinate
; integer adjectives as in "six or seven cats"
;; DPF 2017-09-23 - Let's pull the CARDINAL feature up to intsort,
;; so we can mark normal adj as CARDINAL -, to prevent coord of
;; |*big and three cats| while allowing |first and oldest cat|. Will still
;; also get |third and older cat| (okay) but also |?big and third cat|.
;; DPF 2017-10-19 - Changed parent type from n_or_v_or_a to v_or_a so we can
;; include integer adjs as in |we made him third|
;;
v_or_a := v_or_g_or_a & n_or_v_or_a & verbal_or_a_or_p.
;; intsort is not defined in this part of the file.
adj_or_intadj := v_or_a & a_or_p & adverbee & nominal_or_verbal &
a_or_adv & intsort.
basic_adj := mobile & adj_or_intadj.
; For positive adjectives
;; Fixes CARDINAL to - in addition to the three supertypes.
adj := basic_adj & adj_or_det & p_adj_adv_or_comp & [ CARDINAL - ].
;; The commented-out line is the earlier definition without v_or_p.
; prep_or_modnp := p_or_adv.
prep_or_modnp := p_or_adv & v_or_p.
; DPF 26-Jul-99 - Made prep not inherit from adverbee, since in general
; don't want adverbs modifying PPs.
; DPF 17-Dec-01 - But need to have at least some adverbs modifying PPs:
; "The books currently in the store include ..."
basic_prep := a_or_p & n_or_p & prep_or_modnp & comp_or_p &
adverbee.
;; prep and prep_nonmob differ in that only prep adds mobile (and
;; comp_spec_able) to basic_prep.
prep := basic_prep & mobile & comp_spec_able.
prep_nonmob := basic_prep.
; For result of npadv_mod non-branching rule
; Need mobile for "where did i put it"
modnp := n_or_p & nominal & prep_or_modnp & mobile & a_or_adv &
adverbee_or_modnp & det_or_adv.
;; DPF 2020-07-12 - At long last, add feature to explicitly control whether
;; a noun can be a nonhead in a N-N compound.  We've been trying until now to
;; control this via other properties such as the (vaguely defined) NORM
;; feature, but that feature is also used in other constructions such as
;; vocatives, where no generalization emerges about nouns across those
;; constructions. So let's go down the road of construction-specific features.
;; DPF 2022-04-23 - Add parent n_or_p so prd copula can appear with inverted
;; locative expressions such as "in the corner was standing an old coatrack"
;; DPF 2022-06-03 - Add feature to prevent some nouns from serving as titles,
;; e.g. to block title "hire" in "[[Will Abrams] hire] Browne?"
;; DPF 2022-06-11 - Add feature --COUNTED to enable blocking of "fifty ten cats"
;; DPF 2023-01-07 - Add feature --VOCATIVE to enable blocking of vocative rule
;;
;; supnoun declares four boolean construction-control attributes:
;; --CPDNHD, --TITLE-CPD, --COUNTED and --VOCATIVE.
supnoun := nominal & n_or_v & n_or_a_or_det & n_or_p &
  [ --CPDNHD    bool,
    --TITLE-CPD bool,
    --COUNTED   bool,
    --VOCATIVE  bool ].
;; Subtypes of supnoun; each inherits its four boolean attributes.
noun_or_nomger_or_nadj := supnoun.
; for both nominal and verbal gerunds
noun_or_nomger := basic_nom_or_ttl & noun_or_nomger_or_nadj.
noun_or_gerund := supnoun.
; For nouns that can undergo conversion to adj, like "eared" or "blue"
;; The commented-out line is the earlier definition under supnoun; noun_adj
;; now hangs directly off subst.
;noun_adj := supnoun.
noun_adj := subst.
;; basic_noun picks up CNTRCTD through v_or_g_or_a_or_n.
basic_noun := noun_or_nomger & n_or_adv & noun_or_gerund & noun_or_ttl &
v_or_g_or_a_or_n.
;; DPF 2017-10-05 - Non-partitive nouns, overlapping in effect with
;; nonpart_nom_rel
non_partn := noun_or_nomger_or_nadj.
noun := basic_noun & non_partn.
noun_adjable := basic_noun & noun_adj & mobile & non_partn.
; DPF 10-Apr-02 - Added n_or_p since want to allow gerunds to undergo hdn_optcmp_c
; rule, to get e.g. "way of saying that Kim arrives"
; DPF 19-may-09 - Now that we make verbal gerunds as phrases, try dropping
; v_or_g supertype, to avoid spurious ambiguity for e.g. "not hiring kim".
; May lead to larger clean-up of head types.  FIX?
; gerund := noun_or_gerund & adverbee & v_or_g & n_or_p & disc_adverbee.
; DPF 23-jun-09 - Re 19-may-09: But gerunds still need to inherit verbal
; features VFORM, INV, TAM, so add super-type verb_or_frag.
;; NOTE(review): the note above names verb_or_frag, but the live definition
;; inherits from basic_verb_or_frag (an ancestor of verb_or_frag) - confirm
;; that the note is simply out of date.
;;
comp_or_gerund := head.
gerund := noun_or_gerund & adverbee & disc_adverbee & comp_or_gerund &
basic_verb_or_frag.
;; gerund_full adds vc_add, and with it the --ADDIN attribute.
gerund_full := gerund & vc_add.
; For derived entries like the "eyed" of "wide-eyed"
n-ed := noun_adj & basic_nom_or_ttl & non_partn.
; For run-on sentences.
verb_or_conj_or_frag := head.
verb_or_conj-s := verb_or_conj_or_frag.
; For no-copula and run-on sentences
;; DPF 2022-05-16 - Pushed introduction of VFORM up to parent non_noun_or_frag,
;; so we can have lexical passives such as "muddled" that constrain VFORM pas
;; but can still be HEAD v_or_a.
basic_verb_or_frag := non_noun_or_frag & verb_or_conj_or_frag.
;; The next two types form a chain under basic_verb_or_frag:
;; basic_verb_or_frag > verb_or_frag_or_comp > verb_or_frag.
;; For target of ordinary "not"
verb_or_frag_or_comp := basic_verb_or_frag.
; Enable exclusion of spurious ambiguity for gerund "not hiring Kim"
verb_or_frag := verb_or_frag_or_comp.
#|
; Allow conjunction reduction and gapped sentences
verb_or_cr := verb_or_frag.
verbal_or_cr := non_noun.
cr := verb_or_cr & verbal_or_cr & adverbee_or_modnp.
|#
; Added poss-able to allow s's as subjects of quoting verbs, as in
; 'Kim arrived said Sandy.'
;; DPF 2014-10-20 - Added comp_spec_able to allow |was more/most admired|
;; One supertype per line, in the original order.
;;
verb := verbal &
        v_or_p &
        n_or_v &
        v_or_a &
        poss-able &
        mobile &
        verb_or_conj-s &
        comp_spec_able &
        verbal_or_p &
        verb_or_frag.
;; addinfl bundles a person/number value (ADDPN) with a TAM value (ADDTAM).
addinfl := *avm* &
  [ ADDPN  pn,
    ADDTAM tam_min ].

;; vc_add heads carry one such bundle under --ADDIN.
vc_add := subst & [ --ADDIN addinfl ].
; DPF 24-jul-05 - Removed nominal as supertype of comp since we use a rule to
; convert NP-subject-taking VPs into CP-taking ones, with distinct semantic
; linking.
;; 2012-12-19 - Added PRD - to prevent spurious coord of conjoined that-S
;; with prdp (depictives), as in wsj20d:22064023.
;; Besides PRD -, comp also restricts MOD to *anti_list*.
;;
comp := verbal &
        poss-able &
        mobile &
        adv_or_comp &
        comp_or_gerund &
        comp_or_p &
        verb_or_frag_or_comp &
  [ MOD *anti_list*,
    PRD - ].
; For adverb fragments, including speech-relevant adverbs
adv_or_no_head := head.
;; basic_adv carries VFORM, via non_noun_or_adv < non_noun_or_frag.
basic_adv := det_or_adv & non_noun_or_adv & subst_or_adv & adv_or_no_head.
; 'not' and closed class of other adverbs that can be added to aux COMPS list
; via 'adv_addition' lexical rule.
basic_lexadv := basic_adv & disc_adverbee.
negadv := basic_lexadv & adverbee.
lexadv := basic_lexadv & a_or_adv & p_or_basic_adv.
reg_adv := basic_adv & disc_adverbee & p_or_adv & adv_or_comp &
a_or_adv & n_or_adv & adv_or_dadv.
;; adv and adv_nonmob differ only in that adv adds mobile to reg_adv.
adv := reg_adv & mobile.
adv_nonmob := reg_adv.
deg_adv := basic_adv.
root-marker := v_or_g_or_dadv & adverbee & adv_or_dadv & non_frag.
wh_adv := func.
; For titles like "Mr" or "Professor", not to be confused with nouns
;; ttl is a func head that shares noun_or_ttl with basic_noun.
ttl := func & noun_or_ttl & n_or_a_or_p_or_adv.
detspec := func.
; Hack used in blocking unwanted affixes
no_head := non_frag & adv_or_no_head & bare_able.
; Used for binary bridging rule
bridge_head := head.
; Types for punctuation and string boundaries.
; **FIX**
;; punct_hd hangs directly off head; the paired-punctuation types form a
;; separate subtree under func, rooted at punct_pair_hd.
punct_hd := head.
punct_pair_hd := func.
quote_punct_hd := punct_pair_hd.
dq_punct_hd := quote_punct_hd.
sq_punct_hd := quote_punct_hd.
;; lq_punct_hd is a subtype of sq_punct_hd, not a sibling of it.
lq_punct_hd := sq_punct_hd.
lp_punct_hd := punct_pair_hd.
rp_punct_hd := punct_pair_hd.
;; Nominal-gerund heads.  The _mob variant adds mobile and restricts CASE to
;; non_nom.
nomger := noun_or_nomger & nom_or_ttl & non_partn.
nomger_mob := nomger & mobile & [ CASE non_nom ].
; The following types are used to enable strict subcategorization (the usual
; case)
;; The commented-out line is an earlier noun_mob that also required
;; CASE non_nom; the live definition leaves CASE unconstrained.
;noun_mob := noun & mobile & [ CASE non_nom ].
noun_mob := noun & mobile.
noun_nonmob := noun.
basic_partn := basic_noun & det_or_partn.
partn := basic_partn.
partn_mobile := partn & mobile.
gerund_mob := gerund & mobile & [ CASE non_nom ].
; For 'long' of 'for too long'
;; A mobile basic_partn that is also a_or_adv_or_det, with CASE non_nom.
grad_n := basic_partn & mobile & a_or_adv_or_det &
[ CASE non_nom ].
interv_n := noun.
; For types that can undergo number-partitive:'five of them','ten feet of rope'
adj_partitive := non_frag.
intadj_or_superl := adj_partitive.
;; intsort is not defined in this part of the file.
intadj_or_comp := adj_partitive & intsort.
compar_or_superl_adj := basic_adj.
;; Superlative and comparative adjectives each pair compar_or_superl_adj with
;; one of the two adj_partitive subtypes above.
superl_adj := compar_or_superl_adj & intadj_or_superl.
compar_adj := compar_or_superl_adj & intadj_or_comp.
; For measure-NPs
noun_meas := noun & adj_partitive.
; For tag questions, which can't be verbal in order to avoid being root S's.
tagaux := non_frag & verb_or_frag.
conj_or_frag := verb_or_conj_or_frag.
; For fragments of all kinds
; DPF 12-apr-08 - Try adding disc_adverbee, to allow "Yes, and Kim."
;; MOD is fixed to the empty list.
frag := subst_or_func &
        verb_or_frag &
        conj_or_frag &
        disc_adverbee &
        mobile &
        subst_or_adv_or_frag &
  [ MOD < > ].
; For run-on sentence conjunction (to avoid spurious modifier ambiguity)
;; The commented-out line is an earlier conj-s that inherited from verbal;
;; the live definition uses verbal_or_conj-s instead, a supertype that verbal
;; also inherits from.
;; conj-s := verbal & verb_or_conj-s.
verbal_or_conj-s := basic_verb_or_frag.
conj-s := verb_or_conj-s & verbal_or_conj-s & disc_adverbee.
;; DPF 2020-03-24 - Added type as per trunk.
;; conj-s_full adds vc_add, and with it the --ADDIN attribute.
conj-s_full := conj-s & vc_add.
; For 'foreign' words, typically marked as italicized
;; DPF 2019-11-23 - Change to inherit from subst_or_func rather than just head,
;; so punct clitics will attach.
;; MOD is fixed to the empty list.
fw := subst_or_func & [ MOD < > ].
; Speech filler words in word lattices
;; Inherits directly from head and fixes MOD to the empty list.
spfill := head & [ MOD < > ].
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Sorts for atomic values
strict_sort := *sort*.
; Person and number represented as atomic sorts, with gender a separate attrib
; relevant for 3sg, and where count/mass distinction only for 3sg neuter.
;; pn is the root of the person/number lattice below.  The lattice is built
;; over six cells (1s, 2s, 3s, 1p, 2p, 3p), and the "N cells" headings give
;; the number of cells each group of types covers.
pn := *sort*.
; 5 cells
;; Each type excludes exactly the one cell named after the leading "-";
;; e.g. -1s covers every cell except first-person singular.
-1s := pn.
-2s := pn.
-3s := pn.
-1p := pn.
-2p := pn.
-3p := pn.
; 4 cells
;; Each type excludes two cells and is defined as the conjunction of the two
;; corresponding 5-cell types.  All fifteen pairs of cells are present.
;; -1, -2, -3 exclude a whole person (both numbers).
-1 := -1s & -1p.
-2 := -2s & -2p.
-3 := -3s & -3p.
;; Two cells of the same number excluded.
-12s := -1s & -2s.
-13s := -1s & -3s.
-23s := -2s & -3s.
-12p := -1p & -2p.
-13p := -1p & -3p.
-23p := -2p & -3p.
;; One singular and one plural cell of different persons excluded.
-1s-2p := -1s & -2p.
-1s-3p := -1s & -3p.
-2s-1p := -2s & -1p.
-2s-3p := -2s & -3p.
-3s-1p := -3s & -1p.
-3s-2p := -3s & -2p.
;3 cells
;; From here on, names list the cells a type covers.  Each type is the
;; conjunction of the three 4-cell types that exclude pairs drawn from its
;; three missing cells.
;; sg and pl cover all three persons of one number.
sg := -12p & -13p & -23p.
pl := -12s & -13s & -23s.
;; "P+Qn": both numbers of person P plus the single cell Qn
;; (e.g. 1+2s = 1s, 1p, 2s).
1+2s := -3s-2p & -23p & -3.
1+2p := -2s-3p & -23s & -3.
1+3s := -2s-3p & -23p & -2.
1+3p := -3s-2p & -23s & -2.
2+1s := -3s-1p & -13p & -3.
2+1p := -1s-3p & -13s & -3.
2+3s := -1s-3p & -13p & -1.
2+3p := -3s-1p & -13s & -1.
3+1s := -2s-1p & -12p & -2.
3+1p := -1s-2p & -12s & -2.
3+2s := -1s-2p & -12p & -1.
3+2p := -2s-1p & -12s & -1.
;; Two cells of one number plus the remaining person in the other number
;; (e.g. 12s+3p = 1s, 2s, 3p).
12s+3p := -3s-1p & -3s-2p & -12p.
13s+2p := -2s-1p & -2s-3p & -13p.
23s+1p := -1s-2p & -1s-3p & -23p.
1s+23p := -2s-1p & -3s-1p & -23s.
2s+13p := -1s-2p & -3s-2p & -13s.
3s+12p := -1s-3p & -2s-3p & -12s.
; 2 cells
;; Each type is the conjunction of the four 3-cell types that contain both of
;; its cells.
;; 1, 2, 3: one person, both numbers.
1 := 1+2s & 1+2p & 1+3s & 1+3p.
2 := 2+1s & 2+1p & 2+3s & 2+3p.
3 := 3+1s & 3+1p & 3+2s & 3+2p.
;; Two persons within one number.
12s := 12s+3p & 1+2s & 2+1s & sg.
13s := 13s+2p & 1+3s & 3+1s & sg.
23s := 23s+1p & 2+3s & 3+2s & sg.
12p := 3s+12p & 1+2p & 2+1p & pl.
23p := 1s+23p & 2+3p & 3+2p & pl.
13p := 2s+13p & 1+3p & 3+1p & pl.
;; One singular cell plus one plural cell of a different person.
1s+2p := 1+2p & 2+1s & 13s+2p & 1s+23p.
1s+3p := 1+3p & 3+1s & 12s+3p & 1s+23p.
2s+1p := 2+1p & 1+2s & 2s+13p & 23s+1p.
2s+3p := 2+3p & 3+2s & 2s+13p & 12s+3p.
3s+1p := 3+1p & 1+3s & 3s+12p & 23s+1p.
3s+2p := 3+2p & 2+3s & 3s+12p & 13s+2p.
; 1 cell
;; The six fully specified person/number values.  Each is the conjunction of
;; the five 2-cell types that contain it.
1s := 1 & 12s & 13s & 1s+2p & 1s+3p.
2s := 2 & 12s & 23s & 2s+1p & 2s+3p.
3s := 3 & 13s & 23s & 3s+1p & 3s+2p.
1p := 1 & 12p & 13p & 2s+1p & 3s+1p.
2p := 2 & 12p & 23p & 1s+2p & 3s+2p.
3p := 3 & 13p & 23p & 2s+3p & 1s+3p.
; For unspecified pernum, to constrain generator introducing e.g. 'am' when
; an argument position is unbound.
;; A direct subtype of pn that sits outside the cell lattice above: it
;; inherits from none of the cell-based types.
unsp_pernum := pn.
;; Gender sorts.  real_gender splits into animate (with subtypes masc, fem,
;; andro, andro1) and neut.
gender := *sort*.
real_gender := gender.
animate := real_gender.
masc := animate.
fem := animate.
neut := real_gender.
andro := animate.
andro1 := animate.
; The following two are used in tag questions to block mismatched expletive
; pronouns in main clause and tag, since we can't block on unified index
; values, since in general tag questions introduce a pronoun which must have
; a unique index bound by the quantifier it (now) introduces lexically.
;; Both are direct subtypes of gender, outside the real_gender branch.
no_gend_it := gender.
no_gend_there := gender.
;; Bare *avm* subtype; presumably the minimal type for person/number/gender
;; bundles, with its subtypes defined below - TODO confirm.
png_min := *avm*.