Skip to content

Commit

Permalink
Merge pull request #1165 from globalwordnet/issue-1134
Browse files Browse the repository at this point in the history
Remove duplicates
  • Loading branch information
jmccrae authored Jan 24, 2025
2 parents 99c82fb + 9fe9992 commit 4dd3205
Show file tree
Hide file tree
Showing 47 changed files with 400 additions and 832 deletions.
6 changes: 6 additions & 0 deletions scripts/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@ def main():
instances = set()
ilis = set()
wikidatas = set()
definitions = set()

for synset in wn.synsets:
if synset.id[-1:] != synset.part_of_speech.value:
Expand Down Expand Up @@ -368,6 +369,11 @@ def main():
if len(defn.text) == 0:
print("ERROR: empty definition for %s" % (synset.id))
errors += 1
if defn.text in definitions:
print("ERROR: duplicate definition for %s (%s)" % (synset.id, defn.text))
errors += 1
else:
definitions.add(defn.text)

sr_counter = Counter((sr.target, sr.rel_type)
for sr in synset.synset_relations)
Expand Down
47 changes: 47 additions & 0 deletions src/deprecations.csv
Original file line number Diff line number Diff line change
Expand Up @@ -150,4 +150,51 @@
"ewn-92333133-n","","ewn-08986627-n","","Duplicate (#1127)"
"ewn-92333134-n","","ewn-08986776-n","","Duplicate (#1127)"
"ewn-01300469-n","i42189","ewn-01285678-n","i42122","Duplicate (#1127)"
"ewn-14381098-n","i112443","ewn-14104698-n","i110927","Duplicate (#1134)"
"ewn-02025384-v","i31846","ewn-02022224-v","i31831","Duplicate (#1134)"
"ewn-92326409-n","","ewn-07664811-n","i77096","Duplicate (#1134)"
"ewn-12144165-n","i100862","ewn-07819069-n","i78116","Duplicate (#1134)"
"ewn-01743426-v","i30434","ewn-01637966-v","i29918","Duplicate (#1134)"
"ewn-01707783-s","i9334","ewn-01705397-s","i9314","Duplicate (#1134)"
"ewn-02488985-s","i13742","ewn-02179281-s","i11936","Duplicate (#1134)"
"ewn-02463673-s","i13608","ewn-02463536-a","i13607","Duplicate (#1134)"
"ewn-00932330-v","i26262","ewn-00022092-v","i21876","Duplicate (#1134)"
"ewn-00498142-v","i24194","ewn-00158495-v","i22502","Duplicate (#1134)"
"ewn-00179205-v","i22600","ewn-00173351-v","i22577","Duplicate (#1134)"
"ewn-00572673-v","i24606","ewn-00173351-v","i22577","Duplicate (#1134)"
"ewn-86491000-n","","ewn-04771667-n","i62052","Duplicate (#1134)"
"ewn-01409889-v","i28760","ewn-01410030-v","i28761","Duplicate (#1134)"
"ewn-01607363-v","i29771","ewn-01511000-v","i29265","Duplicate (#1134)"
"ewn-07384870-n","i75426","ewn-07377946-n","i75387","Duplicate (#1134)"
"ewn-92363685-n","","ewn-14744853-n","i114358","Duplicate (#1134)"
"ewn-92363714-n","","ewn-14749988-n","i114387","Duplicate (#1134)"
"ewn-92364728-n","","ewn-14927246-n","i115401","Duplicate (#1134)"
"ewn-02228261-v","i32862","ewn-02228837-v","i32865","Duplicate (#1134)"
"ewn-01383926-n","i42585","ewn-01383685-n","i42584","Non-existant (#1134)"
"ewn-01557813-n","i43440","ewn-01550784-n","i43397","Non-existant (#1134)"
"ewn-01739337-n","i44445","ewn-01739210-n","i44444","Non-existant (#1134)"
"ewn-02635917-n","i49561","ewn-02636474-n","i49565","Duplicate (#1134)"
"ewn-02636185-n","i49563","ewn-02636474-n","i49565","Duplicate (#1134)"
"ewn-92315622-n","","ewn-05608025-n","i66342","Duplicate (#1134)"
"ewn-80147706-n","","ewn-09627401-n","i87144","Duplicate (#1134)"
"ewn-82046135-n","","ewn-09914590-n","i88827","Duplicate (#1134)"
"ewn-83218519-n","","ewn-10007754-n","i89371","Duplicate (#1134)"
"ewn-89117996-n","","ewn-81007314-n","","Duplicate (#1134)"
"ewn-08564875-n","i81893","ewn-08564718-n","i81892","Duplicate (#1134)"
"ewn-00093232-r","i18715","ewn-00034576-r","i18337","Duplicate (#1134)"
"ewn-00112752-r","i18849","ewn-00032295-r","i18320","Duplicate (#1134)"
"ewn-00431167-r","i21092","ewn-00032295-r","i18320","Duplicate (#1134)"
"ewn-00228639-r","i19701","ewn-00150568-r","i19142","Duplicate (#1134)"
"ewn-00280604-r","i20097","ewn-00256795-r","i19911","Duplicate (#1134)"
"ewn-00497644-r","i21593","ewn-00046739-r","i18410","Duplicate (#1134)"
"ewn-00497861-r","i21595","ewn-00497722-r","i21594","Duplicate (#1134)"
"ewn-92299200-n","","ewn-02700534-n","i49936","Duplicate (#1134)"
"ewn-92299310-n","","ewn-02719537-n","i50046","Duplicate (#1134)"
"ewn-92302385-n","","ewn-03237120-n","i53116","Duplicate (#1134)"
"ewn-03835818-n","i56630","ewn-03691288-n","i55794","Duplicate (#1134)"
"ewn-13062308-n","i105264","ewn-13059704-n","i105253","Non-existant (#1134)"
"ewn-13067976-n","i105293","ewn-13059704-n","i105253","Non-existant (#1134)"
"ewn-02040664-n","i46092","ewn-02040367-n","i46090","Duplicate (#1134)"
"ewn-02040983-n","i46094","ewn-02040367-n","i46090","Duplicate (#1134)"
"ewn-85556310-n","","ewn-81448123-n","","Duplicate (#1134)"
"ewn-10410299-n","i91815","ewn-"10300973-n,"i91146","Duplicate (#1150)"
48 changes: 9 additions & 39 deletions src/yaml/adj.all.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -101598,7 +101598,7 @@
- 01461111-a
01461461-s:
definition:
- (chiefly a direction or description in music) very soft
- (chiefly a direction or description in music) very very soft
ili: i7981
members:
- pianissimo assai
Expand Down Expand Up @@ -118581,14 +118581,15 @@
- 01707465-s
- 01707559-s
- 01707690-s
- 01707783-s
- 01707870-s
01705397-s:
definition:
- having two leaves
ili: i9314
members:
- bifoliate
- two-leaved
- two-leafed
partOfSpeech: s
similar:
- 01704867-a
Expand Down Expand Up @@ -118800,16 +118801,6 @@
partOfSpeech: s
similar:
- 01704867-a
01707783-s:
definition:
- having two leaves
ili: i9334
members:
- two-leaved
- two-leafed
partOfSpeech: s
similar:
- 01704867-a
01707870-s:
definition:
- having a single leaf
Expand Down Expand Up @@ -151938,13 +151929,17 @@
02179281-s:
definition:
- divided into two lobes
domain_topic:
- 06076105-n
example:
- a bilobate leaf
- a bifid petal
ili: i11936
members:
- bilobate
- bilobated
- bilobed
- bifid
partOfSpeech: s
similar:
- 02178581-a
Expand Down Expand Up @@ -172890,24 +172885,13 @@
- not traveled over or through
example:
- untraveled roads
- an untraversed region
ili: i13607
members:
- untraveled
- untravelled
partOfSpeech: a
similar:
- 02463673-s
02463673-s:
definition:
- not traveled over or through
example:
- an untraversed region
ili: i13608
members:
- untraversed
partOfSpeech: s
similar:
- 02463536-a
partOfSpeech: a
02463784-a:
definition:
- made neat and tidy by trimming
Expand Down Expand Up @@ -174638,7 +174622,6 @@
partOfSpeech: a
similar:
- 02488854-s
- 02488985-s
- 02489095-s
- 02489516-s
- 02489644-s
Expand Down Expand Up @@ -174675,19 +174658,6 @@
partOfSpeech: s
similar:
- 02488224-a
02488985-s:
definition:
- divided into two lobes
domain_topic:
- 06076105-n
example:
- a bifid petal
ili: i13742
members:
- bifid
partOfSpeech: s
similar:
- 02488224-a
02489095-s:
definition:
- resembling a fork; divided or separated into two branches
Expand Down
Loading

0 comments on commit 4dd3205

Please sign in to comment.