Skip to content

Commit

Permalink
languages: sync with CLDR
Browse files Browse the repository at this point in the history
- add missing definitions present in CLDR
- use CLDR plural rules instead of the default
  • Loading branch information
nijel committed Jan 29, 2024
1 parent 3ad3735 commit d4ada97
Show file tree
Hide file tree
Showing 122 changed files with 2,489 additions and 134 deletions.
7 changes: 5 additions & 2 deletions PLURALS_DIFF.md

Large diffs are not rendered by default.

7 changes: 5 additions & 2 deletions languages.csv
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ den,Slave (Athapascan),2,n != 1
dgr,Dogrib,2,n != 1
din,Dinka,2,n != 1
dnk,Dengka,2,n != 1
doi,Dogri,2,n != 1
doi,Dogri,2,n > 1
dry,Darai,2,n != 1
dsb,Lower Sorbian,4,(n % 100 == 1) ? 0 : ((n % 100 == 2) ? 1 : ((n % 100 == 3 || n % 100 == 4) ? 2 : 3))
dua,Duala,2,n != 1
Expand Down Expand Up @@ -258,6 +258,7 @@ hil,Hiligaynon,2,n != 1
hit,Hittite,2,n != 1
hmn,Hmong,2,n != 1
hne,Chhattisgarhi,2,n != 1
hnj,Hmong Njua,1,0
ho,Hiri Motu,2,n != 1
hr,Croatian,3,n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2
hrx,Hunsrik,2,n != 1
Expand Down Expand Up @@ -344,6 +345,7 @@ lez,Lezghian,2,n != 1
lfn,Lingua Franca Nova,2,n != 1
lg,Luganda,2,n != 1
li,Limburgish,2,n != 1
lij,Ligurian,2,n != 1
lki,Laki,2,n != 1
lkt,Lakota,1,0
ln,Lingala,2,n > 1
Expand Down Expand Up @@ -453,6 +455,7 @@ pam,Pampanga,2,n != 1
pap,Papiamento,2,n != 1
pau,Palauan,2,n != 1
pbb,Páez,2,n != 1
pcm,Pidgin (Nigeria),2,n > 1
peo,Persian (Old),2,n != 1
phn,Phoenician,2,n != 1
pi,Pali,2,n != 1
Expand Down Expand Up @@ -613,7 +616,7 @@ uz,Uzbek,2,n != 1
uz_Latn,Uzbek (latin),2,n != 1
vai,Vai,2,n != 1
ve,Venda,2,n != 1
vec,Venetian,2,n != 1
vec,Venetian,3,(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)
vi,Vietnamese,1,0
vls,Flemish (West),2,n != 1
vo,Volapük,2,n != 1
Expand Down
23 changes: 23 additions & 0 deletions scripts/lint
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ def parse_csv(name):

# Load the data files that the checks below cross-reference. Each result is
# used as a keyed lookup (``.keys()``, ``in``); presumably keyed by language
# code -- confirm against parse_csv.
languages = parse_csv("languages.csv")
aliases = parse_csv("aliases.csv")
cldr = parse_csv("cldr.csv")
default_countries = parse_csv("default_countries.csv")

for alias in aliases:
if not alias.islower():
Expand All @@ -35,3 +37,24 @@ if missing:
overlap = set(languages.keys()) & set(aliases.keys())
if overlap:
raise ValueError(f"Overlaping languages and aliases: {overlap}")

# Codes present in cldr.csv that have no entry in languages.csv.
missing = set(cldr.keys()) - set(languages.keys())
# Remove aliases (these use lower case)
missing -= {miss for miss in missing if miss.lower() in aliases}
# Remove default countries (these use lower case)
missing -= {miss for miss in missing if miss.lower() in default_countries}
# Remove unwanted languages
missing -= {"ar_001"}
if missing:
    raise ValueError(f"Missing from CLDR: {missing}")

# Validate CLDR plural rules match
# Codes deliberately left out of the plural comparison.
exceptions = {"es", "it", "ca", "es_MX", "es_419"}
matching = (set(cldr.keys()) & set(languages.keys())) - exceptions
# Walk the codes in sorted order and gather every mismatch before failing, so
# the report is deterministic and complete instead of depending on set
# iteration order and stopping at the first offender.
mismatches = []
for match in sorted(matching):
    # Index 3 is the column compared as the plural formula.
    plural_our = languages[match][3]
    plural_cldr = cldr[match][3]
    # Only flag entries where we still carry "n != 1" while CLDR defines
    # something else; other differences are not checked here.
    if plural_our == "n != 1" and plural_cldr != "n != 1":
        mismatches.append(
            f"Mismatching plural form for {match}: {plural_our!r} != {plural_cldr!r}"
        )
if mismatches:
    raise ValueError("\n".join(mismatches))
3 changes: 3 additions & 0 deletions weblate_language_data/language_codes.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,7 @@
"hmn",
"hmo",
"hne",
"hnj",
"ho",
"hr",
"hrv",
Expand Down Expand Up @@ -556,6 +557,7 @@
"lfn",
"lg",
"li",
"lij",
"lim",
"lin",
"lit",
Expand Down Expand Up @@ -717,6 +719,7 @@
"pap",
"pau",
"pbb",
"pcm",
"peo",
"per",
"phn",
Expand Down
33 changes: 30 additions & 3 deletions weblate_language_data/languages.py
Original file line number Diff line number Diff line change
Expand Up @@ -1230,7 +1230,7 @@
# or other variant.
_("Dogri"),
2,
"n != 1",
"n > 1",
),
(
"dry",
Expand Down Expand Up @@ -2348,6 +2348,15 @@
2,
"n != 1",
),
(
"hnj",
# Translators: Language name for ISO code "hnj". The parenthesis clarifies
# variant of the language. It could contain a region, age (Old, Middle, ...)
# or other variant.
_("Hmong Njua"),
1,
"0",
),
(
"ho",
# Translators: Language name for ISO code "ho". The parenthesis clarifies
Expand Down Expand Up @@ -3122,6 +3131,15 @@
2,
"n != 1",
),
(
"lij",
# Translators: Language name for ISO code "lij". The parenthesis clarifies
# variant of the language. It could contain a region, age (Old, Middle, ...)
# or other variant.
_("Ligurian"),
2,
"n != 1",
),
(
"lki",
# Translators: Language name for ISO code "lki". The parenthesis clarifies
Expand Down Expand Up @@ -4103,6 +4121,15 @@
2,
"n != 1",
),
(
"pcm",
# Translators: Language name for ISO code "pcm". The parenthesis clarifies
# variant of the language. It could contain a region, age (Old, Middle, ...)
# or other variant.
_("Pidgin (Nigeria)"),
2,
"n > 1",
),
(
"peo",
# Translators: Language name for ISO code "peo". The parenthesis clarifies
Expand Down Expand Up @@ -5549,8 +5576,8 @@
# variant of the language. It could contain a region, age (Old, Middle, ...)
# or other variant.
_("Venetian"),
2,
"n != 1",
3,
"(n == 1) ? 0 : ((n != 0 && n % 1000000 == 0) ? 1 : 2)",
),
(
"vi",
Expand Down
20 changes: 19 additions & 1 deletion weblate_language_data/locale/ab/LC_MESSAGES/django.po
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ msgid ""
msgstr ""
"Project-Id-Version: Weblate Language Data\n"
"Report-Msgid-Bugs-To: https://github.com/WeblateOrg/language-data/issues/\n"
"POT-Creation-Date: 2024-01-29 12:57+0100\n"
"POT-Creation-Date: 2024-01-29 13:13+0100\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: Automatically generated\n"
"Language-Team: none\n"
Expand Down Expand Up @@ -1570,6 +1570,12 @@ msgstr ""
msgid "Chhattisgarhi"
msgstr ""

#. Translators: Language name for ISO code "hnj". The parenthesis clarifies
#. variant of the language. It could contain a region, age (Old, Middle, ...)
#. or other variant.
msgid "Hmong Njua"
msgstr ""

#. Translators: Language name for ISO code "ho". The parenthesis clarifies
#. variant of the language. It could contain a region, age (Old, Middle, ...)
#. or other variant.
Expand Down Expand Up @@ -2086,6 +2092,12 @@ msgstr ""
msgid "Limburgish"
msgstr ""

#. Translators: Language name for ISO code "lij". The parenthesis clarifies
#. variant of the language. It could contain a region, age (Old, Middle, ...)
#. or other variant.
msgid "Ligurian"
msgstr ""

#. Translators: Language name for ISO code "lki". The parenthesis clarifies
#. variant of the language. It could contain a region, age (Old, Middle, ...)
#. or other variant.
Expand Down Expand Up @@ -2740,6 +2752,12 @@ msgstr ""
msgid "Páez"
msgstr ""

#. Translators: Language name for ISO code "pcm". The parenthesis clarifies
#. variant of the language. It could contain a region, age (Old, Middle, ...)
#. or other variant.
msgid "Pidgin (Nigeria)"
msgstr ""

#. Translators: Language name for ISO code "peo". The parenthesis clarifies
#. variant of the language. It could contain a region, age (Old, Middle, ...)
#. or other variant.
Expand Down
21 changes: 20 additions & 1 deletion weblate_language_data/locale/af/LC_MESSAGES/django.po
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ msgid ""
msgstr ""
"Project-Id-Version: Weblate Language Data\n"
"Report-Msgid-Bugs-To: https://github.com/WeblateOrg/language-data/issues/\n"
"POT-Creation-Date: 2024-01-29 12:57+0100\n"
"POT-Creation-Date: 2024-01-29 13:13+0100\n"
"PO-Revision-Date: 2023-07-12 09:53+0000\n"
"Last-Translator: Gideon Wentink <[email protected]>\n"
"Language-Team: Afrikaans <https://hosted.weblate.org/projects/weblate/"
Expand Down Expand Up @@ -1623,6 +1623,12 @@ msgstr "Hmong"
msgid "Chhattisgarhi"
msgstr ""

#. Translators: Language name for ISO code "hnj". The parenthesis clarifies
#. variant of the language. It could contain a region, age (Old, Middle, ...)
#. or other variant.
msgid "Hmong Njua"
msgstr ""

#. Translators: Language name for ISO code "ho". The parenthesis clarifies
#. variant of the language. It could contain a region, age (Old, Middle, ...)
#. or other variant.
Expand Down Expand Up @@ -2169,6 +2175,13 @@ msgstr ""
msgid "Limburgish"
msgstr "Limburgs"

#. Translators: Language name for ISO code "lij". The parenthesis clarifies
#. variant of the language. It could contain a region, age (Old, Middle, ...)
#. or other variant.
#, fuzzy
msgid "Ligurian"
msgstr "Litaus"

#. Translators: Language name for ISO code "lki". The parenthesis clarifies
#. variant of the language. It could contain a region, age (Old, Middle, ...)
#. or other variant.
Expand Down Expand Up @@ -2863,6 +2876,12 @@ msgstr "Palauaans"
msgid "Páez"
msgstr ""

#. Translators: Language name for ISO code "pcm". The parenthesis clarifies
#. variant of the language. It could contain a region, age (Old, Middle, ...)
#. or other variant.
msgid "Pidgin (Nigeria)"
msgstr ""

#. Translators: Language name for ISO code "peo". The parenthesis clarifies
#. variant of the language. It could contain a region, age (Old, Middle, ...)
#. or other variant.
Expand Down
20 changes: 19 additions & 1 deletion weblate_language_data/locale/afh/LC_MESSAGES/django.po
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ msgid ""
msgstr ""
"Project-Id-Version: Weblate Language Data\n"
"Report-Msgid-Bugs-To: https://github.com/WeblateOrg/language-data/issues/\n"
"POT-Creation-Date: 2024-01-29 12:57+0100\n"
"POT-Creation-Date: 2024-01-29 13:13+0100\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: Automatically generated\n"
"Language-Team: none\n"
Expand Down Expand Up @@ -1570,6 +1570,12 @@ msgstr ""
msgid "Chhattisgarhi"
msgstr ""

#. Translators: Language name for ISO code "hnj". The parenthesis clarifies
#. variant of the language. It could contain a region, age (Old, Middle, ...)
#. or other variant.
msgid "Hmong Njua"
msgstr ""

#. Translators: Language name for ISO code "ho". The parenthesis clarifies
#. variant of the language. It could contain a region, age (Old, Middle, ...)
#. or other variant.
Expand Down Expand Up @@ -2086,6 +2092,12 @@ msgstr ""
msgid "Limburgish"
msgstr ""

#. Translators: Language name for ISO code "lij". The parenthesis clarifies
#. variant of the language. It could contain a region, age (Old, Middle, ...)
#. or other variant.
msgid "Ligurian"
msgstr ""

#. Translators: Language name for ISO code "lki". The parenthesis clarifies
#. variant of the language. It could contain a region, age (Old, Middle, ...)
#. or other variant.
Expand Down Expand Up @@ -2740,6 +2752,12 @@ msgstr ""
msgid "Páez"
msgstr ""

#. Translators: Language name for ISO code "pcm". The parenthesis clarifies
#. variant of the language. It could contain a region, age (Old, Middle, ...)
#. or other variant.
msgid "Pidgin (Nigeria)"
msgstr ""

#. Translators: Language name for ISO code "peo". The parenthesis clarifies
#. variant of the language. It could contain a region, age (Old, Middle, ...)
#. or other variant.
Expand Down
20 changes: 19 additions & 1 deletion weblate_language_data/locale/ang/LC_MESSAGES/django.po
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ msgid ""
msgstr ""
"Project-Id-Version: Weblate Language Data\n"
"Report-Msgid-Bugs-To: https://github.com/WeblateOrg/language-data/issues/\n"
"POT-Creation-Date: 2024-01-29 12:57+0100\n"
"POT-Creation-Date: 2024-01-29 13:13+0100\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: Automatically generated\n"
"Language-Team: none\n"
Expand Down Expand Up @@ -1570,6 +1570,12 @@ msgstr ""
msgid "Chhattisgarhi"
msgstr ""

#. Translators: Language name for ISO code "hnj". The parenthesis clarifies
#. variant of the language. It could contain a region, age (Old, Middle, ...)
#. or other variant.
msgid "Hmong Njua"
msgstr ""

#. Translators: Language name for ISO code "ho". The parenthesis clarifies
#. variant of the language. It could contain a region, age (Old, Middle, ...)
#. or other variant.
Expand Down Expand Up @@ -2086,6 +2092,12 @@ msgstr ""
msgid "Limburgish"
msgstr ""

#. Translators: Language name for ISO code "lij". The parenthesis clarifies
#. variant of the language. It could contain a region, age (Old, Middle, ...)
#. or other variant.
msgid "Ligurian"
msgstr ""

#. Translators: Language name for ISO code "lki". The parenthesis clarifies
#. variant of the language. It could contain a region, age (Old, Middle, ...)
#. or other variant.
Expand Down Expand Up @@ -2740,6 +2752,12 @@ msgstr ""
msgid "Páez"
msgstr ""

#. Translators: Language name for ISO code "pcm". The parenthesis clarifies
#. variant of the language. It could contain a region, age (Old, Middle, ...)
#. or other variant.
msgid "Pidgin (Nigeria)"
msgstr ""

#. Translators: Language name for ISO code "peo". The parenthesis clarifies
#. variant of the language. It could contain a region, age (Old, Middle, ...)
#. or other variant.
Expand Down
Loading

0 comments on commit d4ada97

Please sign in to comment.