Skip to content

Commit

Permalink
[ca] improve rules, more words
Browse files Browse the repository at this point in the history
  • Loading branch information
jaumeortola committed Jan 11, 2025
1 parent cb20c95 commit 207f988
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ Mompó Mompó NCNSP0
NESE NESE NCFP000
Nel·lo Nel·lo NPCNSP0
No-Do No-Do NPMSO00
Némirovsky Némirovsky NPCNSP0
OIEC OIEC NPFSO00
Oreo Oreo NPCNO00
PKK PKK NPMSO00
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10815,7 +10815,7 @@ Copyright (C) 2012 Jaume Ortolà i Font
</rule>
<rule>
<pattern>
<token regexp="yes">ha</token>
<token>ha</token>
<marker>
<token postag="N.*" postag_regexp="yes" inflected="yes">tema</token>
</marker>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#Catalan multiwords file used for chunking
#separatorRegExp=[\t;]
Bernat Dedéu;NPMSSP0
Mohammed bin Salman;NPMSSP0
coop.;NCFN00
Costa Blanca;NPFSG00
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ Mompó
NESE
Nel·lo
No-Do
Némirovsky
OIEC
Oreo
PKK
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,12 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
<example correction="2014">Divendres, 15 d'agost del <marker>2013</marker>.</example>
</rule>
<rule id="CA_DATE_WEEKDAY_CURRENTYEAR" name="Data incorrecta (any actual)">
<!-- Antipattern: a month name or abbreviation (entities mesos_any / mesos_any_abrev),
     then "d'", then a token whose postag matches DD.*, then the word "any".
     NOTE(review): DD.* is presumably the demonstrative-determiner tag
     (as in "agost d'aquest any"); confirm against the Catalan tagset. -->
<antipattern>
  <token regexp="yes">&mesos_any;|&mesos_any_abrev;</token>
  <token>d'</token>
  <token postag_regexp="yes" postag="DD.*"/>
  <token>any</token>
</antipattern>
<antipattern>
<marker>
<token regexp="yes">&mesos_any;|&mesos_any_abrev;</token>
Expand Down Expand Up @@ -1683,6 +1689,23 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
<example correction="calor extrema">Ser pobre és el factor determinant de les morts per <marker>calor extrem</marker>.</example>
</rule>
</rulegroup>
<!-- LLUM_MASCULI (default="temp_off").
     Marks "llum"/"llums" when its postag matches _GN_F. and it is followed by an
     optional "de"/"d'" plus an inflected form of estacionament, curt, llarg,
     encreuament, carretera or posició. The filter is invoked with gender:M, and
     the examples show the corrected output in the masculine.
     NOTE(review): _GN_F. looks like a feminine noun-group chunk tag; confirm. -->
<rulegroup default="temp_off" id="LLUM_MASCULI" name="la llum -> el llum">
  <rule>
    <pattern>
      <marker>
        <token regexp="yes" postag="_GN_F." postag_regexp="yes">llums?</token>
      </marker>
      <token min="0" regexp="yes">de|d'</token>
      <token regexp="yes" inflected="yes">estacionament|curt|llarg|encreuament|carretera|posició</token>
    </pattern>
    <filter args="lemmaSelect:[AN].* keepOriginal:true gender:M"
            class="org.languagetool.rules.ca.ConvertToGenderAndNumberFilter"/>
    <message>En aquest context, "llum" és masculí.</message>
    <!-- Sentences that must trigger the rule. -->
    <example correction="Els llums"><marker>Les llums</marker> de posició.</example>
    <example correction="Els llums llargs"><marker>Les llums llargues</marker>.</example>
    <!-- Sentences that must NOT trigger the rule. -->
    <example>Llums curts</example>
    <example>Llums llargs</example>
  </rule>
</rulegroup>
<rulegroup id="VESSANT" name="vessant (preferible en masculí)">
<antipattern>
<token>vessant</token>
Expand Down Expand Up @@ -9966,19 +9989,28 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
<token regexp="yes">santedat|altesa|excel·lència</token>
</antipattern>
<antipattern>
<marker>
<token>com</token>
<token>a</token>
<token regexp="yes">[mts]e(u|us|ua|ues|va|ves)|[nv]ostr(a|es?)</token>
</marker>
<token>com</token>
<token>a</token>
<token regexp="yes">[mts]e(u|us|ua|ues|va|ves)|[nv]ostr(a|es?)</token>
</antipattern>
<antipattern>
<marker>
<token inflected="yes" regexp="yes">el|un</token>
<token inflected="yes">altre</token>
<token regexp="yes">[mts]e(u|us|ua|ues|va|ves)|[nv]ostr(a|es?)</token>
</marker>
<token inflected="yes" regexp="yes">el|un</token>
<token inflected="yes">altre</token>
<token regexp="yes">[mts]e(u|us|ua|ues|va|ves)|[nv]ostr(a|es?)</token>
</antipattern>
<!-- Preposition (a / de / per) directly followed by a possessive (meu(s), teu(s),
     seu(s), nostre(s), vostre(s)) and then a noun whose postag matches N.[MC].*.
     Possessives tagged LOC_ADV or _C_LLOC are excepted.
     The suggestion rebuilds the contraction: "per" is rewritten to "pe", an "l" is
     appended, then the final "s" captured from the possessive (if any), and finally
     the possessive itself (\2). Rule is shipped with default="temp_off". -->
<rule default="temp_off">
  <pattern>
    <marker>
      <token regexp="yes">a|de|per</token>
      <token regexp="yes">[mts]eus?|[nv]ostres?<exception postag_regexp="yes" postag="LOC_ADV|_C_LLOC"/></token>
    </marker>
    <token postag_regexp="yes" postag="N.[MC].*"/>
  </pattern>
  <message>Hi falta un article.</message>
  <suggestion><match no="1" regexp_match="(?iu)per" regexp_replace="pe"/>l<match no="2" regexp_match="(?iu)^.*[eu](s?)" regexp_replace="$1"/> \2</suggestion>
  <example correction="dels nostres">Això és <marker>de nostres</marker> germans.</example>
  <example correction="als nostres">Això va per <marker>a nostres</marker> germans.</example>
</rule>
<rule>
<pattern>
<token><exception postag="D[DAIN].*|N.*|PP3MSA00|PP3CP000" postag_regexp="yes"/><exception inflected="yes">fer</exception><exception regexp="yes">tan|cal|le</exception></token>
Expand Down Expand Up @@ -47297,6 +47329,18 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
<example>una mena de presa per subjectar l'oponent</example>
<example>Les disposicions preses per apropiar-se'n.</example>
</rule>
<!-- Matches the three-token sequence "tot" + "són" + "preses" and marks only
     "preses", offering "presses" as the replacement. -->
<rule>
  <pattern>
    <token>tot</token>
    <token>són</token>
    <marker><token>preses</token></marker>
  </pattern>
  <message>¿Volíeu dir <suggestion>presses</suggestion> (desig de fer una cosa ràpidament)?</message>
  <short>Possible confusió</short>
  <example correction="presses">Ara tot són <marker>preses</marker>!</example>
</rule>
<rule>
<pattern>
<token regexp="yes" inflected="yes">decisió|mesura</token>
Expand Down Expand Up @@ -76209,6 +76253,19 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
</rulegroup>
</category>
<category id="INCORRECT_EXPRESSIONS" name="Expressions incorrectes" type="grammar">
<!-- A_BARCELONA_ESTANT (default="temp_off").
     Matches "a" + a token whose postag matches NP..G.* + "estant" and offers two
     rewrites: "a \2" (dropping "estant") and "de \2 \3" (switching the preposition).
     NOTE(review): the second example expects "d'Eivissa", so the apostrophe is
     presumably produced by AdaptSuggestionsFilter; confirm in the filter source. -->
<rule default="temp_off"
      id="A_BARCELONA_ESTANT"
      name="a Barcelona estant -> a Barcelona -> de Barcelona estant">
  <pattern>
    <token>a</token>
    <token postag_regexp="yes" postag="NP..G.*"/>
    <token>estant</token>
  </pattern>
  <filter args="none:none" class="org.languagetool.rules.AdaptSuggestionsFilter"/>
  <message>Construcció probablement incorrecta.</message>
  <suggestion>a \2</suggestion>
  <suggestion>de \2 \3</suggestion>
  <example correction="A Barcelona|De Barcelona estant"><marker>A Barcelona estant</marker> es veuen les illes Balears.</example>
  <example correction="a Eivissa|d'Eivissa estant">Jo recordo un estiu <marker>a Eivissa estant</marker>.</example>
</rule>
<rule id="JA_VEIG" name="ja veig -> ja ho veig">
<pattern>
<token postag="SENT_START"/>
Expand Down Expand Up @@ -91532,10 +91589,10 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
<pattern>
<token>(</token>
<marker>
<token>veure</token>
<token regexp="yes">veure|veieu</token>
</marker>
</pattern>
<message>Com a fórmula de remissió, cal dir <suggestion>vegeu</suggestion>.</message>
<message>Com a fórmula de remissió, cal usar la forma en imperatiu <suggestion>vegeu</suggestion>.</message>
<example correction="vegeu">Maquiavel (<marker>veure</marker> la cita).</example>
<example>com a persona (veure's a un mateix com a persona)</example>
</rule>
Expand Down Expand Up @@ -103656,6 +103713,21 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
<suggestion><match no="3" postag="(V.*)" postag_regexp="yes" postag_replace="$1">sentir</match></suggestion>
<example correction="sentir">No es podia <marker>escoltar</marker> res a 70 m.</example>
</rule>
<!-- "el que (la gent / les persones / els votants ...) vol escoltar": marks
     "escoltar" and suggests "sentir". Up to two optional subject tokens may sit
     between "que" and the inflected form of "voler".
     The subject alternation previously listed "la" twice; the duplicate is removed
     (the set of matched words is unchanged). The second example's typo "quant"
     is corrected to "quan"; it lies outside the matched span. -->
<rule default="temp_off">
  <pattern>
    <token regexp="yes">el|l</token>
    <token>que</token>
    <token min="0" max="2" regexp="yes">la|les|els|gent|persona|persones|votants</token>
    <token inflected="yes">voler</token>
    <marker>
      <token>escoltar</token>
    </marker>
  </pattern>
  <message>&msg_escoltar_sentir;</message>
  <suggestion>sentir</suggestion>
  <example correction="sentir">Només diu als votants el que la gent vol <marker>escoltar</marker>.</example>
  <example correction="sentir">La gent és molt manipulable, sobretot quan li diuen el que vol <marker>escoltar</marker>.</example>
</rule>
</rulegroup>
<rule id="TOTA_VEGADA_QUE" name="tota vegada que">
<pattern>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -719,4 +719,5 @@ de pura cepa=de soca-rel
pura cepa=de soca-rel
caça bombarders|caça-bombarders|caçabombarders=caces bombarders Forma de plural adequada.
caçabombarder|caça-bombarder=caça bombarder S'escriu separat sense guionet.
costa de marfil|costa de vori=Costa d'Ivori Nom de país.
costa de marfil|costa de vori=Costa d'Ivori Nom de país.
famílies sòcies=famílies associades "Associat" és l'adjectiu corresponent al substantiu "soci".

0 comments on commit 207f988

Please sign in to comment.