diff --git a/languagetool-core/src/main/java/org/languagetool/rules/patterns/CaseConversionHelper.java b/languagetool-core/src/main/java/org/languagetool/rules/patterns/CaseConversionHelper.java
index b4945500e353..c99347f49fcf 100644
--- a/languagetool-core/src/main/java/org/languagetool/rules/patterns/CaseConversionHelper.java
+++ b/languagetool-core/src/main/java/org/languagetool/rules/patterns/CaseConversionHelper.java
@@ -70,6 +70,9 @@ public static String convertCase(Match.CaseConversion conversion, String s, Stri
case ALLLOWER:
token = token.toLowerCase();
break;
+ case NOTASHKEEL:
+ token = StringTools.removeTashkeel(token);
+ break;
default:
break;
}
diff --git a/languagetool-core/src/main/java/org/languagetool/rules/patterns/Match.java b/languagetool-core/src/main/java/org/languagetool/rules/patterns/Match.java
index 4999f7779a9e..9d8654a67f6a 100644
--- a/languagetool-core/src/main/java/org/languagetool/rules/patterns/Match.java
+++ b/languagetool-core/src/main/java/org/languagetool/rules/patterns/Match.java
@@ -36,7 +36,7 @@ public final class Match {
/** Possible string case conversions. **/
public enum CaseConversion {
- NONE, STARTLOWER, STARTUPPER, ALLLOWER, ALLUPPER, PRESERVE, FIRSTUPPER
+ NONE, STARTLOWER, STARTUPPER, ALLLOWER, ALLUPPER, PRESERVE, FIRSTUPPER, NOTASHKEEL
}
public enum IncludeRange {
diff --git a/languagetool-core/src/main/java/org/languagetool/rules/patterns/MatchState.java b/languagetool-core/src/main/java/org/languagetool/rules/patterns/MatchState.java
index 03f2087e4802..65decd290679 100644
--- a/languagetool-core/src/main/java/org/languagetool/rules/patterns/MatchState.java
+++ b/languagetool-core/src/main/java/org/languagetool/rules/patterns/MatchState.java
@@ -221,6 +221,9 @@ public final String[] toFinalString(Language lang) throws IOException {
Pattern pRegexMatch = match.getRegexMatch();
String regexReplace = match.getRegexReplace();
if (pRegexMatch != null) {
+ if (lang != null && lang.getShortCode().equals("ar")) {
+ formattedString[0] = StringTools.removeTashkeel(formattedString[0]);
+ }
formattedString[0] = pRegexMatch.matcher(formattedString[0]).replaceAll(regexReplace);
}
diff --git a/languagetool-core/src/main/java/org/languagetool/tools/StringTools.java b/languagetool-core/src/main/java/org/languagetool/tools/StringTools.java
index b13b5df71730..627ca58ebf45 100644
--- a/languagetool-core/src/main/java/org/languagetool/tools/StringTools.java
+++ b/languagetool-core/src/main/java/org/languagetool/tools/StringTools.java
@@ -660,4 +660,27 @@ public static String makeWrong(String s) {
}
return s + "-";
}
+
+ /**
+ * Return str
without tashkeel characters
+ * @param str input str
+ */
+ public static String removeTashkeel(String str) {
+ String striped = str.replaceAll("["
+ + "\u064B" // Fathatan
+ + "\u064C" // Dammatan
+ + "\u064D" // Kasratan
+ + "\u064E" // Fatha
+ + "\u064F" // Damma
+ + "\u0650" // Kasra
+ + "\u0651" // Shadda
+ + "\u0652" // Sukun
+ + "\u0653" // Maddah Above
+ + "\u0654" // Hamza Above
+ + "\u0655" // Hamza Below
+ + "\u0656" // Subscript Alef
+ + "\u0640" // Tatweel
+ + "]", "");
+ return striped;
+ }
}
diff --git a/languagetool-core/src/main/resources/org/languagetool/rules/pattern.xsd b/languagetool-core/src/main/resources/org/languagetool/rules/pattern.xsd
index 8ea7b4773c4e..d139f774228d 100644
--- a/languagetool-core/src/main/resources/org/languagetool/rules/pattern.xsd
+++ b/languagetool-core/src/main/resources/org/languagetool/rules/pattern.xsd
@@ -115,6 +115,7 @@
+
diff --git a/languagetool-language-modules/ar/src/main/java/org/languagetool/rules/ar/filters/ArabicAdjectiveToExclamationFilter.java b/languagetool-language-modules/ar/src/main/java/org/languagetool/rules/ar/filters/ArabicAdjectiveToExclamationFilter.java
new file mode 100644
index 000000000000..b777139acf80
--- /dev/null
+++ b/languagetool-language-modules/ar/src/main/java/org/languagetool/rules/ar/filters/ArabicAdjectiveToExclamationFilter.java
@@ -0,0 +1,183 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2022 Sohaib Afifi, Taha Zerrouki
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package org.languagetool.rules.ar.filters;
+
+import org.jetbrains.annotations.Nullable;
+import org.languagetool.AnalyzedTokenReadings;
+import org.languagetool.rules.RuleMatch;
+import org.languagetool.rules.SimpleReplaceDataLoader;
+import org.languagetool.rules.patterns.RuleFilter;
+import org.languagetool.tagging.ar.ArabicTagger;
+import org.languagetool.tools.ArabicWordMaps;
+
+import java.util.*;
+
+/**
+ * Filter that maps suggestion from adverb to adjective.
+ *
+ * @since 5.8
+ */
+public class ArabicAdjectiveToExclamationFilter extends RuleFilter {
+
+ public ArabicAdjectiveToExclamationFilter() {
+ this.adj2compList = loadFromPath(FILE_NAME);
+ }
+
+ private final ArabicTagger tagger = new ArabicTagger();
+ private static final String FILE_NAME = "/ar/arabic_adjective_exclamation.txt";
+ private final Map> adj2compList;
+
+ private final Map adj2comp = new HashMap() {{
+ // tri letters verb:
+ put("رشيد", "أرشد");
+ put("طويل", "أطول");
+ put("بديع", "أبدع");
+ }};
+
+
+ @Nullable
+ @Override
+ public RuleMatch acceptRuleMatch(RuleMatch match, Map arguments, int patternTokenPos, AnalyzedTokenReadings[] patternTokens) {
+
+ // This rule return only the comparative according to given adjective
+ String adj = arguments.get("adj"); // extract adjective
+ String noun = arguments.get("noun"); // the second argument
+ int adjTokenIndex;
+ try {
+ adjTokenIndex = Integer.parseInt(arguments.get("adj_pos")) - 1;
+ } catch (NumberFormatException e) {
+ throw new RuntimeException("Error parsing adj_pos from : " + arguments.get("adj_pos"), e);
+ }
+
+ // filter tokens which have a lemma of adjective
+
+ // some cases can have multiple lemmas, but only adjective lemma are used
+ List adjLemmas = tagger.getLemmas(patternTokens[adjTokenIndex], "adj");
+
+ // get comparative from Adj/comp list
+ List compList = new ArrayList<>();
+
+ for (String adjLemma : adjLemmas) {
+ // get comparative suitable to adjective
+ List comparativeList = adj2compList.get(adjLemma);
+ if (comparativeList != null) {
+ compList.addAll(comparativeList);
+ }
+ }
+
+ // remove duplicates
+ compList = new ArrayList<>(new HashSet<>(compList));
+ RuleMatch newMatch = new RuleMatch(match.getRule(), match.getSentence(), match.getFromPos(), match.getToPos(), match.getMessage(), match.getShortMessage());
+ // generate suggestion
+ List suggestionList = prepareSuggestions(compList, noun);
+ for (String sug : suggestionList) {
+ newMatch.addSuggestedReplacement(sug);
+ }
+ return newMatch;
+ }
+
+ /* prepare suggestion for a list of comparative */
+ protected static List prepareSuggestions(List compList, String noun) {
+ List sugList = new ArrayList<>();
+ for (String comp : compList) {
+ sugList.addAll(prepareSuggestions(comp, noun));
+ }
+ return sugList;
+ }
+
+ protected static List prepareSuggestions(String comp, String noun) {
+ /*
+ الحالات:
+ الاسم ليس ضميرا
+
+
+ ال كم الولد جميل==> ما أجمل الولد
+ أجمل بالولد
+
+ حالة الضمير
+
+ كم هو جميل==> ما أجمله
+ أجمل به
+
+ حالة الضفة غير الثلاثية
+ اسم:
+ كم الطالب شديد الاستيعاب
+ ما أشد استيعاب الطالب
+ أشدد باستيعابه
+
+ ضمير
+ كم هو شديد الاستيعاب
+ ما أشد استيعابه
+ أشد باستيعابه
+ */
+
+ List sugList = new ArrayList<>();
+ StringBuilder suggestion = new StringBuilder();
+ suggestion.append(comp);
+ if (noun == null || noun.isEmpty()) {
+ } else if (isPronoun(noun)) {
+ // no space adding
+ suggestion.append(ArabicWordMaps.getAttachedPronoun(noun));
+ } else {
+ //if comparative is of second form don't add a space
+ if (!comp.endsWith(" ب")) {
+ suggestion.append(" ");
+ }
+ suggestion.append(noun);
+ }
+
+ // add suggestions
+ sugList.add(suggestion.toString());
+ return sugList;
+ }
+
+ /* test if the word is an isolated pronoun */
+ private static boolean isPronoun(String word) {
+ if (word == null) {
+ return false;
+ }
+ return word.equals("هو")
+ || word.equals("هي")
+ || word.equals("هم")
+ || word.equals("هما")
+ || word.equals("أنا");
+ }
+
+ /* get corresponding attached to unattached pronoun */
+ private static String getAttachedPronoun(String word) {
+ if (word == null) {
+ return "";
+ }
+ Map isolatedToAttachedPronoun = new HashMap<>();
+ isolatedToAttachedPronoun.put("هو", "ه");
+ isolatedToAttachedPronoun.put("هي", "ها");
+ isolatedToAttachedPronoun.put("هم", "هم");
+ isolatedToAttachedPronoun.put("هن", "هن");
+ isolatedToAttachedPronoun.put("نحن", "نا");
+ return isolatedToAttachedPronoun.getOrDefault(word, "");
+ }
+
+ protected static Map> loadFromPath(String path) {
+ return new SimpleReplaceDataLoader().loadWords(path);
+ }
+
+ public static String getDataFilePath() {
+ return FILE_NAME;
+ }
+}
diff --git a/languagetool-language-modules/ar/src/main/java/org/languagetool/rules/ar/filters/ArabicAdvancedSynthesizerFilter.java b/languagetool-language-modules/ar/src/main/java/org/languagetool/rules/ar/filters/ArabicAdvancedSynthesizerFilter.java
new file mode 100644
index 000000000000..6cd9e9713a27
--- /dev/null
+++ b/languagetool-language-modules/ar/src/main/java/org/languagetool/rules/ar/filters/ArabicAdvancedSynthesizerFilter.java
@@ -0,0 +1,44 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2022 Sohaib Afifi, Taha Zerrouki
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package org.languagetool.rules.ar.filters;
+
+
+import org.languagetool.language.Arabic;
+import org.languagetool.rules.AbstractAdvancedSynthesizerFilter;
+import org.languagetool.synthesis.Synthesizer;
+import org.languagetool.synthesis.ar.ArabicSynthesizer;
+
+/*
+ * Synthesize suggestions using the lemma from one token (lemma_from)
+ * and the POS tag from another one (postag_from).
+ *
+ * The lemma_select and postag_select attributes are required
+ * to choose one among several possible readings.
+ */
+public class ArabicAdvancedSynthesizerFilter extends AbstractAdvancedSynthesizerFilter {
+
+ private final ArabicSynthesizer synth = new ArabicSynthesizer(new Arabic());
+
+ @Override
+ protected Synthesizer getSynthesizer() {
+ return synth;
+ }
+
+}
diff --git a/languagetool-language-modules/ar/src/main/java/org/languagetool/rules/ar/filters/ArabicDMYDateCheckFilter.java b/languagetool-language-modules/ar/src/main/java/org/languagetool/rules/ar/filters/ArabicDMYDateCheckFilter.java
new file mode 100644
index 000000000000..e9724eb5d246
--- /dev/null
+++ b/languagetool-language-modules/ar/src/main/java/org/languagetool/rules/ar/filters/ArabicDMYDateCheckFilter.java
@@ -0,0 +1,49 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2022 Sohaib Afifi, Taha Zerrouki
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package org.languagetool.rules.ar.filters;
+
+import org.languagetool.AnalyzedTokenReadings;
+import org.languagetool.rules.RuleMatch;
+
+import java.util.Map;
+
+/**
+ * Date filter that expects a 'date' argument in the format 'dd-mm-yyyy'.
+ *
+ * @since 6.2
+ */
+public class ArabicDMYDateCheckFilter extends ArabicDateCheckFilter {
+
+ @Override
+ public RuleMatch acceptRuleMatch(RuleMatch match, Map args, int patternTokenPos, AnalyzedTokenReadings[] patternTokens) {
+ if (args.containsKey("year") || args.containsKey("month") || args.containsKey("day")) {
+ throw new RuntimeException("Set only 'weekDay' and 'date' for " + ArabicDMYDateCheckFilter.class.getSimpleName());
+ }
+ String dateString = getRequired("date", args);
+ String[] parts = dateString.split("-");
+ if (parts.length != 3) {
+ throw new RuntimeException("Expected date in format 'dd-mm-yyyy': '" + dateString + "'");
+ }
+ args.put("day", parts[0]);
+ args.put("month", parts[1]);
+ args.put("year", parts[2]);
+ return super.acceptRuleMatch(match, args, patternTokenPos, patternTokens);
+ }
+
+}
diff --git a/languagetool-language-modules/ar/src/main/java/org/languagetool/rules/ar/filters/ArabicDateCheckFilter.java b/languagetool-language-modules/ar/src/main/java/org/languagetool/rules/ar/filters/ArabicDateCheckFilter.java
new file mode 100644
index 000000000000..127cc80d2232
--- /dev/null
+++ b/languagetool-language-modules/ar/src/main/java/org/languagetool/rules/ar/filters/ArabicDateCheckFilter.java
@@ -0,0 +1,64 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2022 Sohaib Afifi, Taha Zerrouki
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package org.languagetool.rules.ar.filters;
+
+import org.languagetool.rules.AbstractDateCheckFilter;
+
+import java.util.Calendar;
+import java.util.Locale;
+
+/**
+ * Arabic localization of {@link AbstractDateCheckFilter}.
+ *
+ * @since 6.2
+ */
+public class ArabicDateCheckFilter extends AbstractDateCheckFilter {
+
+ private final ArabicDateFilterHelper dateFilterHelper = new ArabicDateFilterHelper();
+
+
+ @Override
+ protected Calendar getCalendar() {
+ return Calendar.getInstance(Locale.forLanguageTag("ar"));
+ }
+
+ @SuppressWarnings("ControlFlowStatementWithoutBraces")
+ @Override
+ protected int getDayOfWeek(String dayStr) {
+ return dateFilterHelper.getDayOfWeek(dayStr);
+
+ }
+
+ @Override
+ protected String getDayOfWeek(Calendar date) {
+ return date.getDisplayName(Calendar.DAY_OF_WEEK, Calendar.LONG, Locale.forLanguageTag("ar"));
+ }
+
+ protected String getDayOfWeek(int day) {
+ return dateFilterHelper.getDayOfWeekName(day);
+ }
+
+
+ @SuppressWarnings({"ControlFlowStatementWithoutBraces", "MagicNumber"})
+ @Override
+ protected int getMonth(String monthStr) {
+ return dateFilterHelper.getMonth(monthStr);
+ }
+
+}
diff --git a/languagetool-language-modules/ar/src/main/java/org/languagetool/rules/ar/filters/ArabicDateFilterHelper.java b/languagetool-language-modules/ar/src/main/java/org/languagetool/rules/ar/filters/ArabicDateFilterHelper.java
new file mode 100644
index 000000000000..0dedb9230960
--- /dev/null
+++ b/languagetool-language-modules/ar/src/main/java/org/languagetool/rules/ar/filters/ArabicDateFilterHelper.java
@@ -0,0 +1,164 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2022 Sohaib Afifi, Taha Zerrouki
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package org.languagetool.rules.ar.filters;
+
+import org.languagetool.tools.StringTools;
+
+import java.util.Calendar;
+import java.util.Locale;
+
+/**
+ * @since 6.2
+ */
+class ArabicDateFilterHelper {
+
+ protected Calendar getCalendar() {
+ return Calendar.getInstance(Locale.UK);
+ }
+
+ @SuppressWarnings("ControlFlowStatementWithoutBraces")
+ protected int getDayOfWeek(String dayStr) {
+
+ switch (dayStr) {
+ case "السبت":
+ return Calendar.SATURDAY;
+ case "الأحد":
+ return Calendar.SUNDAY;
+ case "الإثنين":
+ return Calendar.MONDAY;
+ case "الاثنين":
+ return Calendar.MONDAY;
+ case "الثلاثاء":
+ return Calendar.TUESDAY;
+ case "الأربعاء":
+ return Calendar.WEDNESDAY;
+ case "الخميس":
+ return Calendar.THURSDAY;
+ case "الجمعة":
+ return Calendar.FRIDAY;
+ }
+ throw new RuntimeException("No day name found for " + dayStr + "'");
+ }
+
+
+ @SuppressWarnings({"ControlFlowStatementWithoutBraces", "MagicNumber"})
+ protected int getMonth(String monthStr) {
+ String mon = StringTools.trimSpecialCharacters(monthStr);
+ switch (mon) {
+ // الأشهر العربية بالسريانية
+ case "كانون الثاني":
+ return 1;
+ case "كانون ثاني":
+ return 1;
+ case "شباط":
+ return 2;
+ case "آذار":
+ return 3;
+ case "نيسان":
+ return 4;
+ case "أيار":
+ return 5;
+ case "حزيران":
+ return 6;
+ case "تموز":
+ return 7;
+ case "آب":
+ return 8;
+ case "أيلول":
+ return 9;
+ case "تشرين الأول":
+ return 10;
+ case "تشرين الثاني":
+ return 11;
+ case "كانون الأول":
+ return 12;
+ case "تشرين ثاني":
+ return 11;
+ case "كانون أول":
+ return 12;
+ // الأشهر المعربة عن الإنجليزية
+ case "يناير":
+ return 1;
+ case "فبراير":
+ return 2;
+ case "مارس":
+ return 3;
+ case "أبريل":
+ return 4;
+ case "مايو":
+ return 5;
+ case "يونيو":
+ return 6;
+ case "يوليو":
+ return 7;
+ case "أغسطس":
+ return 8;
+ case "سبتمبر":
+ return 9;
+ case "أكتوبر":
+ return 10;
+ case "نوفمبر":
+ return 11;
+ case "ديسمبر":
+ return 12;
+ // الأشهر المعربة عن الفرنسية
+ case "جانفي":
+ return 1;
+ case "جانفييه":
+ return 1;
+ case "فيفري":
+ return 2;
+ case "أفريل":
+ return 4;
+ case "ماي":
+ return 5;
+ case "جوان":
+ return 6;
+ case "جويلية":
+ return 7;
+ case "أوت":
+ return 8;
+ }
+ throw new RuntimeException("No month name for '" + monthStr + "'");
+ }
+
+
+ /* get day of week name */
+ protected String getDayOfWeekName(int day) {
+ switch (day) {
+ case Calendar.SATURDAY:
+ return "السبت";
+ case Calendar.SUNDAY:
+ return "الأحد";
+ case Calendar.MONDAY:
+ return "الإثنين";
+ case Calendar.TUESDAY:
+ return "الثلاثاء";
+ case Calendar.WEDNESDAY:
+ return "الأربعاء";
+ case Calendar.THURSDAY:
+ return "الخميس";
+ case Calendar.FRIDAY:
+ return "الجمعة";
+ default:
+ return "غير محدد";
+ }
+
+ }
+}
diff --git a/languagetool-language-modules/ar/src/main/java/org/languagetool/rules/ar/filters/ArabicMasdarToVerbFilter.java b/languagetool-language-modules/ar/src/main/java/org/languagetool/rules/ar/filters/ArabicMasdarToVerbFilter.java
new file mode 100644
index 000000000000..2dd73530552b
--- /dev/null
+++ b/languagetool-language-modules/ar/src/main/java/org/languagetool/rules/ar/filters/ArabicMasdarToVerbFilter.java
@@ -0,0 +1,136 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2022 Sohaib Afifi, Taha Zerrouki
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package org.languagetool.rules.ar.filters;
+
+import org.jetbrains.annotations.Nullable;
+import org.languagetool.AnalyzedToken;
+import org.languagetool.AnalyzedTokenReadings;
+import org.languagetool.language.Arabic;
+import org.languagetool.rules.RuleMatch;
+import org.languagetool.rules.SimpleReplaceDataLoader;
+import org.languagetool.rules.patterns.RuleFilter;
+import org.languagetool.synthesis.ar.ArabicSynthesizer;
+import org.languagetool.tagging.ar.ArabicTagger;
+
+import java.util.*;
+
+/**
+ * Filter that maps suggestion from adverb to adjective.
+ * Also see https://www.ef.com/wwen/english-resources/english-grammar/forming-adverbs-adjectives/
+ *
+ * @since 6.2
+ */
+public class ArabicMasdarToVerbFilter extends RuleFilter {
+
+ public ArabicMasdarToVerbFilter() {
+ this.masdar2verbList = loadFromPath(FILE_NAME);
+ }
+ private final ArabicTagger tagger = new ArabicTagger();
+ private static final String FILE_NAME = "/ar/arabic_masdar_verb.txt";
+ private Map> masdar2verbList;
+ private final ArabicSynthesizer synthesizer = new ArabicSynthesizer(new Arabic());
+
+ final List authorizeLemma = new ArrayList() {{
+ add("قَامَ");
+ }};
+
+ private final Map masdar2verb = new HashMap() {{
+ // tri letters verb:
+ put("عمل", "عَمِلَ");
+ put("إعمال", "أَعْمَلَ");
+ put("تعميل", "عَمَّلَ");
+ put("ضرب", "ضَرَبَ");
+ put("أكل", "أَكَلَ");
+ // regular ones:
+ // non tri letters verb
+ put("إجابة", "أَجَابَ");
+ }};
+
+
+ @Nullable
+ @Override
+ public RuleMatch acceptRuleMatch(RuleMatch match, Map arguments, int patternTokenPos, AnalyzedTokenReadings[] patternTokens) {
+
+ // The pattern is composed of the words
+ // قام بالأكل
+ // يقوم بالأكل
+ // يقومون بالأكل
+ // first token: auxialliary verb Qam
+ // second token: Noun as Masdar
+ // replace the Masdar by its verb
+ // inflect the verb according the auxilaiary verb inflection
+
+ String auxVerb = arguments.get("verb"); // الفعل قام أو ما شابهه
+ String masdar = arguments.get("noun"); // masdar
+
+ // filter tokens which have a lemma
+ // some cases can have multiple lemmas, but only auxilliry lemma are used
+ List auxVerbLemmasAll = tagger.getLemmas(patternTokens[0], "verb");
+ List auxVerbLemmas = filterLemmas(auxVerbLemmasAll);
+
+ // get all lemmas of the given masdar
+ List masdarLemmas = tagger.getLemmas(patternTokens[1], "masdar");
+
+ // generate multiple verb from masdar lemmas list
+ List verbList = new ArrayList<>();
+
+ // if the auxiliary verb has many lemmas, filter authorized lemma only
+ // the first token: auxiliary verb
+ for (AnalyzedToken auxVerbToken : patternTokens[0]) {
+ // if the token has an authorized lemma
+ if (auxVerbLemmas.contains(auxVerbToken.getLemma())) {
+ // for all masdar lemmas
+ for (String lemma : masdarLemmas) {
+ List verbLemmaList = masdar2verbList.get(lemma);
+ if (verbLemmaList != null) {
+ // if verb, inflect verd according to auxialiary verb inlfection
+ for (String vrbLem : verbLemmaList) {
+ List inflectedverbList = synthesizer.inflectLemmaLike(vrbLem, auxVerbToken);
+ verbList.addAll(inflectedverbList);
+ }
+ }
+ }
+ }
+ }
+
+ // remove duplicates
+ verbList = new ArrayList<>(new HashSet<>(verbList));
+ RuleMatch newMatch = new RuleMatch(match.getRule(), match.getSentence(), match.getFromPos(), match.getToPos(), match.getMessage(), match.getShortMessage());
+ // generate suggestion
+ for (String verb : verbList) {
+ newMatch.addSuggestedReplacement(verb);
+ }
+ return newMatch;
+ }
+
+ List filterLemmas(List lemmas) {
+ List filtred = new ArrayList<>();
+
+ for (String lem : authorizeLemma) {
+ if (lemmas.contains(lem)) {
+ filtred.add(lem);
+ }
+ }
+ return filtred;
+ }
+
+ protected static Map> loadFromPath(String path) {
+ return new SimpleReplaceDataLoader().loadWords(path);
+ }
+}
diff --git a/languagetool-language-modules/ar/src/main/java/org/languagetool/rules/ar/filters/ArabicNumberPhraseFilter.java b/languagetool-language-modules/ar/src/main/java/org/languagetool/rules/ar/filters/ArabicNumberPhraseFilter.java
new file mode 100644
index 000000000000..69754747a8d6
--- /dev/null
+++ b/languagetool-language-modules/ar/src/main/java/org/languagetool/rules/ar/filters/ArabicNumberPhraseFilter.java
@@ -0,0 +1,261 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2022 Sohaib Afifi, Taha Zerrouki
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package org.languagetool.rules.ar.filters;
+
+import org.jetbrains.annotations.Nullable;
+import org.languagetool.AnalyzedToken;
+import org.languagetool.AnalyzedTokenReadings;
+import org.languagetool.language.Arabic;
+import org.languagetool.rules.RuleMatch;
+import org.languagetool.rules.patterns.RuleFilter;
+import org.languagetool.synthesis.ar.ArabicSynthesizer;
+import org.languagetool.tagging.ar.ArabicTagManager;
+import org.languagetool.tagging.ar.ArabicTagger;
+import org.languagetool.tools.ArabicNumbersWords;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+import static java.util.Arrays.asList;
+
+/**
+ * Filter that maps suggestion for numeric phrases.
+ */
+public class ArabicNumberPhraseFilter extends RuleFilter {
+
+ private static final ArabicTagManager tagmanager = new ArabicTagManager();
+ private final ArabicSynthesizer synthesizer = new ArabicSynthesizer(new Arabic());
+
+
+ @Nullable
+ @Override
+ public RuleMatch acceptRuleMatch(RuleMatch match, Map arguments, int patternTokenPos, AnalyzedTokenReadings[] patternTokens) {
+
+ // get the previous word
+ String previousWord = arguments.getOrDefault("previous", "");
+ // previous word index in token list
+ int previousWordPos = getPreviousPos(arguments);
+
+ // get the inflect mark
+ String inflectArg = arguments.getOrDefault("inflect", "");
+ // get the next word as units
+ String nextWord = arguments.getOrDefault("next", "");
+
+ int nextWordPos = getNextPos(arguments, patternTokens.length);
+
+ List numWordTokens = new ArrayList<>();
+ /// get all numeric tokens
+ int startPos = (previousWordPos > 0) ? previousWordPos + 1 : 0;
+
+ int endPos = (nextWordPos > 0) ? Integer.min(nextWordPos, patternTokens.length) : patternTokens.length + nextWordPos;
+
+ for (int i = startPos; i < endPos; i++) {
+ numWordTokens.add(patternTokens[i].getToken().trim());
+ }
+
+ String numPhrase = String.join(" ", numWordTokens);
+ // extract features from previous
+ boolean feminine = false;
+ boolean attached = false;
+ String inflection = getInflectedCase(patternTokens, previousWordPos, inflectArg);
+ List suggestionList;
+ if (nextWord.isEmpty()) {
+ suggestionList = prepareSuggestion(numPhrase, previousWord, null, feminine, attached, inflection);
+ } else {
+ AnalyzedTokenReadings nextWordToken = null;
+ if (endPos > 0 && endPos < patternTokens.length) {
+ nextWordToken = patternTokens[endPos];
+ }
+ suggestionList = prepareSuggestionWithUnits(numPhrase, previousWord, nextWordToken, feminine, attached, inflection);
+ }
+ RuleMatch newMatch = new RuleMatch(match.getRule(), match.getSentence(), match.getFromPos(), match.getToPos(), match.getMessage(), match.getShortMessage());
+
+ if (!suggestionList.isEmpty()) {
+ for (String sug : suggestionList) {
+ newMatch.addSuggestedReplacement(sug);
+ }
+ }
+ return newMatch;
+ }
+
+ // extract inflection case
+ private static String getInflectedCase(AnalyzedTokenReadings[] patternTokens, int previousPos, String inflect) {
+ if (!Objects.equals(inflect, "")) {
+ return inflect;
+ }
+ // if the previous is Jar
+
+ if (previousPos >= 0 && previousPos < patternTokens.length) {
+ AnalyzedTokenReadings previousToken = patternTokens[previousPos];
+ for (AnalyzedToken tk : patternTokens[previousPos]) {
+ if (tk.getPOSTag() != null && tk.getPOSTag().startsWith("PR")) {
+ return "jar";
+ }
+ }
+
+ }
+ String firstWord = patternTokens[previousPos + 1].getToken();
+ if (firstWord.startsWith("ب")
+ || firstWord.startsWith("ل")
+ || firstWord.startsWith("ك")
+ ) {
+ return "jar";
+ }
+ return "";
+ }
+
+ // extract inflection case
+ private static boolean getFemininCase(AnalyzedTokenReadings[] patternTokens, int nextPos) {
+ // if the previous is Jar
+ for (AnalyzedToken tk : patternTokens[nextPos]) {
+ if (tagmanager.isFeminin(tk.getPOSTag())) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /* prepare suggestion for given phrases */
+ public static List prepareSuggestion(String numPhrase, String previousWord, AnalyzedTokenReadings nextWord, boolean feminin, boolean attached, String inflection) {
+
+ List tmpsuggestionList = ArabicNumbersWords.getSuggestionsNumericPhrase(numPhrase, feminin, attached, inflection);
+ List suggestionList = new ArrayList<>();
+ if (!tmpsuggestionList.isEmpty()) {
+ for (String sug : tmpsuggestionList)
+ if (!previousWord.isEmpty()) {
+ suggestionList.add(previousWord + " " + sug);
+ }
+ }
+ return suggestionList;
+ }
+
+ /* prepare suggestion for given phrases */
+ public List prepareSuggestionWithUnits(String numPhrase, String previousWord, AnalyzedTokenReadings nextWord, boolean feminin, boolean attached, String inflection) {
+
+ String defaultUnit = "دينار";
+
+ List