Skip to content

Commit

Permalink
Master Merge Hawking Enhancement v0.1.9 (#43)
Browse files Browse the repository at this point in the history
* deleted test publish workflow

* Revert "deleted test publish workflow"

This reverts commit 159c473.

* ReadMe change

* Updated PR info with github sec

* Corenlp version 4.5.6 update, Parser Model Update, Junit Update to 4.13.1

* Added support for new cases such as "last week of May" and "1st Monday of Jan"

* Version Update
  • Loading branch information
ArulVendhan authored Sep 24, 2024
1 parent e4c2847 commit 3878fac
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 8 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

<groupId>com.zoho</groupId>
<artifactId>hawking</artifactId>
<version>0.1.8</version>
<version>0.1.9</version>
<packaging>jar</packaging>
<name>Hawking</name>
<description>Hawking is a natural language date time parser that extracts date and time from text with context and parse to the required format.</description>
Expand Down
67 changes: 64 additions & 3 deletions src/main/java/com/zoho/hawking/datetimeparser/DateTimeParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ public static DateAndTime timeParser(DateTime referenceDateTime, String tense, M
for (String timeSpan : spanHierarchy) {
if (componentsMap.get(timeSpan) != null) {
DateTimeComponent localDateTimeComponent = getInstance(timeSpan, componentsMap.get(timeSpan), tense, dateAndTime, abstractLanguage);
parseDateTimeComponent(localDateTimeComponent, abstractLanguage);
parseDateTimeComponent(localDateTimeComponent, abstractLanguage, componentsMap, dateAndTime);
}
}
return dateAndTime;
Expand Down Expand Up @@ -69,8 +69,51 @@ private static DateTimeComponent getInstance(String span, String sentenceToParse

return localDateTimeComponent;
}
/**
 * Converts an English month name or its common abbreviation to its month number.
 *
 * <p>Matching is case-insensitive. Accepted inputs are the full month names
 * ("january" .. "december") and the abbreviations "jan", "feb", "mar", "apr",
 * "jun", "jul", "aug", "sep", "sept", "oct", "nov" and "dec" ("may" has no
 * separate abbreviation). Surrounding whitespace is not stripped.
 *
 * @param monthName the month name or abbreviation to convert
 * @return the month number, from 1 (January) to 12 (December)
 * @throws IllegalArgumentException if {@code monthName} is {@code null} or is
 *         not one of the recognised names
 */
public static int getMonthInt(String monthName) {
    if (monthName == null) {
        // Report null the same way as any other unrecognised name instead of
        // failing with a bare NullPointerException inside the switch.
        throw new IllegalArgumentException("Invalid month name: " + monthName);
    }
    // Lower-case with Locale.ROOT: with the JVM default locale set to Turkish or
    // Azeri, "APRIL" would become "aprıl" (dotless i) and never match a case label.
    switch (monthName.toLowerCase(java.util.Locale.ROOT)) {
        case "january":
        case "jan":
            return 1;
        case "february":
        case "feb":
            return 2;
        case "march":
        case "mar":
            return 3;
        case "april":
        case "apr":
            return 4;
        case "may":
            return 5;
        case "june":
        case "jun":
            return 6;
        case "july":
        case "jul":
            return 7;
        case "august":
        case "aug":
            return 8;
        case "september":
        case "sep":
        case "sept":
            return 9;
        case "october":
        case "oct":
            return 10;
        case "november":
        case "nov":
            return 11;
        case "december":
        case "dec":
            return 12;
        default:
            throw new IllegalArgumentException("Invalid month name: " + monthName);
    }
}


public static void parseDateTimeComponent(DateTimeComponent dateTimeComponent, AbstractLanguage abstractLanguage) {
public static void parseDateTimeComponent(DateTimeComponent dateTimeComponent, AbstractLanguage abstractLanguage, Map<String, String> componentsMap, DateAndTime dateAndTime) {

if (dateTimeComponent.timeSpan.equals("")) {
return;
Expand All @@ -94,9 +137,26 @@ public static void parseDateTimeComponent(DateTimeComponent dateTimeComponent, A
}
} else {
if (dateTimeComponent.sentenceTense.equals("")) {
dateTimeComponent.sentenceTense = "PRESENT"; //No I18N
if (componentsMap.containsKey("month") &&
componentsMap.get("month") != null &&
componentsMap.get("month").contains("month_of_year") &&
componentsMap.entrySet().stream().allMatch(entry ->
(entry.getKey().equals("month") && entry.getValue() != null) || entry.getValue() == null)) {
String monthValue = componentsMap.get("month");
String monthOfYear = monthValue.replaceAll(".*<month_of_year>([^<]*)</month_of_year>.*", "$1").trim();
int monthOfYearInt = getMonthInt(monthOfYear);
int currentMonthInt = dateAndTime.getReferenceTime().getMonthOfYear();
if (monthOfYearInt >= currentMonthInt) {
dateTimeComponent.sentenceTense = "PRESENT"; // Current or future month
} else {
dateTimeComponent.sentenceTense = "PAST"; // Past month
}
} else {
dateTimeComponent.sentenceTense = "PRESENT"; // Default assignment
}
}


switch (dateTimeComponent.sentenceTense) {
case "PAST":
dateTimeComponent.past();
Expand All @@ -112,4 +172,5 @@ public static void parseDateTimeComponent(DateTimeComponent dateTimeComponent, A

dateTimeComponent.setPreviousDependency();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,11 @@ private static ParsedDate tagShrinker(String parseText, ParsedDate parserDateCur
private static ParsedDate tagAlternator(String parseText, ParsedDate parserDateCurrent) {
List<Triple<String, Integer, Integer>> triples = parserDateCurrent.getOutputWithOffsets();
String tag_xml = parserDateCurrent.getTaggedWithXML();
if ((tag_xml.contains("day_of_week") || tag_xml.contains("current_day")) && (tag_xml.contains("month_of_year") || tag_xml.contains("month_span")) && tag_xml.contains("exact_number")) {
List<Triple<String, Integer, Integer>> triple = parserDateCurrent
.getOutputWithOffsets();
if ((tag_xml.contains("day_of_week") || tag_xml.contains("current_day")) &&
(tag_xml.contains("month_of_year") || tag_xml.contains("month_span")) &&
tag_xml.contains("exact_number") &&
!tag_xml.contains("</day_of_week> <implict_prefix>of</implict_prefix>")) {
List<Triple<String, Integer, Integer>> triple = parserDateCurrent.getOutputWithOffsets();
String date_xml = parserDateCurrent.getTaggedWithXML();
for (int i = 0; i < triple.size(); i++) {
Triple<String, Integer, Integer> triplet = triples.get(i);
Expand Down
6 changes: 6 additions & 0 deletions src/main/java/com/zoho/hawking/utils/DateTimeProperties.java
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,9 @@ private String removeTimeZone(String parsedText) {
returnText = returnText.replaceAll("breakfast", "8 AM"); //No I18N
returnText = returnText.replaceAll("lunch", "1 PM"); //No I18N
returnText = returnText.replaceAll("dinner", "8 PM"); //No I18N
returnText = returnText.replaceAll("around", "from"); //No I18N
returnText = returnText.endsWith("before") ? returnText.replaceAll("before", "back") : returnText; //No I18N
returnText = returnText.replaceAll("(?i)(last week\\s+(of\\s+)?(January|February|March|April|May|June|July|August|September|October|November|December)|((January|February|March|April|May|June|July|August|September|October|November|December)\\s+last week))", "4th week $3$5");
returnText = returnText.replaceAll("null",""); //No I18N
returnText = returnText.replaceAll("\\s{2,}", " ").trim();//No I18N
return returnText;
Expand All @@ -197,6 +200,9 @@ private String removeTimeZone(String parsedText) {
returnText = returnText.replaceAll("breakfast", "8 AM"); //No I18N
returnText = returnText.replaceAll("lunch", "1 PM"); //No I18N
returnText = returnText.replaceAll("dinner", "8 PM"); //No I18N
returnText = returnText.replaceAll("around", "from"); //No I18N
returnText = returnText.endsWith("before") ? returnText.replaceAll("before", "back") : returnText; //No I18N
returnText = returnText.replaceAll("(?i)(last week\\s+(of\\s+)?(January|February|March|April|May|June|July|August|September|October|November|December)|((January|February|March|April|May|June|July|August|September|October|November|December)\\s+last week))", "4th week $3$5");
returnText = returnText.replaceAll("null",""); //No I18N
returnText = returnText.replaceAll("\\s{2,}", " ").trim();//No I18N
returnText = test.length() > 0 ? returnText : test;
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/zoho/hawking/utils/RecognizerTagger.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
public class RecognizerTagger {

private final static Pattern implictPrefix = Pattern.compile(
"^(beginning|upcoming|starting|previous|current|between|coming|within|ending|before|until|after|since|start|forty|this|next|last|past|from|till|rest|most|with|the|for|few|end|in|at|on|of|by|an|a)$");
"^(beginning|upcoming|starting|previous|current|between|around|coming|within|ending|before|until|after|since|start|forty|this|next|last|past|from|till|rest|most|with|the|for|few|end|in|at|on|of|by|an|a)$");
private final static Pattern implictPostfix = Pattern.compile("^(back|ago)$");

private final static Pattern second_span = Pattern.compile("^(second|seconds|sec|secs)$");
Expand Down

0 comments on commit 3878fac

Please sign in to comment.