Skip to content

Commit

Permalink
RFC3339 timestamps (#24)
Browse files Browse the repository at this point in the history
  • Loading branch information
mattford63 authored and chaochenq committed Apr 22, 2016
1 parent 6a88340 commit 653bcf2
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 37 deletions.
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
/*
* Copyright 2014-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
*
* Licensed under the Amazon Software License (the "License").
* You may not use this file except in compliance with the License.
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
*
* http://aws.amazon.com/asl/
*
* or in the "license" file accompanying this file.
* This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*
* or in the "license" file accompanying this file.
* This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and limitations under the License.
*/
package com.amazon.kinesis.streaming.agent.processing.parsers;

/**
* Common regex patterns used to construct log-parsing formats
*
*
* @author chaocheq
*
*/
Expand All @@ -28,14 +28,14 @@ public class PatternConstants {
public static final String WORD = "\\w+";
public static final String NOTSPACE = "\\S+";
public static final String NOQUOTE = "[^\"]+";

// TODO: can be more restrictive
public static final String IP = "(?:[\\d\\.]+)";
public static final String HOSTNAME = "(?:[\\w\\-]+)(?:\\.(?:[\\w\\-]+))*(?:\\.?|\\b)";
public static final String IPORHOST = "(?:" + HOSTNAME + "|" + IP + ")";
public static final String URIHOST = "(?:" + IPORHOST + "(?::" + POSINT + ")?)";
public static final String USER = "(?:[a-zA-Z0-9._-]+)";

public static final String MONTHDAY = "(?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9])";
public static final String DAY = "(?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?)";
public static final String MONTH = "\\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\\b";
Expand All @@ -47,32 +47,38 @@ public class PatternConstants {
public static final String HTTPDATE = "(?:" + MONTHDAY + "/" + MONTH + "/" + YEAR + ":" + TIME + "\\s" + INT + ")";
public static final String APACHEERRORLOGTIMESTAMP = "(?:" + DAY + "\\s+" + MONTH + "\\s+" + MONTHDAY + "\\s+" + TIME + "\\s+" + YEAR + ")";
public static final String SYSLOGTIMESTAMP = "(?:" + MONTH + "\\s+" + MONTHDAY + "\\s+" + TIME + ")";

// RFC 3339 style timestamp: a YYYY-MM-DD date, a literal uppercase 'T', an hh:mm:ss
// time with optional fractional seconds, then either 'Z' or a numeric +hh:mm / -hh:mm
// offset. Only the digit counts are checked; field ranges (e.g. month 01-12) are not.
public static final String RFC3339 = "(?:\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d+)?)(?:Z|(?:[-+]\\d{2}:\\d{2}))";
// Syslog facility token: two NONNEGINT matches separated by one character and
// wrapped in angle brackets.
// NOTE(review): the "." separator is not escaped, so it matches any single character
// rather than only a literal dot - confirm whether "\\." was intended.
public static final String SYSLOGFACILITY = "<" + NONNEGINT + "." + NONNEGINT + ">";
public static final String PROG = "([\\w\\._/%-]+)";
public static final String SYSLOGPROG = PROG + "(?:\\[(" + POSINT + ")\\])?";

public static final String COMMON_APACHE_LOG = "^(" + IPORHOST + ") " +
"(" + USER + ") " +
"(" + USER + ") " +
"\\[(" + HTTPDATE + ")\\] " +
"\"(" + NOQUOTE + ")\" " +
"(" + INT + "|\\-) " +
"(" + INT + "|\\-)";

public static final String COMBINED_APACHE_LOG = COMMON_APACHE_LOG + " " +
"\"(" + NOQUOTE + ")\" " +
"\"(" + NOQUOTE + ")\"";

public static final String APACHE_ERROR_LOG = "^\\[(" + APACHEERRORLOGTIMESTAMP + ")\\] " +
"\\[(?:(" + WORD + "):)?(" + WORD + ")\\] " +
"\\[pid (" + NONNEGINT + ")(?::tid (" + NONNEGINT + "))?\\] " +
"\\[client (" + URIHOST + ")\\] " +
"(" + ANYDATA + ")";

public static final String SYSLOG_BASE = "^(" + SYSLOGTIMESTAMP + ") " +
"(?:" + SYSLOGFACILITY + " )?" +
"(" + USER + ") " +
"(" + USER + ") " +
SYSLOGPROG + ": " +
"(" + ANYDATA + ")";

// Syslog line whose leading timestamp matches RFC3339 instead of SYSLOGTIMESTAMP.
// Layout, anchored at the start of the input: timestamp, an optional facility token,
// a USER token, the program name with an optional bracketed number, ": ", then the
// remaining data.
// Explicit capture groups, in order: timestamp, USER token, PROG, the POSINT inside
// the brackets, ANYDATA. Presumably these line up with SysLogParser.SYSLOG_FIELDS;
// this assumes the sub-patterns not shown here add no capture groups - verify.
public static final String RFC3339_SYSLOG_BASE = "^(" + RFC3339 + ") " +
"(?:" + SYSLOGFACILITY + " )?" +
"(" + USER + ") " +
SYSLOGPROG + ": " +
"(" + ANYDATA + ")";
}
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
/*
* Copyright 2014-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
*
* Licensed under the Amazon Software License (the "License").
* You may not use this file except in compliance with the License.
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
*
* http://aws.amazon.com/asl/
*
* or in the "license" file accompanying this file.
* This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*
* or in the "license" file accompanying this file.
* This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and limitations under the License.
*/
package com.amazon.kinesis.streaming.agent.processing.parsers;
Expand All @@ -26,33 +26,36 @@

/**
* Class for parsing and transforming records of syslog files
*
*
* Syslog format can vary a lot across platforms depending on the configurations.
* We are using the most typical form which is composed of:
*
*
* timestamp, hostname, program, processid, message
*
*
* @author chaocheq
*
*/
public class SysLogParser extends BaseLogParser {

/**
* The fields below are present in most syslogs
*
*
* TODO: facility is currently omitted because it varies across platforms
*/
public static final List<String> SYSLOG_FIELDS =
public static final List<String> SYSLOG_FIELDS =
ImmutableList.of("timestamp",
"hostname",
"program",
"processid",
"message");
public static final Pattern BASE_SYSLOG_PATTERN =

public static final Pattern BASE_SYSLOG_PATTERN =
Pattern.compile(PatternConstants.SYSLOG_BASE);

public SysLogParser(LogFormat format, String matchPattern,

/**
* Precompiled form of PatternConstants.RFC3339_SYSLOG_BASE, used as the entry
* pattern when the log format is RFC3339SYSLOG.
*/
public static final Pattern RFC3339_SYSLOG_PATTERN =
Pattern.compile(PatternConstants.RFC3339_SYSLOG_BASE);

public SysLogParser(LogFormat format, String matchPattern,
List<String> customFields) {
super(format, matchPattern, customFields);
}
Expand All @@ -64,21 +67,21 @@ public Map<String, Object> parseLogRecord(String record, List<String> fields) th
}
final Map<String, Object> recordMap = new LinkedHashMap<String, Object>();
Matcher matcher = logEntryPattern.matcher(record);

if (!matcher.matches()) {
throw new LogParsingException("Invalid log entry given the entry pattern");
}

if (matcher.groupCount() != fields.size()) {
throw new LogParsingException("The parsed fields don't match the given fields");
}

for (int i = 0; i < fields.size(); i++) {
// FIXME: how should we deal with a field that is missing?
// Should we pass it in as null, or leave it out of the result entirely?
recordMap.put(fields.get(i), matcher.group(i + 1));
}

return recordMap;
}

Expand All @@ -89,9 +92,13 @@ protected void initializeByDefaultFormat(LogFormat format) {
this.logEntryPattern = BASE_SYSLOG_PATTERN;
this.fields = SYSLOG_FIELDS;
return;
case RFC3339SYSLOG:
this.logEntryPattern = RFC3339_SYSLOG_PATTERN;
this.fields = SYSLOG_FIELDS;
return;
default:
throw new ConfigurationException("Log format is not accepted");
}
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ public static enum LogFormat {
COMMONAPACHELOG,
COMBINEDAPACHELOG,
APACHEERRORLOG,
SYSLOG
SYSLOG,
RFC3339SYSLOG
}

public static enum JSONFormat {
Expand Down Expand Up @@ -104,6 +105,8 @@ private static ILogParser buildLogParser(LogFormat format, String matchPattern,
return new ApacheLogParser(format, matchPattern, customFields);
case SYSLOG:
return new SysLogParser(format, matchPattern, customFields);
case RFC3339SYSLOG:
return new SysLogParser(format, matchPattern, customFields);
default:
throw new ConfigurationException("Log format " + format.name() + " is not accepted");
}
Expand Down

0 comments on commit 653bcf2

Please sign in to comment.