diff --git a/.gitignore b/.gitignore
index 8deed27..d918228 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,4 +17,5 @@ ade-ext/derby.log
.project
.settings
-.DS_Store
\ No newline at end of file
+.DS_Store
+.vscode
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..26d3352
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
diff --git a/.idea/compiler.xml b/.idea/compiler.xml
new file mode 100644
index 0000000..e2b97b7
--- /dev/null
+++ b/.idea/compiler.xml
@@ -0,0 +1,14 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/encodings.xml b/.idea/encodings.xml
new file mode 100644
index 0000000..5ce4586
--- /dev/null
+++ b/.idea/encodings.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/jarRepositories.xml b/.idea/jarRepositories.xml
new file mode 100644
index 0000000..712ab9d
--- /dev/null
+++ b/.idea/jarRepositories.xml
@@ -0,0 +1,20 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..f250cbb
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/uiDesigner.xml b/.idea/uiDesigner.xml
new file mode 100644
index 0000000..2b63946
--- /dev/null
+++ b/.idea/uiDesigner.xml
@@ -0,0 +1,124 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..35eb1dd
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/ade-assembly/src/main/baseline/nginx/analyze/nginx_analyze.tar.gz b/ade-assembly/src/main/baseline/nginx/analyze/nginx_analyze.tar.gz
new file mode 100644
index 0000000..30be2c3
Binary files /dev/null and b/ade-assembly/src/main/baseline/nginx/analyze/nginx_analyze.tar.gz differ
diff --git a/ade-assembly/src/main/baseline/nginx/upload/nginx.tar.gz b/ade-assembly/src/main/baseline/nginx/upload/nginx.tar.gz
new file mode 100644
index 0000000..ec4b4f1
Binary files /dev/null and b/ade-assembly/src/main/baseline/nginx/upload/nginx.tar.gz differ
diff --git a/ade-assembly/src/main/conf/setup.props b/ade-assembly/src/main/conf/setup.props
index cbc2c55..027aac3 100644
--- a/ade-assembly/src/main/conf/setup.props
+++ b/ade-assembly/src/main/conf/setup.props
@@ -7,7 +7,8 @@ adeext.parseErrorToKeep=100
adeext.parseErrorDaysTolerate=2
adeext.parseErrorTrackNullComponent=false
adeext.runtimeModelDataStoreAtSource=true
-adeext.useSparkLogs=true
+adeext.useSparkLogs=false
+adeext.useNginxLogs=true
adeext.msgRate10MinSlotsToKeep=24
adeext.msgRate10MinSubIntervalList=1,2,3,6,12,24
@@ -19,7 +20,9 @@ adeext.msgRateMergeSource=true
# are only used when ade.useSparkLogs=true
# --------------------------------------------------------------------
-ade.useSparkLogs=true
+ade.useNginxLogs=true
+ade.flowLayoutFileNginx=conf/xml/FlowLayoutNginx.xml
+ade.useSparkLogs=false
ade.flowLayoutFile=conf/xml/FlowLayout.xml
ade.flowLayoutFileSpark=conf/xml/FlowLayoutSpark.xml
ade.outputPath=output/
@@ -28,6 +31,7 @@ ade.xml.xsltDir=conf/xml
ade.criticalWords.file=conf/criticalWords.txt
ade.analysisGroupToFlowNameMapperClass=org.openmainframe.ade.ext.os.LinuxAnalysisGroupToFlowNameConstantMapper
ade.analysisGroupToFlowNameMapperClassSpark=org.openmainframe.ade.ext.os.SparkAnalysisGroupToFlowNameConstantMapper
+ade.analysisGroupToFlowNameMapperClassNginx=org.openmainframe.ade.ext.os.NginxAnalysisGroupToFlowNameConstantMapper
ade.outputFilenameGenerator=org.openmainframe.ade.ext.output.ExtOutputFilenameGenerator
ade.inputTimeZone=GMT+00:00
ade.outputTimeZone=GMT
diff --git a/ade-assembly/src/main/conf/xml/FlowLayoutNginx.xml b/ade-assembly/src/main/conf/xml/FlowLayoutNginx.xml
new file mode 100644
index 0000000..24c314f
--- /dev/null
+++ b/ade-assembly/src/main/conf/xml/FlowLayoutNginx.xml
@@ -0,0 +1,181 @@
+
+
+
+
+
+ ConsecutiveTimeFramer
+
+
+
+
+ ConsecutiveTimeFramer
+
+
+ ContinuousTimeFramer
+
+
+
+
+ ConsecutiveTimeFramer
+
+
+ ContinuousTimeFramer
+
+
+
+
+ ConsecutiveTimeFramer
+
+
+ ContinuousTimeFramer
+
+
+
+
+ ConsecutiveTimeFramer
+
+
+ ContinuousTimeFramer
+
+
+
+
+ ConsecutiveTimeFramer
+
+
+ ContinuousTimeFramer
+
+
+
+
+
+
+ ConsecutiveTimeFramer
+
+
+ ContinuousTimeFramer
+
+
+
+
+ ConsecutiveTimeFramer
+
+
+ ContinuousTimeFramer
+
+
+
+
+
+
+
+ oneMinuteTrain
+ 1
+ oneMinuteTrain
+
+ CriticalWordCountReporter
+
+
+ ClusteringContextScore
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ FullBernoulliClusterAwareScore
+
+
+ NGINX01
+
+
+ LastSeenLoggingScorerContinuous
+
+
+ LastSeenScorer
+ NGINX11
+
+
+ BestOfTwoScorer
+
+
+ NGINX02
+ NGINX12
+
+
+ SeverityScore
+
+
+ LogNormalScore
+
+
+
+
+ AdeWeightedMessageAnomalyScorerLogNormal
+
+
+
+
+ NGINX01
+ NGINX31
+ NGINX21
+ NGINX41
+
+
+ AdeAnomalyIntervalScorer
+ NGINX51
+
+
+ org.openmainframe.ade.ext.output.ExtendedAnalyzedIntervalDbStorer
+
+
+ org.openmainframe.ade.ext.output.ExtJaxbAnalyzedPeriodV2XmlStorer
+
+
+
+
+
+ org.openmainframe.ade.ext.output.ExtAnalyzedIntervalV2FullXmlStorer
+
+
+
+
+ org.openmainframe.ade.ext.output.ExtJaxbAnalyzedIntervalV2XmlStorer
+
+
+
+ NGINX51
+ NGINX52
+
+
\ No newline at end of file
diff --git a/ade-assembly/src/main/conf/xml/FlowLayoutNginx.xsd b/ade-assembly/src/main/conf/xml/FlowLayoutNginx.xsd
new file mode 100644
index 0000000..aac5564
--- /dev/null
+++ b/ade-assembly/src/main/conf/xml/FlowLayoutNginx.xsd
@@ -0,0 +1,175 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/ade-assembly/src/main/test/nginx_analysis_comp_test.sh b/ade-assembly/src/main/test/nginx_analysis_comp_test.sh
new file mode 100644
index 0000000..64291c8
--- /dev/null
+++ b/ade-assembly/src/main/test/nginx_analysis_comp_test.sh
@@ -0,0 +1,298 @@
+#!/bin/bash
+#************************************************************************
+# Copyright Contributors to the ADE Project.
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Anomaly Detection Engine for Linux Logs (ADE). *
+# *
+# ADE is free software: you can redistribute it and/or modify *
+# it under the terms of the GNU General Public License as published by *
+# the Free Software Foundation, either version 3 of the License, or *
+# (at your option) any later version. *
+# *
+# ADE is distributed in the hope that it will be useful, *
+# but WITHOUT ANY WARRANTY; without even the implied warranty of *
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+# GNU General Public License for more details. *
+# *
+# You should have received a copy of the GNU General Public License *
+# along with ADE. If not, see . *
+#************************************************************************
+
+#************************************************************************
+# Script: nginx_analysis_comp_test.sh
+#
+# Usage: nginx_analysis_comp_test.sh
+#
+# This script will upload/train/analyze a baseline set of nginx log files
+# so that the resulting analysis output (xml files) can be compared to the
+# baseline. The script is intended to be run after any change to the
+# analytics code to point any changes from the existing baseline. Changes
+# to the local constants below should be made to customize to your
+# environment.
+#
+#********************************************************************************
+
+ADE_HOME=`dirname "$0"`/../.. # assumes this script lives two directories below ADE_HOME (e.g. bin/test/)
+ADE_HOME=`cd "$ADE_HOME" && pwd`
+
+#***********************************************
+# local constants
+#***********************************************
+BASELINE_DIR="$ADE_HOME/baseline"
+BASELINE_UPLOAD_DIR="$BASELINE_DIR/nginx/upload"
+BASELINE_ANALYZE_DIR="$BASELINE_DIR/nginx/analyze"
+BASELINE_OUTPUT_DIR="$BASELINE_DIR/output"
+
+BIN_DIR="$ADE_HOME/bin"
+
+ANALYSIS_COMPARE_LOG="/tmp/nginx_compare_`date "+%Y%m%d%H%M%S"`.out"
+
+#***********************************************
+# analysis group constants; NOTE(review): dataType is "syslog" for an nginx baseline - confirm
+#***********************************************
+AG_PREFIX="regression_ag_"
+AG_NAME="$AG_PREFIX`date "+%Y%m%d%H%M%S"`"
+AG_JSON_DEF_FILENAME="/tmp/reg_ag.json"
+AG_JSON_DEF="{ \
+ \"groups\":{\"modelgroups\":[{\"name\" : \"$AG_NAME\", \"dataType\": \"syslog\", \"evaluationOrder\" : 1, \"ruleName\" : \"default\"}]}, \
+ \"rules\":[{\"name\" : \"default\", \"description\" : \"regression test rule to match all systems\", \"membershipRule\" : \"*\" }] \
+}"
+
+#***********************************************
+# Constants pointing to properties in setup
+# file (conf/setup.props)
+#***********************************************
+DB_URL_PROP="ade.databaseUrl"
+OUTDIR_PROP="ade.outputPath"
+ANALYSIS_OUTDIR_PROP="ade.analysisOutputPath"
+
+#***********************************************
+# sub-routines
+#***********************************************
+issue_command() {
+    # Run the command line given as arguments, print it together with its exit code and
+    # combined stdout/stderr, and return that exit code. COMMAND_OUT holds the output.
+    local cmd="$*" outfile rc
+    outfile=$(mktemp /tmp/cmdout.XXXXXX) || return 1   # unique file: safe for parallel runs
+    echo "**********************************"
+    echo "CMD = $cmd"
+    # eval: callers pass the whole command line as a single string
+    eval "$cmd" >"$outfile" 2>&1
+    rc=$?
+    COMMAND_OUT=$(cat "$outfile")
+    echo "RC: $rc"
+    echo "$COMMAND_OUT"
+    echo "**********************************"
+    rm -f "$outfile"
+    return $rc
+}
+
+get_current_prop_val() {
+    # Print the value of property $1 as found in $ADE_SETUP_FILE.
+    # Returns 1 when no name is given and 2 when no value is found; diagnostics go to
+    # stderr because callers capture stdout as the property value.
+    prop_name=$1
+    if [ -z "$prop_name" ]; then
+        echo "get_current_prop_val: no property name given" >&2
+        return 1
+    fi
+
+    prop_val=`grep "$prop_name=" "$ADE_SETUP_FILE" | cut -d \= -f 2`
+    if [ -z "$prop_val" ]; then
+        echo "get_current_prop_val: unable to get property value" >&2
+        return 2
+    fi
+    echo "$prop_val"
+}
+
+update_setup_file() {
+    # Set property $1 to value $2 in $ADE_SETUP_FILE, creating $ADE_SETUP_FILE.bak once first.
+    # Returns 0 on success, 1 on a missing argument, an unknown property or a failed rewrite.
+    if [ ! -f "$ADE_SETUP_FILE.bak" ]; then
+        cp "$ADE_SETUP_FILE" "$ADE_SETUP_FILE.bak"
+    fi
+
+    prop_name=$1
+    prop_val=$2
+
+    if [ -z "$prop_name" ]; then
+        echo "update_setup_file: no property name given."
+        return 1
+    elif [ -z "$prop_val" ]; then
+        echo "update_setup_file: no property value."
+        return 1
+    fi
+
+    grep "$prop_name=" "$ADE_SETUP_FILE"
+    if [ $? -ne 0 ]; then
+        echo "update_setup_file: property $prop_name not found in $ADE_SETUP_FILE"
+        return 1
+    fi
+
+    echo "update_setup_file: changing $prop_name to $prop_val"
+    tmpfilename="/tmp/`basename "$ADE_SETUP_FILE"`"
+    # '~' is the sed delimiter so '/' in path values needs no escaping; keep the file on failure
+    sed "s~\(${prop_name}=\).*\$~\1${prop_val}~" "$ADE_SETUP_FILE" > "$tmpfilename" || return 1
+    mv -f "$tmpfilename" "$ADE_SETUP_FILE"
+}
+
+# The database name and output directory are defined in
+# the setup file (conf/setup.props). In order to prevent
+# contaminating the current database and output directory
+# while running the test this method will change the
+# values in the setup file.
+change_dbname_and_output_dir() {
+    curr_db_val=$( get_current_prop_val "$DB_URL_PROP" )
+    if [ $? -ne 0 ] || [ -z "$curr_db_val" ]; then
+        echo "Failed retrieving current database name value"
+        return 1
+    fi
+    echo "current database value: $curr_db_val"
+
+    # An empty value or a failing lookup both mean the property could not be read.
+    curr_outdir_val=$( get_current_prop_val "$OUTDIR_PROP" )
+    if [ $? -ne 0 ] || [ -z "$curr_outdir_val" ]; then
+        echo "Failed retrieving current output directory value"
+        return 1
+    fi
+    echo "current output directory value: $curr_outdir_val"
+
+    # in setup.props change ade.databaseUrl to temp value
+    test_db_name="${curr_db_val}_regtest`date "+%Y%m%d%H%M%S"`"
+    update_setup_file "$DB_URL_PROP" "$test_db_name"
+    if [ $? -ne 0 ]; then
+        return 1
+    fi
+
+    # in setup.props change ade.outputPath value to temp value
+    test_outdir_name="${curr_outdir_val}regtest`date "+%Y%m%d%H%M%S"`"
+    update_setup_file "$OUTDIR_PROP" "$test_outdir_name"
+    if [ $? -ne 0 ]; then
+        return 1
+    fi
+
+    # in setup.props change ade.analysisOutputPath to temp value
+    test_analysis_outdir_name="$test_outdir_name/continuous"
+    update_setup_file "$ANALYSIS_OUTDIR_PROP" "$test_analysis_outdir_name"
+    if [ $? -ne 0 ]; then
+        return 1
+    fi
+
+    return 0
+}
+
+check_test_env() {
+    # Exit the script with code 1 as soon as one of the directories the test needs is missing.
+    [ -d "$BASELINE_DIR" ] || {
+        echo "ERROR: Unable to locate baseline directory: $BASELINE_DIR"
+        exit 1
+    }
+
+    [ -d "$BASELINE_UPLOAD_DIR" ] || {
+        echo "ERROR: Unable to locate baseline test nginx upload directory: $BASELINE_UPLOAD_DIR"
+        exit 1
+    }
+
+    [ -d "$BASELINE_ANALYZE_DIR" ] || {
+        echo "ERROR: Unable to locate baseline test nginx analyze directory: $BASELINE_ANALYZE_DIR"
+        exit 1
+    }
+
+    [ -d "$BASELINE_OUTPUT_DIR" ] || {
+        echo "ERROR: Unable to locate baseline output directory: $BASELINE_OUTPUT_DIR"
+        exit 1
+    }
+
+    [ -d "$BIN_DIR" ] || {
+        echo "ERROR: Unable to find location of bin directory: $BIN_DIR"
+        exit 1
+    }
+    return 0
+}
+
+cleanup_and_exit() {
+    # Restore the setup file from its .bak backup (when one exists) and exit the script.
+    # $1 - exit code to terminate with; defaults to 0 when omitted or empty.
+    exit_code=${1:-0}
+
+    echo "Performing test cleanup..."
+
+    # reset db name and output directory (restore conf/setup.props);
+    # the modified copy is kept next to it with a .regtest suffix
+    if [ -f "$ADE_SETUP_FILE.bak" ]; then
+        mv "$ADE_SETUP_FILE" "$ADE_SETUP_FILE.regtest"
+        mv "$ADE_SETUP_FILE.bak" "$ADE_SETUP_FILE"
+    fi
+
+    echo "Completed with RC=$exit_code"
+    # quoted so an unexpected value cannot be word-split into extra arguments
+    exit "$exit_code"
+}
+
+###################
+# main
+###################
+# change to ADE_HOME dir because setup.props contains relative paths; stop if that fails
+cd "$ADE_HOME" || { echo "ERROR: Unable to change to $ADE_HOME"; exit 1; }
+
+. bin/env.sh
+
+check_test_env
+
+# decompress any compressed baseline files
+find "$BASELINE_OUTPUT_DIR" -maxdepth 1 -type f -name "*.tgz" -exec tar -zxf {} --directory="$BASELINE_OUTPUT_DIR" \;
+
+# Change the database name and output paths in setup.props to scratch values to prevent
+# contaminating anything in the current database.
+change_dbname_and_output_dir || {
+    echo "Failed to perform temporary change to db name and output directory. Exiting"
+    cleanup_and_exit 1
+}
+
+# Create the database specified in setup.props
+issue_command "$BIN_DIR/controldb create" || {
+    echo "ERROR: Failed to create database"
+    cleanup_and_exit 1
+}
+
+## create test group for analysis: copy the group definition into a file, then load it
+echo $AG_JSON_DEF > $AG_JSON_DEF_FILENAME
+issue_command "$BIN_DIR/updategroups -j $AG_JSON_DEF_FILENAME" || {
+    echo "ERROR: Failed to define analysis group. Exiting"
+    cleanup_and_exit 1
+}
+
+## upload logfile for system
+## (input is taken from $BASELINE_UPLOAD_DIR)
+issue_command "$BIN_DIR/upload -d $BASELINE_UPLOAD_DIR" || {
+    echo "ERROR: Failed to upload data from $BASELINE_UPLOAD_DIR. Exiting"
+    cleanup_and_exit 1
+}
+
+## train analysis group
+## (the group named $AG_NAME that was defined above)
+issue_command "$BIN_DIR/train $AG_NAME" || {
+    echo "ERROR: Failed to train analysis group $AG_NAME. Exiting"
+    cleanup_and_exit 1
+}
+
+## analyze logfile
+## (the nginx_analyze.tar.gz archive under $BASELINE_ANALYZE_DIR)
+issue_command "$BIN_DIR/analyze -f $BASELINE_ANALYZE_DIR/nginx_analyze.tar.gz" || {
+    echo "ERROR: Failed to analyze data from $BASELINE_ANALYZE_DIR. Exiting"
+    cleanup_and_exit 1
+}
+
+echo
+echo "Performing compare of baseline to new analysis results..."
+
+# Run the comparison against the baseline output; its stdout goes to the compare log
+# and its exit status becomes the exit code of this script.
+$ADE_JAVA -cp $ADE_CLASSPATH -Dade.setUpFilePath=$ADE_SETUP_FILE org.openmainframe.ade.ext.regression.AdeAnalysisOutputCompare -b "$BASELINE_OUTPUT_DIR" >$ANALYSIS_COMPARE_LOG
+rc=$?
+
+echo "Analysis comparison output written to $ANALYSIS_COMPARE_LOG"
+
+cleanup_and_exit $rc
+
diff --git a/ade-core/src/main/java/org/openmainframe/ade/IAdeConfigProperties.java b/ade-core/src/main/java/org/openmainframe/ade/IAdeConfigProperties.java
index 306c3e3..3a9cfd6 100644
--- a/ade-core/src/main/java/org/openmainframe/ade/IAdeConfigProperties.java
+++ b/ade-core/src/main/java/org/openmainframe/ade/IAdeConfigProperties.java
@@ -55,9 +55,14 @@ public interface IAdeConfigProperties {
* Speciifies if running ADE on Spark logs.
* @return boolean : True if running on Spark logs.
*/
-
Boolean getUseSparkLogs();
+ /**
+ * Specifies if running ADE on Nginx logs.
+ * @return boolean : True if running on Nginx logs.
+ */
+ Boolean getUseNginxLogs();
+
/**
* @return the mode of the period, which is an enum
* describing the duration (and alignment) of the period
diff --git a/ade-core/src/main/java/org/openmainframe/ade/impl/AdeConfigPropertiesImpl.java b/ade-core/src/main/java/org/openmainframe/ade/impl/AdeConfigPropertiesImpl.java
index 7c00709..4ae47c1 100644
--- a/ade-core/src/main/java/org/openmainframe/ade/impl/AdeConfigPropertiesImpl.java
+++ b/ade-core/src/main/java/org/openmainframe/ade/impl/AdeConfigPropertiesImpl.java
@@ -96,6 +96,9 @@ public class AdeConfigPropertiesImpl implements IAdeConfigProperties {
@Property(key = ADE_PREFIX + "useSparkLogs", help = "Type of logs to run ade on")
private boolean m_useSparkLogs;
+ @Property(key = ADE_PREFIX + "useNginxLogs", help = "Type of logs to run ade on")
+ private boolean m_useNginxLogs;
+
@Property(key = ADE_PREFIX + "flowLayoutFile", help = "Path to Flow Layout file")
private String m_flowLayoutFile;
@@ -103,6 +106,10 @@ public class AdeConfigPropertiesImpl implements IAdeConfigProperties {
help = "Path to Flow Layout file for spark (matters only when ade.useSparkLogs=true)")
private String m_flowLayoutFileSpark;
+ @Property(key = ADE_PREFIX + "flowLayoutFileNginx",
+ help = "Path to Flow Layout file for nginx (matters only when ade.useNginxLogs=true)")
+ private String m_flowLayoutFileNginx;
+
@Property(key = ADE_PREFIX + "userRulesFile", required = false, help = "Optional path to User Rules file")
private String m_userRulesFile = null;
@@ -204,6 +211,12 @@ public class AdeConfigPropertiesImpl implements IAdeConfigProperties {
private Class extends AnalysisGroupToFlowNameMapper> m_analysisGroupToFlowNameMapperSpark
= AnalysisGroupToFlowNameUnityMapper.class;
+    @Property(key = ADE_PREFIX + "analysisGroupToFlowNameMapperClassNginx", required = false,
+            factory = FlowMapperClassFactory.class, help = "Optional class for mapping analysis groups to flow names (Nginx). "
+            + "Must extend AnalysisGroupToFlowNameMapper. Used only when ade.useNginxLogs=true")
+    private Class<? extends AnalysisGroupToFlowNameMapper> m_analysisGroupToFlowNameMapperNginx
+            = AnalysisGroupToFlowNameUnityMapper.class; // default used when the optional property is not set
+
@Property(key = ADE_OVERRIDE_VERSION_CHECK, required = false,
help = "Allow Ade to run with a database version different from the JAR version")
private boolean m_overrideVersionCheck = false;
@@ -273,6 +286,9 @@ private void validateProps() throws AdeUsageException {
if (m_useSparkLogs){
FileUtils.assertExists(new File(m_criticalWordsFile), new File(m_flowLayoutFileSpark));
}
+ if (m_useNginxLogs){
+ FileUtils.assertExists(new File(m_criticalWordsFile), new File(m_flowLayoutFileNginx));
+ }
} catch (FileNotFoundException e) {
throw new AdeUsageException("File specified in setup properties not found!", e);
@@ -308,6 +324,9 @@ public final String getFlowLayoutFile() {
if (m_useSparkLogs){
return m_flowLayoutFileSpark;
}
+ if (m_useNginxLogs){
+ return m_flowLayoutFileNginx;
+ }
return m_flowLayoutFile;
}
@@ -316,6 +335,11 @@ public final Boolean getUseSparkLogs() {
return m_useSparkLogs;
}
+ @Override
+ public final Boolean getUseNginxLogs() {
+ return m_useNginxLogs; // primitive flag bound to the "useNginxLogs" property, boxed on return
+ }
+
@Override
public final TimeZone getOutputTimeZone() {
return m_outputTimeZone;
@@ -386,6 +410,9 @@ public final Class extends AnalysisGroupToFlowNameMapper> getAnalysisGroupToFl
if (m_useSparkLogs){
return m_analysisGroupToFlowNameMapperSpark;
}
+ if (m_useNginxLogs){
+ return m_analysisGroupToFlowNameMapperNginx;
+ }
return m_analysisGroupToFlowNameMapper;
}
diff --git a/ade-core/src/main/java/org/openmainframe/ade/impl/flow/factory/FlowFactory.java b/ade-core/src/main/java/org/openmainframe/ade/impl/flow/factory/FlowFactory.java
index ea81601..5544d8d 100644
--- a/ade-core/src/main/java/org/openmainframe/ade/impl/flow/factory/FlowFactory.java
+++ b/ade-core/src/main/java/org/openmainframe/ade/impl/flow/factory/FlowFactory.java
@@ -81,7 +81,10 @@ public FlowFactory() throws AdeException {
if (Ade.getAde().getConfigProperties().getUseSparkLogs()){
FLOW_LAYOUT_XSD_File_Name = File.separator + "FlowLayoutSpark.xsd";
}
- else{
+ else if (Ade.getAde().getConfigProperties().getUseNginxLogs()) {
+ FLOW_LAYOUT_XSD_File_Name = File.separator + "FlowLayoutNginx.xsd";
+ }
+ else {
FLOW_LAYOUT_XSD_File_Name = File.separator + "FlowLayout.xsd";
}
diff --git a/ade-ext/src/main/java/org/openmainframe/ade/ext/main/AdeMaskLog.java b/ade-ext/src/main/java/org/openmainframe/ade/ext/main/AdeMaskLog.java
index b617d0b..a09c3f7 100644
--- a/ade-ext/src/main/java/org/openmainframe/ade/ext/main/AdeMaskLog.java
+++ b/ade-ext/src/main/java/org/openmainframe/ade/ext/main/AdeMaskLog.java
@@ -54,9 +54,11 @@ This file is part of Anomaly Detection Engine for Linux Logs (ADE).
import org.openmainframe.ade.ext.os.parser.LinuxSyslog3164ParserFreeForm;
import org.openmainframe.ade.ext.os.parser.LinuxSyslog3164ParserWithCompAndPid;
import org.openmainframe.ade.ext.os.parser.LinuxSyslog3164ParserWithMark;
+import org.openmainframe.ade.ext.os.parser.NginxLogParser;
import org.openmainframe.ade.ext.os.parser.SparklogParser;
import org.openmainframe.ade.ext.os.parser.LinuxSyslog5424ParserBase;
import org.openmainframe.ade.ext.os.parser.LinuxSyslogLineParser;
+import org.openmainframe.ade.ext.os.parser.NginxLogLineParser;
import org.openmainframe.ade.ext.os.parser.SparklogLineParser;
import org.openmainframe.ade.ext.service.AdeExtMessageHandler;
import org.openmainframe.ade.ext.os.AdeExtPropertiesFactory;
@@ -79,6 +81,8 @@ public class AdeMaskLog extends ExtControlProgram {
private static SparklogLineParser[] mSparkLineParsers;
+ private static NginxLogLineParser[] mNginxLineParsers;
+
private static Pattern validIPV4Pattern;
private static Pattern validIPV6Pattern;
private static Pattern validEmailPattern;
@@ -235,6 +239,12 @@ private static boolean isSpark() throws AdeException{
return AdeExt.getAdeExt().getConfigProperties().isSparkLog();
}
+ /**
+ * Checks whether Nginx log processing is enabled, as reported by isNginxLog() on the AdeExt config properties.
+ */
+ private static boolean isNginx() throws AdeException{
+ return AdeExt.getAdeExt().getConfigProperties().isNginxLog();
+ }
/**
* Read and write file specified by input and output file name mask system
@@ -340,6 +350,12 @@ private static void createParsers() throws AdeInternalException {
};
SparklogParser.setAdeExtProperties((LinuxAdeExtProperties) linuxProperties);
}
+ else if (isNginx()) {
+ mNginxLineParsers = new NginxLogLineParser[] {
+ new NginxLogParser(),
+ };
+ NginxLogParser.setAdeExtProperties((LinuxAdeExtProperties) linuxProperties);
+ }
else{
mLineParsers = new LinuxSyslogLineParser[] {
new LinuxSyslog5424ParserBase(),
@@ -382,6 +398,17 @@ private String generateMaskedLine(String currentLine) throws AdeException{
return outline;
}
+ if (isNginx()){
+ for (NginxLogLineParser lineParser : mNginxLineParsers) {
+ gotLine = lineParser.parseLine(currentLine);
+ if (gotLine) {
+ String oldSystemName = lineParser.getRemoteAddress();
+ String oldText = lineParser.getRequest();
+ return createNewLine(currentLine, oldSystemName, oldText);
+ }
+ }
+ }
+
// Linux Syslogs
for (LinuxSyslogLineParser lineParser : mLineParsers) {
gotLine = lineParser.parseLine(currentLine);
diff --git a/ade-ext/src/main/java/org/openmainframe/ade/ext/main/helper/AdeInputStreamHandlerExt.java b/ade-ext/src/main/java/org/openmainframe/ade/ext/main/helper/AdeInputStreamHandlerExt.java
index 0ae25f8..c76ae19 100644
--- a/ade-ext/src/main/java/org/openmainframe/ade/ext/main/helper/AdeInputStreamHandlerExt.java
+++ b/ade-ext/src/main/java/org/openmainframe/ade/ext/main/helper/AdeInputStreamHandlerExt.java
@@ -101,7 +101,7 @@ public final void incomingStreamFromFile(File file) throws AdeException {
a_adeInputStream = new AdeInputStreamExt(is, props, m_adeExtProperties, parseReportFilename);
- /* Indicate this is a new file, this will allow an interval broken into
+ /* Indicate this is a new file, this will allow an interval broken into
* to log files. */
incomingSeparator(new FileSeperator(file.getName()));
@@ -112,7 +112,7 @@ public final void incomingStreamFromFile(File file) throws AdeException {
/**
* Get the parse report filename based on the logFileName.
*
- * @param logfileName
+ * @param name
* @throws AdeException
*/
protected final String getParseReportFilename(String name) throws AdeException {
diff --git a/ade-ext/src/main/java/org/openmainframe/ade/ext/main/helper/AdeInputStreamHandlerLinux.java b/ade-ext/src/main/java/org/openmainframe/ade/ext/main/helper/AdeInputStreamHandlerLinux.java
index a827644..fb7467a 100644
--- a/ade-ext/src/main/java/org/openmainframe/ade/ext/main/helper/AdeInputStreamHandlerLinux.java
+++ b/ade-ext/src/main/java/org/openmainframe/ade/ext/main/helper/AdeInputStreamHandlerLinux.java
@@ -31,6 +31,8 @@ This file is part of Anomaly Detection Engine for Linux Logs (ADE).
import org.openmainframe.ade.ext.os.AdeExtProperties;
import org.openmainframe.ade.ext.os.parser.LinuxSyslogLineParser;
import org.openmainframe.ade.ext.os.parser.LinuxSyslogMessageReader;
+import org.openmainframe.ade.ext.os.parser.NginxLogLineParser;
+import org.openmainframe.ade.ext.os.parser.NginxLogMessageReader;
import org.openmainframe.ade.ext.os.parser.SparklogLineParser;
import org.openmainframe.ade.ext.os.parser.SparklogMessageReader;
import org.openmainframe.ade.ext.stats.MessageRateStats;
@@ -48,6 +50,9 @@ public AdeInputStreamHandlerLinux(AdeExtProperties adeExtProperties) throws AdeE
public static boolean isSpark() throws AdeException{
return AdeExt.getAdeExt().getConfigProperties().isSparkLog();
}
+ public static boolean isNginx() throws AdeException{ // true when the AdeExt config properties report Nginx logs
+ return AdeExt.getAdeExt().getConfigProperties().isNginxLog();
+ }
/**
* Handling a stream
@@ -79,6 +84,10 @@ protected final void beforeSendMessage(IMessageInstance mi) throws AdeException
// Keep statistics for this MI
msgRateStats.addMessage(mi.getMessageId(), mi.getDateTime().getTime(), sparkReader.isWrapperMessage());
}
+ else if (isNginx()) {
+ final NginxLogMessageReader nginxReader = (NginxLogMessageReader) a_adeInputStream.getReader();
+ msgRateStats.addMessage(mi.getMessageId(), mi.getDateTime().getTime(), nginxReader.isWrapperMessage());
+ }
else{
final LinuxSyslogMessageReader linuxReader = (LinuxSyslogMessageReader) a_adeInputStream.getReader();
msgRateStats.addMessage(mi.getMessageId(), mi.getDateTime().getTime(), linuxReader.isWrapperMessage());
@@ -95,7 +104,7 @@ protected final void beforeSendMessage(IMessageInstance mi) throws AdeException
* @throws AdeFlowException
*/
private void handleLoggerUnavailable(IMessageInstance mi) throws AdeFlowException, AdeException {
- if (!isSpark()){
+ if (!isSpark() && !isNginx()){
if (LinuxSyslogLineParser.isSyslogNgRestarted(mi)) {
/* Indicate the SysLogNg has restarted. */
incomingSeparator(new FileSeperator(mi.getSourceId(), "syslog-ng starting"));
@@ -103,5 +112,4 @@ private void handleLoggerUnavailable(IMessageInstance mi) throws AdeFlowExceptio
}
}
}
-
}
diff --git a/ade-ext/src/main/java/org/openmainframe/ade/ext/os/NginxAnalysisGroupToFlowNameConstantMapper.java b/ade-ext/src/main/java/org/openmainframe/ade/ext/os/NginxAnalysisGroupToFlowNameConstantMapper.java
new file mode 100644
index 0000000..ea313ac
--- /dev/null
+++ b/ade-ext/src/main/java/org/openmainframe/ade/ext/os/NginxAnalysisGroupToFlowNameConstantMapper.java
@@ -0,0 +1,37 @@
+/*
+
+ Copyright Contributors to the ADE Project.
+
+ SPDX-License-Identifier: GPL-3.0-or-later
+
+ This file is part of Anomaly Detection Engine for Linux Logs (ADE).
+
+ ADE is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ ADE is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with ADE. If not, see .
+
+*/
+package org.openmainframe.ade.ext.os;
+
+import org.openmainframe.ade.flow.AnalysisGroupToFlowNameConstantMapper;
+
+/**
+ * Flow-name mapper for Nginx logs: hands the constant flow name "NGINX" to
+ * AnalysisGroupToFlowNameConstantMapper. NOTE(review): presumably must match a flow in FlowLayoutNginx.xml - confirm.
+ */
+public class NginxAnalysisGroupToFlowNameConstantMapper extends AnalysisGroupToFlowNameConstantMapper {
+
+ public NginxAnalysisGroupToFlowNameConstantMapper() {
+ super("NGINX");
+ }
+
+}
diff --git a/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/NginxLogLineParser.java b/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/NginxLogLineParser.java
new file mode 100644
index 0000000..ab109e9
--- /dev/null
+++ b/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/NginxLogLineParser.java
@@ -0,0 +1,283 @@
+/*
+
+ Copyright Contributors to the ADE Project.
+
+ SPDX-License-Identifier: GPL-3.0-or-later
+
+ This file is part of Anomaly Detection Engine for Linux Logs (ADE).
+
+ ADE is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ ADE is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with ADE. If not, see <http://www.gnu.org/licenses/>.
+
+*/
+package org.openmainframe.ade.ext.os.parser;
+
+import java.util.Date;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.openmainframe.ade.actions.IParsingQualityReporter;
+import org.openmainframe.ade.ext.os.LinuxAdeExtProperties;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.joda.time.DateTime;
+
+/**
+ * An abstract class for extracting data from a Nginx log message.
+ * Subclasses are expected to implement the parseLine() method to parse
+ * a line and set the instance variable values as appropriate. A typical
+ * subclass will call the parseLine method with a regex pattern and capturing
+ * groups for each of the instance variables it wants to extract.
+ * The features we consider for Nginx logs are:
+ * 1. m_timestamp : Timestamp on the message
+ * 2. m_remoteAddress : Remote Address of the message
+ * 3. m_remoteUser : Remote User of the message
+ * 4. m_request : The request field on the message line
+ * 5. m_status: The status of the request
+ * 6. m_bytes: The number of bytes sent
+ */
+public abstract class NginxLogLineParser {
+
+ /**
+ * Default logger for this class.
+ */
+ private static final Logger logger = LoggerFactory.getLogger(NginxLogLineParser.class);
+
+ /**
+ * UTF8_BOM regex.
+ */
+ protected static final String UTF8_BOM = "\\xEF\\xBB\\xBF";
+
+ /**
+ * The optional BOM and PRI, this will be used in pattern searching.
+ */
+ protected static final String BOM_AND_PRI = "(?:" + UTF8_BOM + ")?" + "(?:<\\p{Digit}{1,2}>)?";
+
+ /**
+ * Component of the message
+ */
+ protected String m_component;
+
+ /**
+ * Whether the hostname truncation has already been logged.
+ */
+ private boolean isHostnameTruncationLogged = false;
+
+ /**
+ * The LinuxAdeExtProperties that contains configurations and properties from
+ * the start of AdeExt main class.
+ */
+ protected LinuxAdeExtProperties m_LinuxAdeExtProperties;
+
+ /**
+ * An object used for monitoring parsing quality, or null if none.
+ */
+ protected IParsingQualityReporter m_parsingQualityReport = null;
+
+ /**
+ * Time of the message.
+ */
+ protected Date m_timestamp;
+
+ /**
+ * The remote address of the request.
+ */
+ protected String m_remoteAddress;
+
+ /**
+ * The remote user of the request.
+ */
+ protected String m_remoteUser;
+
+ /**
+ * The message request.
+ */
+ protected String m_request;
+
+ /**
+ * Status code of the request.
+ */
+ protected int m_status;
+
+ /**
+ * Number of bytes of the request.
+ */
+ protected int m_bytes;
+
+ /**
+ * Parses a line and sets the instance variables from it.
+ *
+ * @param line The line to parse.
+ * @return false if the line could not be parsed.
+ */
+ public abstract boolean parseLine(String line);
+
+ /**
+ * Returns the property object containing configurations from the start of
+ * AdeExt main class.
+ *
+ * @return The AdeExtProperties object.
+ */
+ public final LinuxAdeExtProperties getLinuxAdeExtProperties() {
+ return m_LinuxAdeExtProperties;
+ }
+
+ /**
+ * Converts a date from String format to a Date object.
+ *
+ * @param source the source name.
+ * @param dateTimeString the date and time string value.
+ * @return Date object with date/time-stamp of the Linux log.
+ */
+ public abstract Date toDate(String source, String dateTimeString);
+
+ /**
+ * Returns the DateTime determined by the most recent call to the
+ * toDate(String source, String dateTimeString) method.
+ *
+ * @return The DateTime of the Nginx log line last converted; may be null (the reader checks for it).
+ */
+ public abstract DateTime getLastDeterminedDateTime();
+
+    /**
+     * Parses a line against a regex Pattern. On a full match every field is set from its
+     * capturing group (group number 0 yields "") and the remote address is passed to toDate as
+     * the source; otherwise no field is modified. m_component is always set to "master".
+     * @param pattern - The pattern to parse.
+     * @param remoteAddress - Capturing group number for the remote address.
+     * @param remoteUser - Capturing group number for the remote user.
+     * @param timestamp - Capturing group number for the timestamp.
+     * @param request - Capturing group for the request.
+     * @param status - Capturing group for the status code.
+     * @param bytes - Capturing group for the number of bytes sent.
+     * @param line - The line to parse.
+     * @return false if the line did not match or its date or numbers could not be converted.
+     */
+    protected final boolean parseLine(Pattern pattern, int remoteAddress,
+            int remoteUser, int timestamp, int request, int status, int bytes, String line) {
+        final Matcher matcher = pattern.matcher(line);
+        if (matcher.matches()) {
+            try {
+                final String address = toString(matcher, remoteAddress);
+                final Date time = toDate(address, toString(matcher, timestamp));
+                final int statusCode = Integer.parseInt(toString(matcher, status));
+                final int byteCount = Integer.parseInt(toString(matcher, bytes));
+                m_timestamp = time;
+                m_remoteAddress = address;
+                m_remoteUser = toString(matcher, remoteUser);
+                m_request = toString(matcher, request);
+                m_status = statusCode;
+                m_bytes = byteCount;
+                m_component = "master";
+                return true;
+            } catch (IllegalArgumentException e) {
+                logger.warn("Could not convert a field of nginx log line: " + line, e);
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Reads one capturing group from a matcher that has already matched.
+     *
+     * @param m Matcher holding the current match.
+     * @param group The capturing group number; 0 means the field is not captured.
+     * @return empty string if group is 0, otherwise the text captured by that group.
+     */
+    private String toString(Matcher m, int group) {
+        final boolean captured = group != 0;
+        return captured ? m.group(group) : "";
+    }
+
+ /**
+ * Returns the message time-stamp held in m_timestamp, which parseLine sets from the timestamp group.
+ *
+ * @return the time-stamp; null until a value has been assigned.
+ */
+ public final Date getMsgTime() {
+ return m_timestamp;
+ }
+
+ /**
+ * Returns the component of the message held in m_component.
+ *
+ * @return the component; parseLine always assigns "master", the reader may overwrite it afterwards.
+ */
+ public final String getComponent() {
+ return m_component;
+ }
+
+ /**
+ * Returns the remote address captured from the message.
+ *
+ * @return the remote address string value.
+ */
+ public final String getRemoteAddress() {
+ return m_remoteAddress;
+ }
+
+ /**
+ * Returns the remote user captured from the message.
+ *
+ * @return the remote user string value.
+ */
+ public final String getRemoteUser() {
+ return m_remoteUser;
+ }
+
+ /**
+ * Returns the status code of the request.
+ *
+ * @return the status code int value.
+ */
+ public final int getStatus() {
+ return m_status;
+ }
+
+ /**
+ * Returns the number of bytes recorded for the request.
+ *
+ * @return the number of bytes as an int.
+ */
+ public final int getBytes() {
+ return m_bytes;
+ }
+
+ /**
+ * Returns the request text captured from the message.
+ *
+ * @return the request text string value.
+ */
+ public final String getRequest() {
+ return m_request;
+ }
+
+ /**
+ * Sets the parsingQualityReport used for monitoring parsing quality; the value is only stored here.
+ *
+ * @param parsingQualityReport the ParsingQualityReporter object to be used, or null for none.
+ */
+ public final void setParseQualityReport(IParsingQualityReporter parsingQualityReport) {
+ m_parsingQualityReport = parsingQualityReport;
+ }
+
+    /**
+     * Renders the fields captured from the message as a single line of labelled values.
+     */
+    @Override
+    public String toString() {
+        final String layout = "timestamp=(%s) remote_address=(%s) remote_user=(%s) request=(%s) status=(%s) bytes=(%s)";
+        final Object[] fields = {m_timestamp, m_remoteAddress, m_remoteUser, m_request, m_status, m_bytes};
+        return String.format(layout, fields);
+    }
+}
diff --git a/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/NginxLogMessageReader.java b/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/NginxLogMessageReader.java
new file mode 100644
index 0000000..d20d633
--- /dev/null
+++ b/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/NginxLogMessageReader.java
@@ -0,0 +1,627 @@
+/*
+
+ Copyright Contributors to the ADE Project.
+
+ SPDX-License-Identifier: GPL-3.0-or-later
+
+ This file is part of Anomaly Detection Engine for Linux Logs (ADE).
+
+ ADE is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ ADE is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with ADE. If not, see <http://www.gnu.org/licenses/>.
+
+*/
+package org.openmainframe.ade.ext.os.parser;
+
+import java.io.File;
+import java.io.IOException;
+import java.text.Format;
+import java.text.SimpleDateFormat;
+import java.util.Collection;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Matcher;
+
+import org.openmainframe.ade.Ade;
+import org.openmainframe.ade.AdeInputStream;
+import org.openmainframe.ade.AdeMessageReader;
+import org.openmainframe.ade.actions.IParsingQualityReporter;
+import org.openmainframe.ade.data.IDataFactory;
+import org.openmainframe.ade.data.DataType;
+import org.openmainframe.ade.data.IMessageInstance;
+import org.openmainframe.ade.data.ISource;
+import org.openmainframe.ade.dataStore.IDataStoreSources;
+import org.openmainframe.ade.exceptions.AdeException;
+import org.openmainframe.ade.exceptions.AdeInternalException;
+import org.openmainframe.ade.exceptions.AdeParsingException;
+import org.openmainframe.ade.exceptions.AdeUsageException;
+import org.openmainframe.ade.ext.AdeExt;
+import org.openmainframe.ade.ext.data.GroupsQueryImpl;
+import org.openmainframe.ade.ext.data.ManagedSystemInfo;
+import org.openmainframe.ade.ext.main.helper.AdeExtRequestType;
+import org.openmainframe.ade.ext.os.LinuxAdeExtProperties;
+import org.openmainframe.ade.ext.stats.MessageRateStats;
+import org.openmainframe.ade.ext.stats.MessagesWithParseErrorStats;
+import org.openmainframe.ade.ext.stats.MessagesWithUnexpectedSource;
+import org.openmainframe.ade.ext.utils.ExtFileUtils;
+import org.openmainframe.ade.impl.data.TextClusteringComponentModel;
+import org.openmainframe.ade.impl.data.TextClusteringModel;
+import org.openmainframe.ade.impl.data.IThresholdSetter;
+import org.openmainframe.ade.utils.Pair;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.joda.time.DateTime;
+
+import static org.openmainframe.ade.ext.os.parser.ReaderLoggerMessages.*;
+
+/**
+ * The reader for Nginx Logs.
+ * Note: ParseQualityReport infrastructure is defined in this class. But, it's not being
+ * used to output any parse error messages. Parse Error messages are replaced by the
+ * MessagesWithParseErrorStats class.
+ */
+public class NginxLogMessageReader extends AdeMessageReader {
+
+ /**
+ * The default UNASSIGNED analysis group. Note: This is NOT the internal id of
+ * the UNASSIGNED analysis group.
+ */
+ public static final int UNASSIGNED_ANALYSIS_GROUP_ID = -1;
+ /**
+ * Default value for when a component doesn't exist in the message.
+ */
+ public static final String LINUX_LINE_NO_COMPONENT_NAME = "(NO_COMPONENT)";
+ /**
+ * The ASCII controlled characters, 0x00-0x1F and 0x7F.
+ */
+ public static final String ASCII_CONTROLLED_CHARACTERS = "\\p{Cntrl}";
+
+ /**
+ * Pattern of IO Exception messages that indicates the reading from the input stream
+ * should be terminated gracefully.
+ * Example of syntax:
+ * "(Connection reset by peer|.*connection terminated.*)"
+ */
+ final static String IOEXCEPTION_TERMINATE_GRACEFULLY_STRING = "(.*Connection reset by peer.*|.*Connection timed out.*)".toUpperCase();
+
+ /**
+ * The default value for when a GMT offset is invalid.
+ */
+ public static final long GMT_OFFSET_INVALID = 362340;
+
+ /**
+ * The threshold percentage to determine if the log data was successfully parsed.
+ */
+ private static final double goodPercentThreshold = .05;
+ /**
+ * The default logger for this class.
+ */
+ private static final Logger logger = LoggerFactory.getLogger(NginxLogMessageReader.class);
+ /**
+ * Object to create and keep track of textual clusters.
+ */
+ private TextClusteringComponentModel m_textClusteringComponentModel;
+ /**
+ * Keep track of message instances that are waiting to be read.
+ */
+ private IMessageInstance m_messageInstanceWaiting = null;
+ /**
+ * The previous message instance to be read.
+ */
+ private IMessageInstance m_prevMessageInstance = null;
+ /**
+ * DataFactory to create message instances.
+ */
+ private IDataFactory m_dataFactory;
+ /**
+ * For preprocessing Linux messages.
+ */
+ private LinuxMessageTextPreprocessor m_messageTextPreprocessor;
+
+ /**
+ * Number of lines that have parsing errors.
+ */
+ private int m_errorLineCount = 0;
+ /**
+ * Number of lines that do not have a source.
+ */
+ private int m_unexpectedSourceLineCount = 0;
+
+ /**
+ * Number of lines where the component name is missing.
+ */
+ private int m_componentMissingLineCount = 0;
+
+ /**
+ * The starting time of the parser.
+ */
+ private long m_parserStartTime;
+ /**
+ * The starting date of the parser.
+ */
+ private Date m_parserStartDate = new Date();
+
+ /**
+ * Whether the message returned from readMessageInstance() is the 2nd message
+ * generated from a wrapper message.
+ */
+ private boolean m_isWrapperMessage = false;
+ /**
+ * The number of wrapper messages.
+ */
+ private long m_wrapperMessageCount = 0;
+ /**
+ * The number of non-wrapper messages.
+ */
+ private long m_nonWrapperMessageCount = 0;
+
+ /**
+ * Number of suppressed messages remaining.
+ */
+ private int m_suppressedMessagesRemaining = 0;
+ /**
+ * Number of non-wrapper messages count.
+ */
+ private long m_suppressedNonWrapperMessageCount = 0;
+
+ /**
+ * Name of the last newly seen source.
+ */
+ private String m_lastNewlySeenSourceId = null;
+
+ /**
+ * The parser for a line of Nginx Logs.
+ */
+ private NginxLogLineParser[] m_lineParsers;
+
+    /**
+     * Maps a source name (the lower-cased remote address of a message) to the source ID used by Ade.
+     */
+    private final Map<String, String> sourceToSourceIdMap = new HashMap<String, String>();
+
+ /**
+ * The Linux specific properties to be used containing configurations from start of AdeExt main class.
+ */
+ private LinuxAdeExtProperties m_adeExtProperties;
+
+ /**
+ * Holds system specific information.
+ */
+ private ManagedSystemInfo m_info = null;
+
+ /**
+ * An object used for monitoring parsing quality, or null if none.
+ */
+ private IParsingQualityReporter m_parsingQualityReport = null;
+
+    /**
+     * Constructs a reader for the given input stream: takes the data factory and the text clustering
+     * model from Ade, attaches a LinuxMessageTextPreprocessor to that model and finishes initialization.
+     * @param stream Input stream for parsing.
+     * @param parseReportFilename the name of the parse report.
+     * @param adeExtProperties the AdeExt configuration kept by this reader and passed to the line parsers.
+     * @throws AdeException if one of the Ade lookups or the remaining initialization fails.
+     */
+    public NginxLogMessageReader(AdeInputStream stream, String parseReportFilename,
+            LinuxAdeExtProperties adeExtProperties) throws AdeException {
+        super(stream);
+        final Ade ade = Ade.getAde();
+        m_dataFactory = ade.getDataFactory();
+        m_textClusteringComponentModel = ade.getActionsFactory().getTextClusteringModel(true);
+        m_messageTextPreprocessor = new LinuxMessageTextPreprocessor();
+        m_textClusteringComponentModel.setMessageTextPreprocessor(m_messageTextPreprocessor);
+        initializeOtherInformation(adeExtProperties, parseReportFilename);
+    }
+
+    /**
+     * Main logic for this class. Reads the next message and stores the information extracted
+     * from it in a MessageInstance object. A waiting or suppressed message, if there is one,
+     * is returned first. Otherwise lines are read until a line parser accepts one; a message id
+     * is then generated and the remote address is resolved to a source id. A line whose source
+     * is rejected is recorded in MessagesWithUnexpectedSource and skipped, and a non-empty line
+     * that no parser accepts is recorded in MessagesWithParseErrorStats; reading then continues.
+     * @return the next message instance, or null when the end of the stream is reached.
+     * @throws IOException declared for the overridden method.
+     * @throws AdeException if reading the stream or processing the message fails.
+     */
+    @Override
+    public final IMessageInstance readMessageInstance() throws IOException, AdeException {
+        String currentLine;
+        boolean gotLine = false;
+        boolean unexpectedSource = false;
+        if (m_messageInstanceWaiting != null) {
+            return getMessageInstanceWaiting();
+        }
+        if (m_suppressedMessagesRemaining > 0) {
+            updateSuppressedMessageStats();
+            return m_prevMessageInstance;
+        }
+        while (!gotLine) {
+            currentLine = getCurrentLine();
+            if (currentLine == null) {
+                handleEndOfStream();
+                return null;
+            }
+            currentLine = currentLine.replaceAll(ASCII_CONTROLLED_CHARACTERS, "");
+            // Offer the line to each parser in turn; the first one that accepts it wins.
+            for (NginxLogLineParser lineParser : m_lineParsers) {
+                gotLine = lineParser.parseLine(currentLine);
+                if (gotLine) {
+                    String msgId = getMessageId(lineParser);
+                    DateTime dateTime = handleDateTime(lineParser);
+                    final String sourceId = getAndProcessSourceId(lineParser.getRemoteAddress());
+                    if (sourceId == null) {
+                        // The source is not one being analyzed: record the line, let the
+                        // accounting below count it, and continue with the next line.
+                        gotLine = false;
+                        unexpectedSource = true;
+                        MessagesWithUnexpectedSource.addMessage(lineParser.getRemoteAddress(),
+                                lineParser.getMsgTime().getTime(), currentLine);
+                        break;
+                    }
+                    m_isWrapperMessage = false;
+                    m_nonWrapperMessageCount++;
+                    InputTimeZoneManager.updateTimezone(sourceId, dateTime);
+                    m_prevMessageInstance = m_dataFactory.newMessageInstance(
+                            sourceId,
+                            lineParser.getMsgTime(),
+                            msgId,
+                            lineParser.getRequest(),
+                            lineParser.getRemoteAddress(),
+                            IMessageInstance.Severity.UNKNOWN); // no severity is extracted for Nginx
+                    /* Setting the messageInstanceWaiting to null, which would stop wrappers such as SUDO or CRON
+                       to be passed to ade. */
+                    m_messageInstanceWaiting = null;
+                    return m_prevMessageInstance;
+                }
+            }
+            // No message came out of this line: count it as unexpected source or parse error.
+            if (!gotLine) {
+                if (unexpectedSource) {
+                    m_unexpectedSourceLineCount++;
+                    unexpectedSource = false;
+                } else if (currentLine.length() != 0) {
+                    final MessagesWithParseErrorStats stats = MessagesWithParseErrorStats.getParserErrorStats();
+                    stats.addMessage(currentLine);
+                    m_errorLineCount++;
+                }
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Hands out the pending message instance, empties the pending slot and counts a wrapper message.
+     * @return the message instance that was waiting to be read.
+     */
+    private IMessageInstance getMessageInstanceWaiting() {
+        m_isWrapperMessage = true;
+        m_wrapperMessageCount += 1;
+        final IMessageInstance pending = m_messageInstanceWaiting;
+        m_messageInstanceWaiting = null;
+        return pending;
+    }
+    /**
+     * Accounts for one suppressed message: increments the suppressed non-wrapper counter,
+     * decrements the number of suppressed messages remaining, and stores the extra message
+     * that the preprocessor returns for the previous message instance as the waiting
+     * message instance. Nothing is returned; the caller returns the previous instance.
+     * @throws AdeException if obtaining the extra message fails.
+     */
+    private void updateSuppressedMessageStats() throws AdeException {
+        m_suppressedMessagesRemaining -= 1;
+        m_suppressedNonWrapperMessageCount += 1;
+        m_messageInstanceWaiting = m_messageTextPreprocessor.getExtraMessage(m_prevMessageInstance);
+    }
+
+    /**
+     * Reads the next line using this reader. An IOException whose upper-cased message matches
+     * IOEXCEPTION_TERMINATE_GRACEFULLY_STRING is logged and treated as the end of the stream.
+     * @return the line read, or null at the end of the input stream or after a graceful termination.
+     * @throws AdeException an AdeParsingException for any other IOException, including one without a message.
+     */
+    private String getCurrentLine() throws AdeException {
+        try {
+            return this.readLine();
+        } catch (IOException e) {
+            // getMessage() may be null; an exception without a message can never match the
+            // graceful-termination pattern, so it is reported as a read failure below.
+            final String exceptionMsg = (e.getMessage() == null) ? "" : e.getMessage().toUpperCase();
+            if (!exceptionMsg.matches(IOEXCEPTION_TERMINATE_GRACEFULLY_STRING)) {
+                throw new AdeParsingException("Failed reading from log file", e);
+            }
+            logger.warn("Normal IOException: " + NginxLogMessageReader.class.getSimpleName()
+                    + " will be terminated gracefully.", e);
+            return null;
+        }
+    }
+
+    /**
+     * Fetches the DateTime last determined by the given parser and, if there is one and the
+     * GMT offset was not defined through the properties, updates the GMT offset from it.
+     * When the parser has no DateTime, or the offset was defined through the properties, the
+     * offset stored in m_info is left unchanged.
+     *
+     * @param lineParser the parser being used to parse the line.
+     * @return the parser's last determined DateTime, which may be null.
+     */
+    private DateTime handleDateTime(NginxLogLineParser lineParser) {
+        final DateTime lastSeen = lineParser.getLastDeterminedDateTime();
+        if (lastSeen == null || m_adeExtProperties.isGmtOffsetDefined()) {
+            return lastSeen;
+        }
+        updateGmtOffset(lastSeen);
+        return lastSeen;
+    }
+
+    /**
+     * Takes the zone offset in effect at the message's instant, converts it to whole hours
+     * (TimeUnit truncates any fractional hour) and sets it in the ManagedSystemInfo object.
+     * @param dateTime the date/time-stamp of the current message.
+     */
+    private void updateGmtOffset(DateTime dateTime) {
+        final long offsetMillis = dateTime.getZone().getOffset(dateTime.getMillis());
+        m_info.setGmtOffset(TimeUnit.MILLISECONDS.toHours(offsetMillis));
+    }
+    /**
+     * Lets the message text preprocessor update the parser's component for the current request
+     * and then generates the message id from that component's clustering model.
+     * @param lineParser the parser being used to parse the line; its m_component is overwritten.
+     * @return the generated message id.
+     * @throws AdeException
+     */
+    private String getMessageId(NginxLogLineParser lineParser) throws AdeException {
+        final Pair<String, IThresholdSetter> update = m_messageTextPreprocessor.updateComponent(lineParser.m_component, lineParser.m_request);
+        lineParser.m_component = update.m_first;
+        return generateMessageId(lineParser, update.m_second);
+    }
+
+    /**
+     * Looks up the clustering model of the parser's component and builds the message id as the
+     * model's component name, an underscore and the id of the cluster the request falls into.
+     * @param lineParser the parser being used to parse the line.
+     * @param thresholdSetter threshold for comparing two strings; a SimpleThresholdSetter is used if null.
+     * @return the generated message id.
+     * @throws AdeException
+     */
+    private String generateMessageId(NginxLogLineParser lineParser, IThresholdSetter thresholdSetter) throws AdeException {
+        final IThresholdSetter effectiveSetter = (thresholdSetter != null)
+                ? thresholdSetter : new TextClusteringComponentModel.SimpleThresholdSetter();
+        final TextClusteringModel model = m_textClusteringComponentModel.getTextClusteringModel(lineParser.m_component, effectiveSetter);
+        return model.getComponentName() + "_" + model.getOrAddCluster(lineParser.m_request, lineParser.m_timestamp).getClusterId();
+    }
+
+ /**
+ * Tells whether the last message handed out was a waiting (wrapper-generated) message rather than a parsed line.
+ * @return true if the message is a generated wrapper message.
+ */
+ public final boolean isWrapperMessage() {
+ return m_isWrapperMessage;
+ }
+
+    /**
+     * Returns the source id for a source name, which for Nginx is the remote address of the
+     * request. The name is lower-cased and looked up in sourceToSourceIdMap. A name seen for
+     * the first time is registered: its runtime model data is read from file, it is added to
+     * the data store with the UNASSIGNED analysis group id, m_info (if set) is written to the
+     * data store, the analysis group is re-evaluated and the name is cached in the map. For an
+     * ANALYZE request with the source option provided, only the listed sources are accepted.
+     * @param source the source name taken from the parsed line.
+     * @return the source id, or null if the source is not accepted; in that case nothing is
+     *         cached or written to the data store.
+     * @throws AdeException
+     */
+    private String getAndProcessSourceId(String source) throws AdeException {
+        /* Make source case insensitive. */
+        source = source.toLowerCase();
+
+        /* A source that was already processed needs none of the work below. */
+        String sourceId = sourceToSourceIdMap.get(source);
+        if (sourceId != null) {
+            return sourceId;
+        }
+
+        /*
+         * If the source option is provided, then need to make sure the only sources being
+         * analyzed are the sources given in that option.
+         */
+        final AdeExtRequestType requestType = m_adeExtProperties.getRequestType();
+        if (m_adeExtProperties.isSourceOptionProvided() && requestType == AdeExtRequestType.ANALYZE) {
+            final Collection<ISource> sources = m_adeExtProperties.getSources();
+            boolean isValidSource = false;
+            for (ISource s : sources) {
+                if ((s.getSourceId()).equalsIgnoreCase(source)) {
+                    isValidSource = true;
+                    break;
+                }
+            }
+
+            /* Return null, and do not update the sourceToSourceIdMap */
+            if (!isValidSource) {
+                return null;
+            }
+        }
+
+        /* Update database to add the source, then update m_info with the source. */
+        final IDataStoreSources dataStoreSources = Ade.getAde().getDataStore().sources();
+
+        /* The sourceId is the same as source. */
+        sourceId = source;
+        m_lastNewlySeenSourceId = sourceId;
+        m_adeExtProperties.setLastNewlySeenSourceId(m_lastNewlySeenSourceId);
+
+        /* Read the RuntimeModelData from file. */
+        final RuntimeModelDataManager runtimeModelDataManager = new RuntimeModelDataManager();
+        runtimeModelDataManager.readModelDataFromFile(source);
+
+        /* Provide a mapping between source to analysisGroup. */
+        dataStoreSources.addSourceAndAnalysisGroup(sourceId, UNASSIGNED_ANALYSIS_GROUP_ID);
+
+        final ISource registeredSource = dataStoreSources.getOrAddSource(source);
+        if (m_info != null) {
+            /* Add m_info to the database. It's only added when SysInfo is available. */
+            m_info.updateDataStore(registeredSource);
+        }
+
+        /* Update the analysis group by calling an atomic method that evaluates analysis group rules,
+         * and commits to the database in an atomic transaction. */
+        final String analysisGroup = GroupsQueryImpl.updateSourcesAnalysisGroup(sourceId);
+        MessageRateStats.addSourceAndAnalysisGroup(sourceId, analysisGroup);
+
+        logger.trace("Datastore updated for Linux system: " + source
+                + ", that maps to sourceId: " + sourceId);
+
+        /* Add the source to the mapping */
+        sourceToSourceIdMap.put(source, sourceId);
+
+        return sourceId;
+    }
+
+ /**
+ * Private method to initialize other information: records the parser start time,
+ * creates the line parsers (currently a single NginxLogParser), stores the AdeExt
+ * properties and passes them to NginxLogParserBase, sets up the parse quality
+ * reporter if requested, and creates the ManagedSystemInfo with the GMT offset from
+ * the properties, or GMT_OFFSET_INVALID when none is defined. Note that the parser is
+ * constructed before the properties are passed to NginxLogParserBase.
+ * @param adeExtProperties Properties file that contains AdeExt configurations.
+ * @param parseReportFilename the file name of the parse report.
+ * @throws AdeException an AdeUsageException when creating the ManagedSystemInfo fails with an IllegalArgumentException.
+ */
+ private void initializeOtherInformation(LinuxAdeExtProperties adeExtProperties, String parseReportFilename)
+ throws AdeException {
+ m_parserStartTime = System.nanoTime();
+ m_lineParsers = new NginxLogLineParser[] {
+ new NginxLogParser(),
+ };
+ m_adeExtProperties = adeExtProperties;
+ NginxLogParserBase.setAdeExtProperties(m_adeExtProperties);
+ setParsingQualityReporterIfRequested(parseReportFilename);
+ try {
+ if (m_adeExtProperties.isGmtOffsetDefined()) {
+ m_info = new ManagedSystemInfo(m_adeExtProperties.getGmtOffset(), "linux");
+ } else {
+ m_info = new ManagedSystemInfo(GMT_OFFSET_INVALID, "linux");
+ }
+ } catch (IllegalArgumentException e) {
+ throw new AdeUsageException("Invalid SysInfo argument(s)", e);
+ }
+ }
+
+    /**
+     * Opens the parse quality report when the properties request one; otherwise does nothing.
+     * The output home directory is created if needed, the report file is opened inside it
+     * under the given name (missing parent directories are created), and the reporter is
+     * handed to every line parser.
+     * Note: m_adeExtProperties and m_lineParsers must already be set when this is called.
+     * @param parseReportFilename The name of the parse report file.
+     * @throws AdeException
+     */
+    public final void setParsingQualityReporterIfRequested(String parseReportFilename) throws AdeException {
+        if (m_adeExtProperties.isParseReportRequested()) {
+            final File outputHome = AdeExt.getAdeExt().getOutputDirectoryManager().getOutputHome();
+            ExtFileUtils.createDir(outputHome);
+            final File reportFile = new File(outputHome, parseReportFilename);
+            reportFile.getParentFile().mkdirs();
+            m_parsingQualityReport = Ade.getAde().getActionsFactory().createParsingQualityReporter();
+            m_parsingQualityReport.open(reportFile.getPath());
+            for (int i = 0; i < m_lineParsers.length; i++) {
+                m_lineParsers[i].setParseQualityReport(m_parsingQualityReport);
+            }
+        }
+    }
+
+    /**
+     * Logs summary messages indicating how the parsing went. The parsing statistics are always
+     * logged. For UPLOAD and ANALYZE requests a notification follows: a success message if more
+     * than 5% of the raw lines produced messages, otherwise an error message (ANALYZE uses a
+     * dedicated message when no message was parsed at all, which includes empty input).
+     * @throws AdeException
+     */
+    private void printStatEof() throws AdeException {
+        final Format formatter = new SimpleDateFormat("E, dd MMM yyyy HH:mm:ss Z");
+        final int rawLineCount = getLineNumber();
+        final long elapsedTime = System.nanoTime() - m_parserStartTime;
+        final double seconds = Math.ceil(elapsedTime / 1.0E09);
+        final AdeExtRequestType requestType = m_adeExtProperties.getRequestType();
+
+        logger.info(String.format(PARSED_DATA_STATS_MSG, rawLineCount, m_nonWrapperMessageCount,
+                m_suppressedNonWrapperMessageCount, m_wrapperMessageCount, m_errorLineCount,
+                m_componentMissingLineCount, m_unexpectedSourceLineCount, seconds));
+
+        final String startDate = formatter.format(m_parserStartDate);
+        String sourceId;
+        if (m_prevMessageInstance != null) {
+            sourceId = m_prevMessageInstance.getSourceId();
+        } else if (m_adeExtProperties.isSourceOptionProvided()) {
+            final Collection<ISource> sources = m_adeExtProperties.getSources();
+            sourceId = (sources.iterator().next()).getSourceId();
+        } else {
+            sourceId = "Unknown";
+        }
+        // With no raw lines the ratio would be 0/0 = NaN, which matches neither the "good"
+        // nor the "nothing parsed" test; report empty input as nothing parsed instead.
+        final double goodPercent = (rawLineCount == 0) ? 0.0
+                : ((double) m_nonWrapperMessageCount) / ((double) rawLineCount);
+
+        // Stays null when no notification could be built, so nothing is logged twice.
+        String message = null;
+        switch (requestType) {
+            case UPLOAD: {
+                try {
+                    final String uploadFormat = (goodPercent > goodPercentThreshold) ? GOOD_UPLOAD_MSG : BAD_UPLOAD_MSG;
+                    message = String.format(uploadFormat, startDate, rawLineCount, sourceId,
+                            m_nonWrapperMessageCount, DataType.SYSLOG.name());
+                } catch (Throwable t) {
+                    logger.error("An error occured - Internal Error: Building Notification ", t);
+                }
+                break;
+            }
+            case ANALYZE:
+                try {
+                    if (goodPercent > goodPercentThreshold) {
+                        message = String.format(GOOD_ANALYZE_MSG, startDate, rawLineCount, DataType.SYSLOG.name(),
+                                sourceId, m_nonWrapperMessageCount);
+                    } else if (goodPercent == 0) {
+                        message = String.format(NO_MSGS_PARSED_MSG, startDate, rawLineCount, DataType.SYSLOG.name(),
+                                sourceId);
+                    } else {
+                        message = String.format(BAD_ANALYZE_MSG, startDate, rawLineCount, DataType.SYSLOG.name(),
+                                sourceId, m_nonWrapperMessageCount);
+                    }
+                } catch (Throwable t) {
+                    logger.error("An error occured - Internal Error: Building Notification ", t);
+                }
+                break;
+            default:
+                break;
+        }
+        if (message != null) {
+            logger.info(message);
+        }
+    }
+
+    /**
+     * Performs the end-of-stream actions: closes the parsing quality report, if any, then logs the statistics.
+     * @throws AdeException
+     */
+    private void handleEndOfStream() throws AdeException {
+        final IParsingQualityReporter report = m_parsingQualityReport;
+        if (report != null) {
+            report.close();
+        }
+        printStatEof();
+    }
+
+}
diff --git a/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/NginxLogParser.java b/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/NginxLogParser.java
new file mode 100644
index 0000000..df7df06
--- /dev/null
+++ b/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/NginxLogParser.java
@@ -0,0 +1,82 @@
+/*
+
+ Copyright Contributors to the ADE Project.
+
+ SPDX-License-Identifier: GPL-3.0-or-later
+
+ This file is part of Anomaly Detection Engine for Linux Logs (ADE).
+
+ ADE is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ ADE is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with ADE. If not, see <http://www.gnu.org/licenses/>.
+
+*/
+
+package org.openmainframe.ade.ext.os.parser;
+
+import java.util.regex.Pattern;
+
+import org.openmainframe.ade.exceptions.AdeException;
+import org.openmainframe.ade.ext.os.LinuxAdeExtProperties;
+
+/**
+ * Parser for Nginx access-log lines. Each line is matched against the NGINX_LOG
+ * expression inherited from NginxLogParserBase, and the capturing-group positions of
+ * the remote address, remote user, time-stamp, request, status and byte count are
+ * passed on to the inherited parseLine overload.
+ */
+public class NginxLogParser extends NginxLogParserBase {
+
+ /**
+ * Compiled form of NGINX_LOG; static, so the expression is compiled once and reused
+ * for every input line. A sample line of the kind it is written for:
+ * 93.180.71.3 - - [17/May/2015:08:05:32 +0000] "GET /downloads/product_1 HTTP/1.1" 304 0 "-" "Debian APT-HTTP/1.3"
+ */
+ private static final Pattern pattern = Pattern.compile(NGINX_LOG);
+
+ /**
+ * Default constructor; delegates to the parent's no-argument constructor.
+ * @throws AdeException if the parent constructor fails.
+ */
+ public NginxLogParser() throws AdeException {
+ super();
+ }
+
+ /**
+ * Explicit-value constructor that hands the given properties to the parent explicit-value
+ * constructor.
+ * @param linuxAdeExtProperties Contains property and configuration information from the start of AdeExt main class.
+ * @throws AdeException if the parent constructor fails.
+ */
+ public NginxLogParser(LinuxAdeExtProperties linuxAdeExtProperties)
+ throws AdeException {
+ super(linuxAdeExtProperties);
+ }
+
+ /**
+ * Parses one access-log line by calling the inherited parseLine overload with the compiled
+ * pattern and the capturing-group positions of the fields to extract.
+ * As an example, for a line like:
+ * 93.180.71.3 - - [17/May/2015:08:05:32 +0000] "GET /downloads/product_1 HTTP/1.1" 304 0 "-" "-"
+ * group 1 is expected to hold the remote address "93.180.71.3", group 2 the remote user "-",
+ * group 3 the time-stamp, group 4 the request, group 5 the status "304" and group 6 the bytes "0".
+ * @param line A line from the Nginx access log.
+ * @return the result of the inherited parseLine call (true if the line was parsed successfully).
+ */
+ @Override
+ public boolean parseLine(String line) {
+ return parseLine(pattern, NGINX_LOG_REMOTE_ADDRESS_GROUP, NGINX_LOG_REMOTE_USER_GROUP,
+ NGINX_LOG_TIMESTAMP_GROUP, NGINX_LOG_REQUEST_GROUP, NGINX_LOG_STATUS_GROUP,
+ NGINX_LOG_BYTES_GROUP, line);
+ }
+
+}
diff --git a/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/NginxLogParserBase.java b/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/NginxLogParserBase.java
new file mode 100644
index 0000000..d535e0e
--- /dev/null
+++ b/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/NginxLogParserBase.java
@@ -0,0 +1,242 @@
+/*
+
+ Copyright Contributors to the ADE Project.
+
+ SPDX-License-Identifier: GPL-3.0-or-later
+
+ This file is part of Anomaly Detection Engine for Linux Logs (ADE).
+
+ ADE is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ ADE is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with ADE. If not, see <http://www.gnu.org/licenses/>.
+
+*/
+package org.openmainframe.ade.ext.os.parser;
+
+import java.util.Date;
+import java.util.TimeZone;
+
+import org.openmainframe.ade.Ade;
+import org.openmainframe.ade.IAdeConfigProperties;
+import org.openmainframe.ade.exceptions.AdeException;
+import org.openmainframe.ade.ext.os.LinuxAdeExtProperties;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.joda.time.DateTime;
+import org.joda.time.DateTimeZone;
+import org.joda.time.format.DateTimeFormat;
+import org.joda.time.format.DateTimeFormatter;
+
+/**
+ * An abstract base class for Nginx access-log parsers. This class defines the regular
+ * expression and capturing-group positions for an access-log line and converts the log
+ * time-stamp into a Date; how a concrete line is matched is left to subclasses.
+ * The time-zone settings are static and therefore shared by all parser instances.
+ */
+public abstract class NginxLogParserBase extends NginxLogLineParser {
+ /**
+ * Main logger for this class.
+ */
+ static final Logger s_logger = LoggerFactory.getLogger(NginxLogParserBase.class);
+
+ /**
+ * Start of the day after the first parser was created, in the output time-zone.
+ * A null value also marks that the static time-zone fields are not initialized yet.
+ */
+ private static DateTime END_OF_TODAY = null;
+
+ /**
+ * The input time-zone specified in setup.props.
+ */
+ private static DateTimeZone INPUT_TIME_ZONE;
+
+ /**
+ * The output time-zone specified in setup.props.
+ */
+ private static DateTimeZone OUTPUT_TIME_ZONE;
+
+ /**
+ * LinuxAdeExtProperties object that contains properties and configurations information from the start
+ * of AdeExt main class. Used as the fallback when a parser is constructed with null properties.
+ */
+ private static LinuxAdeExtProperties s_linuxAdeExtProperties = null;
+
+ /**
+ * Regular expression to extract the time-stamp, for example: 17/May/2015:08:05:57 +0000
+ */
+ public static final String NGINX_TIMESTAMP = "(\\d{2}/.{3}/\\d{4}:\\d{2}:\\d{2}:\\d{2} [+-]\\d{4})";
+
+ /**
+ * Regular expression for a whole access-log line. Its capturing groups are, in order: remote address,
+ * remote user, time-stamp, request, status and bytes; the two trailing quoted fields are not captured.
+ */
+ public static final String NGINX_LOG = "^" + "(.*) - (.*) \\[" + NGINX_TIMESTAMP + "\\] \"(.*)\" (.*) (.*) \".*\" \".*\"";
+
+ /*
+ * Within the NGINX_LOG regex string above, identify the regex
+ * capturing groups for the parts that we want to extract.
+ */
+ protected static final int NGINX_LOG_REMOTE_ADDRESS_GROUP = 1;
+ protected static final int NGINX_LOG_REMOTE_USER_GROUP = 2;
+ protected static final int NGINX_LOG_TIMESTAMP_GROUP = 3;
+ protected static final int NGINX_LOG_REQUEST_GROUP = 4;
+ protected static final int NGINX_LOG_STATUS_GROUP = 5;
+ protected static final int NGINX_LOG_BYTES_GROUP = 6;
+
+ /*
+ * DateTimeFormatter objects that can parse the time-stamp of an access-log
+ * line. A single format is listed because NGINX_TIMESTAMP only admits one layout
+ * (two-digit day, month, four-digit year, time, numeric offset); the array form
+ * lets toDate() iterate over further formats if any are added later.
+ */
+ protected static final DateTimeFormatter[] dt_formatters = {
+ DateTimeFormat.forPattern("dd/MMM/yyyy:HH:mm:ss Z")
+ };
+
+ /**
+ * Constructor for initializing the properties file and various time properties.
+ * @param linuxAdeExtProperties Contains property and configuration information; when null, the
+ * properties registered through setAdeExtProperties are used instead.
+ * @throws AdeException propagated from the time-zone initialization.
+ */
+ public NginxLogParserBase(LinuxAdeExtProperties linuxAdeExtProperties) throws AdeException {
+ /* Fall back to the statically registered properties when none are given. */
+ if (linuxAdeExtProperties == null) {
+ m_LinuxAdeExtProperties = s_linuxAdeExtProperties;
+ } else {
+ m_LinuxAdeExtProperties = linuxAdeExtProperties;
+ }
+
+ /* Set the start of today and timezone*/
+ initializeTimeZoneAndStartOfToday();
+ }
+
+ /**
+ * Default constructor; equivalent to calling the explicit-value constructor with null properties.
+ * @throws AdeException propagated from the explicit-value constructor.
+ */
+ public NginxLogParserBase() throws AdeException {
+ this(null);
+ }
+
+ /**
+ * Set the AdeExt properties used by parsers that are constructed without explicit properties.
+ * @param linuxAdeExtProperties The properties file that contains the configuration and properties information.
+ */
+ public static void setAdeExtProperties(LinuxAdeExtProperties linuxAdeExtProperties) {
+ s_linuxAdeExtProperties = linuxAdeExtProperties;
+ }
+
+ /**
+ * Returns the year stored in the AdeExt properties registered through setAdeExtProperties.
+ * Throws a NullPointerException if the registered properties are null.
+ * @return the year as an int value.
+ */
+ public static int getAdeExtPropertiesYear() {
+ return s_linuxAdeExtProperties.getYear();
+ }
+
+ /**
+ * Returns the input time zone specified in setup.props
+ * @return The input time zone, or null if no parser has been constructed yet.
+ */
+ public static DateTimeZone getInputTimeZone() {
+ return INPUT_TIME_ZONE;
+ }
+
+ /**
+ * Returns the output time zone specified in setup.props
+ * @return The output time zone, or null if no parser has been constructed yet.
+ */
+ public static DateTimeZone getOutputTimeZone() {
+ return OUTPUT_TIME_ZONE;
+ }
+
+ /**
+ * Converts the time-stamp captured from an access-log line into a Date.
+ * Unlike syslog, Nginx logs carry the year and a numeric UTC offset in every time-stamp
+ * (for example 17/May/2015:08:05:32 +0000), so no year has to be guessed and redefining
+ * the year in the setup file is ineffective.
+ * The string is tried against each formatter in dt_formatters in turn; the first successful
+ * parse is moved to the output time-zone, which keeps the instant unchanged, and is
+ * returned as a Date.
+ * Note: the configured input time-zone is not applied here because the offset is part of
+ * the time-stamp itself.
+ *
+ * @param source the source name string value; not used by this implementation.
+ * @param dateTimeString the date and time string value.
+ * @return Date object with date/time-stamp of the Nginx log line.
+ * @throws IllegalArgumentException if no formatter can parse dateTimeString; the last
+ * parse failure is attached as the cause.
+ */
+ @Override
+ public final Date toDate(String source, String dateTimeString) {
+ DateTime dt = null;
+ IllegalArgumentException lastFailure = null;
+ for (DateTimeFormatter fmt : dt_formatters) {
+ try {
+ dt = fmt.parseDateTime(dateTimeString);
+ /* The parsed offset already fixes the instant, so INPUT_TIME_ZONE is not applied. */
+ dt = dt.withZone(OUTPUT_TIME_ZONE);
+ /* AdeCore will take the Java Date object, and convert
+ * it to the output time-zone, then extract the hour. */
+ return dt.toDate();
+ } catch (IllegalArgumentException e) {
+ /* This exception can occur normally when iterating
+ * through the DateTimeFormatter objects. It is only
+ * an error worth noting when the dt object is not null.
+ */
+ if (dt != null) {
+ s_logger.error("Invalid argument encountered.", e);
+ }
+ /* Remember the failure so the final exception can carry its cause. */
+ lastFailure = e;
+ }
+ }
+ throw new IllegalArgumentException("Failed to parse date " + dateTimeString, lastFailure);
+ }
+
+ /**
+ * Set the END_OF_TODAY value and time-zone values. The time-zone values are taken from the Ade
+ * configuration properties as fixed offsets (TimeZone.getRawOffset(), so daylight-saving rules are
+ * not carried over). END_OF_TODAY is the current date-time in the output time-zone, plus one day,
+ * truncated to the start of that day.
+ * Note: These only need to be set once; later calls leave the values untouched.
+ * @throws AdeException propagated from obtaining the Ade configuration.
+ */
+ private static void initializeTimeZoneAndStartOfToday() throws AdeException {
+ synchronized (NginxLogParserBase.class) {
+ /* END_OF_TODAY doubles as the "already initialized" flag. */
+ if (END_OF_TODAY == null) {
+ final IAdeConfigProperties adeConfig = Ade.getAde().getConfigProperties();
+ final TimeZone timeZone = adeConfig.getInputTimeZone();
+ final TimeZone outputTimezone = adeConfig.getOutputTimeZone();
+ INPUT_TIME_ZONE = DateTimeZone.forOffsetMillis(timeZone.getRawOffset());
+ OUTPUT_TIME_ZONE = DateTimeZone.forOffsetMillis(outputTimezone.getRawOffset());
+ END_OF_TODAY = DateTime.now();
+ END_OF_TODAY = END_OF_TODAY.withZone(OUTPUT_TIME_ZONE);
+ END_OF_TODAY = END_OF_TODAY.plusDays(1);
+ END_OF_TODAY = END_OF_TODAY.withTimeAtStartOfDay();
+ }
+ }
+ }
+
+ /**
+ * Return the DateTime determined by the toDate(String source, String s) method.
+ * This parser does not keep the DateTime it last parsed, so there is nothing to return.
+ * @return always null.
+ */
+ public final DateTime getLastDeterminedDateTime() {
+ return null;
+ }
+
+}
diff --git a/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/ReaderFactory.java b/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/ReaderFactory.java
index fde4be2..e268b78 100644
--- a/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/ReaderFactory.java
+++ b/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/ReaderFactory.java
@@ -49,6 +49,10 @@ public AdeMessageReader getReader(AdeInputStream stream, String parseReportFilen
return new SparklogMessageReader(stream, parseReportFilename,
(LinuxAdeExtProperties) adeExtProperties);
}
+ if (AdeExt.getAdeExt().getConfigProperties().isNginxLog()){
+ return new NginxLogMessageReader(stream, parseReportFilename,
+ (LinuxAdeExtProperties) adeExtProperties);
+ }
return new LinuxSyslogMessageReader(stream, parseReportFilename,
(LinuxAdeExtProperties) adeExtProperties);
} else {
diff --git a/ade-ext/src/main/java/org/openmainframe/ade/ext/utils/AdeExtConfigProperties.java b/ade-ext/src/main/java/org/openmainframe/ade/ext/utils/AdeExtConfigProperties.java
index b4e57d4..0b55111 100644
--- a/ade-ext/src/main/java/org/openmainframe/ade/ext/utils/AdeExtConfigProperties.java
+++ b/ade-ext/src/main/java/org/openmainframe/ade/ext/utils/AdeExtConfigProperties.java
@@ -49,6 +49,7 @@ public class AdeExtConfigProperties {
private static final String MSG_RATE_MERGE_SOURCE = "adeext.msgRateMergeSource";
private static final String STATS_ROOT_DIR = "adeext.statsRootDir";
private static final String USE_SPARK = "adeext.useSparkLogs";
+ private static final String USE_NGINX = "adeext.useNginxLogs";
/* Constants for config property default values */
private static final String DEFAULT_STATS_ROOT_DIR = "output/ade-stats";
@@ -68,6 +69,7 @@ public class AdeExtConfigProperties {
private final boolean m_isMsgRateMergeSource;
private final String m_statsRootDir;
private final boolean m_useSparkLogs;
+ private final boolean m_useNginxLogs;
/**
* Set the AdeExtConfigProperties from the specified property file.
@@ -163,13 +165,21 @@ public AdeExtConfigProperties(String propertyFile) throws AdeException {
m_statsRootDir = DEFAULT_STATS_ROOT_DIR;
}
- /* Type of logs to use. True: Spark logs. Defaults to Linux Syslogs */
+ /* Type of logs to use. Defaults to Linux Syslogs */
- if (m_props.containsKey(USE_SPARK)){
+ if (m_props.containsKey(USE_SPARK) && m_props.getBooleanProperty(USE_SPARK)){
+ System.out.println("USING SPARK");
m_useSparkLogs = m_props.getBooleanProperty(USE_SPARK);
+ m_useNginxLogs = false;
+ }
+ else if (m_props.containsKey(USE_NGINX) && m_props.getBooleanProperty(USE_NGINX)) {
+ System.out.println("USING NGINX");
+ m_useNginxLogs = m_props.getBooleanProperty(USE_NGINX);
+ m_useSparkLogs = false;
}
else{
m_useSparkLogs = false;
+ m_useNginxLogs = false;
}
m_props.verifyAllPropertiesUsed();
@@ -240,6 +250,11 @@ public final Boolean isSparkLog(){
return m_useSparkLogs;
}
+ /** Returns true only when adeext.useNginxLogs is set to true and Spark logs are not selected; otherwise false. */
+ public final Boolean isNginxLog(){
+ return m_useNginxLogs;
+ }
+
/**
* Returns the root directory where statistics are written.
*
diff --git a/ade-ext/src/test/java/org/openmainframe/ade/ext/os/parser/TestNginxLogLineParser.java b/ade-ext/src/test/java/org/openmainframe/ade/ext/os/parser/TestNginxLogLineParser.java
new file mode 100644
index 0000000..b4e338b
--- /dev/null
+++ b/ade-ext/src/test/java/org/openmainframe/ade/ext/os/parser/TestNginxLogLineParser.java
@@ -0,0 +1,106 @@
+/*
+
+ Copyright Contributors to the ADE Project.
+
+ SPDX-License-Identifier: GPL-3.0-or-later
+
+ This file is part of Anomaly Detection Engine for Linux Logs (ADE).
+
+ ADE is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ ADE is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with ADE. If not, see <http://www.gnu.org/licenses/>.
+
+*/
+package org.openmainframe.ade.ext.os.parser;
+
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mockito;
+import org.openmainframe.ade.exceptions.AdeException;
+
+import java.util.regex.Pattern;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.openmainframe.ade.ext.os.parser.NginxLogParserBase.NGINX_LOG;
+
+public class TestNginxLogLineParser {
+ /** Access-log expression under test; compiled once and shared by every test case. */
+ private static final Pattern ACCESS_LOG = Pattern.compile(NGINX_LOG);
+ /** Smallest line used by the tests that still satisfies the access-log expression. */
+ private static final String MINIMAL_LINE = "address - - [17/May/2015:08:05:32 +0000] \"GET\" 0 0 \"-\" \"-\"";
+ NginxLogLineParser parser;
+ String oversizedPrefix;
+
+ /** Builds a fresh spy of the line parser and the long prefix used by the 255-character tests. */
+ @Before
+ public void setup() throws AdeException {
+ parser = Mockito.spy(NginxLogLineParser.class);
+ oversizedPrefix = "(usernameusernameusernameusernameusernameusernameusernameusernameusernameusernameusernameusernameusernameusername"
+ + "usernameusernameusernameusernameusernameusernameusernameusernameusernameusernameusernameusernameusername"
+ + "usernameusernameusernameusernameusernameusernameusernameusernameusernameusernameusernameusernameusername)";
+ }
+
+ /** A line copied from a real access log must be accepted. */
+ @Test
+ public void testWithRealLog() {
+ final String realLine = "93.180.71.3 - - [17/May/2015:08:05:32 +0000] \"GET /downloads/product_1 HTTP/1.1\" 304 0 \"-\" \"Debian APT-HTTP/1.3 (0.8.16~exp12ubuntu10.21)\"";
+ assertTrue(parser.parseLine(ACCESS_LOG, 1, 2, 3, 4, 5, 6, realLine));
+ }
+
+ /** The minimal line must be accepted. */
+ @Test
+ public void testParseLineWithMatchingPattern() {
+ assertTrue("Pattern matches for all parameters ",
+ parser.parseLine(ACCESS_LOG, 1, 2, 3, 4, 5, 6, MINIMAL_LINE));
+ }
+
+ /** A line that starts with the long prefix must still be accepted. */
+ @Test
+ public void testParseLineWith255CharacterHostname() {
+ assertTrue("Pattern matches but hostname has over 255 chars ",
+ parser.parseLine(ACCESS_LOG, 1, 2, 3, 4, 5, 6, oversizedPrefix + MINIMAL_LINE));
+ }
+
+ /** Parsing the same long-prefixed line twice must succeed on the second call as well. */
+ @Test
+ public void testParseLineWith255CharacterHostnameSecondTime() {
+ final String prefixedLine = oversizedPrefix + MINIMAL_LINE;
+ parser.parseLine(ACCESS_LOG, 1, 2, 3, 4, 5, 6, prefixedLine);
+
+ assertTrue("Hostname over 255 characters but we go through parseLine twice to skip the logging ",
+ parser.parseLine(ACCESS_LOG, 1, 2, 3, 4, 5, 6, prefixedLine));
+ }
+
+ /** After parsing the minimal line the getters must expose the captured fields. */
+ @Test
+ public void testGettersGetCorrectInfoAfterRunningParseLine() {
+ parser.parseLine(ACCESS_LOG, 1, 2, 3, 4, 5, 6, MINIMAL_LINE);
+
+ assertEquals(null, parser.getMsgTime());
+ assertEquals("address", parser.getRemoteAddress());
+ assertEquals("GET", parser.getRequest());
+ assertEquals("-", parser.getRemoteUser());
+ assertEquals(0, parser.getBytes());
+ assertEquals(0, parser.getStatus());
+ }
+
+ /** toString must list every captured field in its fixed order. */
+ @Test
+ public void testToString() {
+ final String nubLine = "nub - nub [17/May/2015:08:05:32 +0000] \"nub\" 0 0 \"-\" \"nub\"";
+ parser.parseLine(ACCESS_LOG, 1, 2, 3, 4, 5, 6, nubLine);
+ final String expected = "timestamp=(null) " + "remote_address=(nub) " + "remote_user=(nub) "
+ + "request=(nub) " + "status=(0) " + "bytes=(0)";
+ assertEquals("Testing to String works correctly ", expected, parser.toString());
+ }
+}
diff --git a/ade-ext/src/test/java/org/openmainframe/ade/ext/os/parser/TestNginxLogParserBase.java b/ade-ext/src/test/java/org/openmainframe/ade/ext/os/parser/TestNginxLogParserBase.java
new file mode 100644
index 0000000..43a76b4
--- /dev/null
+++ b/ade-ext/src/test/java/org/openmainframe/ade/ext/os/parser/TestNginxLogParserBase.java
@@ -0,0 +1,116 @@
+/*
+
+ Copyright Contributors to the ADE Project.
+
+ SPDX-License-Identifier: GPL-3.0-or-later
+
+ This file is part of Anomaly Detection Engine for Linux Logs (ADE).
+
+ ADE is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ ADE is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with ADE. If not, see <http://www.gnu.org/licenses/>.
+
+*/
+package org.openmainframe.ade.ext.os.parser;
+
+import static org.junit.Assert.assertEquals;
+import static org.mockito.Mockito.RETURNS_DEEP_STUBS;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.util.TimeZone;
+import java.util.Calendar;
+import java.util.TimeZone;
+
+import org.joda.time.DateTime;
+import org.joda.time.DateTimeZone;
+import org.joda.time.format.DateTimeFormat;
+import org.junit.Before;
+import org.junit.Test;
+import org.openmainframe.ade.Ade;
+import org.openmainframe.ade.exceptions.AdeException;
+import org.openmainframe.ade.ext.os.LinuxAdeExtProperties;
+import org.openmainframe.ade.ext.os.parser.SparklogParserBase;
+import org.openmainframe.ade.ext.os.parser.SparklogParser;
+import org.openmainframe.ade.utils.patches.Version;
+
+/**
+ * Tests for NginxLogParserBase. NOTE(review): every test except testToDate still exercises SparklogParser; confirm.
+ */
+public class TestNginxLogParserBase {
+ Ade ade;
+
+ /** Registers a deep-stub Ade mock through Ade.create before each test, so no test relies on state left by another. */
+ @Before
+ public void setup() throws AdeException{
+ ade = mock(Ade.class, RETURNS_DEEP_STUBS);
+ when(ade.getConfigProperties().database().getDatabaseDriver()).thenReturn("derby");
+ when(ade.getConfigProperties().getOverrideVersionCheck()).thenReturn(true);
+ when(ade.getDbVersion()).thenReturn(new Version(1, 0));
+ Ade.create(ade);
+ }
+
+ @Test
+ public void testSparklogParserBaseConstructor() throws AdeException {
+ TimeZone tz= Ade.getAde().getConfigProperties().getInputTimeZone();
+ LinuxAdeExtProperties laep = mock(LinuxAdeExtProperties.class);
+ SparklogParserBase pid = new SparklogParser(laep);
+
+ assertEquals("Making a new constructor. It sets the timezone "
+ ,DateTimeZone.forOffsetMillis(tz.getRawOffset()),pid.getInputTimeZone());
+ assertEquals("Making a new constructor. It sets the timezone "
+ ,DateTimeZone.forOffsetMillis(tz.getRawOffset()),pid.getOutputTimeZone());
+ }
+
+ @Test
+ public void testSparklogParserBaseConstructorWithNullInput() throws AdeException {
+ TimeZone tz= Ade.getAde().getConfigProperties().getInputTimeZone();
+ SparklogParserBase pid = new SparklogParser(null);
+ assertEquals("Making a new constructor wiht null value so LinuxAdeExtProperties is made. It sets the timezone"
+ ,DateTimeZone.forOffsetMillis(tz.getRawOffset()),pid.getInputTimeZone());
+ }
+
+ @Test
+ public void testToDate() throws AdeException {
+ LinuxAdeExtProperties laep = mock(LinuxAdeExtProperties.class, RETURNS_DEEP_STUBS);
+ NginxLogParserBase pid = new NginxLogParser(laep);
+ when(laep.isYearDefined()).thenReturn(true);
+ pid.setAdeExtProperties(laep);
+ DateTime date = DateTimeFormat.forPattern("dd/MMM/yyyy:HH:mm:ss Z").withZoneUTC().parseDateTime("17/May/2015:08:05:32 +0000");
+
+ assertEquals("toDate with good input keeps the year found in the log "
+ ,date.toDate(),pid.toDate("","17/May/2015:08:05:32 +0000"));
+ }
+
+ @Test
+ public void testRegexPatternsTimeStamp() throws AdeException{
+ String line = "17/06/08 14:37:39 INFO ExecutorRunnable: Starting Executor Container";
+ SparklogParser s = new SparklogParser(null);
+ s.parseLine(line);
+ Calendar c = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
+
+ // Time Stamp checks
+ c.setTime(s.getMsgTime());
+ assertEquals(c.get(Calendar.YEAR), 2017);
+ assertEquals(c.get(Calendar.MONTH), 5);
+ assertEquals(c.get(Calendar.DAY_OF_MONTH), 8);
+ assertEquals(c.get(Calendar.HOUR_OF_DAY), 14);
+ assertEquals(c.get(Calendar.MINUTE), 37);
+ assertEquals(c.get(Calendar.SECOND), 39);
+
+ // Tests for source , component and message body
+ assertEquals("info", s.getSource());
+ assertEquals("master", s.getComponent());
+ assertEquals("Starting Executor Container", s.getMessageBody());
+ }
+
+}