diff --git a/.gitignore b/.gitignore index 8deed27..d918228 100644 --- a/.gitignore +++ b/.gitignore @@ -17,4 +17,5 @@ ade-ext/derby.log .project .settings -.DS_Store \ No newline at end of file +.DS_Store +.vscode diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..26d3352 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/.idea/compiler.xml b/.idea/compiler.xml new file mode 100644 index 0000000..e2b97b7 --- /dev/null +++ b/.idea/compiler.xml @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/encodings.xml b/.idea/encodings.xml new file mode 100644 index 0000000..5ce4586 --- /dev/null +++ b/.idea/encodings.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/jarRepositories.xml b/.idea/jarRepositories.xml new file mode 100644 index 0000000..712ab9d --- /dev/null +++ b/.idea/jarRepositories.xml @@ -0,0 +1,20 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..f250cbb --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,12 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/uiDesigner.xml b/.idea/uiDesigner.xml new file mode 100644 index 0000000..2b63946 --- /dev/null +++ b/.idea/uiDesigner.xml @@ -0,0 +1,124 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..35eb1dd --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/ade-assembly/src/main/baseline/nginx/analyze/nginx_analyze.tar.gz b/ade-assembly/src/main/baseline/nginx/analyze/nginx_analyze.tar.gz 
new file mode 100644 index 0000000..30be2c3 Binary files /dev/null and b/ade-assembly/src/main/baseline/nginx/analyze/nginx_analyze.tar.gz differ diff --git a/ade-assembly/src/main/baseline/nginx/upload/nginx.tar.gz b/ade-assembly/src/main/baseline/nginx/upload/nginx.tar.gz new file mode 100644 index 0000000..ec4b4f1 Binary files /dev/null and b/ade-assembly/src/main/baseline/nginx/upload/nginx.tar.gz differ diff --git a/ade-assembly/src/main/conf/setup.props b/ade-assembly/src/main/conf/setup.props index cbc2c55..027aac3 100644 --- a/ade-assembly/src/main/conf/setup.props +++ b/ade-assembly/src/main/conf/setup.props @@ -7,7 +7,8 @@ adeext.parseErrorToKeep=100 adeext.parseErrorDaysTolerate=2 adeext.parseErrorTrackNullComponent=false adeext.runtimeModelDataStoreAtSource=true -adeext.useSparkLogs=true +adeext.useSparkLogs=false +adeext.useNginxLogs=true adeext.msgRate10MinSlotsToKeep=24 adeext.msgRate10MinSubIntervalList=1,2,3,6,12,24 @@ -19,7 +20,9 @@ adeext.msgRateMergeSource=true # are only used when ade.useSparkLogs=true # -------------------------------------------------------------------- -ade.useSparkLogs=true +ade.useNginxLogs=true +ade.flowLayoutFileNginx=conf/xml/FlowLayoutNginx.xml +ade.useSparkLogs=false ade.flowLayoutFile=conf/xml/FlowLayout.xml ade.flowLayoutFileSpark=conf/xml/FlowLayoutSpark.xml ade.outputPath=output/ @@ -28,6 +31,7 @@ ade.xml.xsltDir=conf/xml ade.criticalWords.file=conf/criticalWords.txt ade.analysisGroupToFlowNameMapperClass=org.openmainframe.ade.ext.os.LinuxAnalysisGroupToFlowNameConstantMapper ade.analysisGroupToFlowNameMapperClassSpark=org.openmainframe.ade.ext.os.SparkAnalysisGroupToFlowNameConstantMapper +ade.analysisGroupToFlowNameMapperClassNginx=org.openmainframe.ade.ext.os.NginxAnalysisGroupToFlowNameConstantMapper ade.outputFilenameGenerator=org.openmainframe.ade.ext.output.ExtOutputFilenameGenerator ade.inputTimeZone=GMT+00:00 ade.outputTimeZone=GMT diff --git a/ade-assembly/src/main/conf/xml/FlowLayoutNginx.xml 
b/ade-assembly/src/main/conf/xml/FlowLayoutNginx.xml new file mode 100644 index 0000000..24c314f --- /dev/null +++ b/ade-assembly/src/main/conf/xml/FlowLayoutNginx.xml @@ -0,0 +1,181 @@ + + + + + + ConsecutiveTimeFramer + + + + + ConsecutiveTimeFramer + + + ContinuousTimeFramer + + + + + ConsecutiveTimeFramer + + + ContinuousTimeFramer + + + + + ConsecutiveTimeFramer + + + ContinuousTimeFramer + + + + + ConsecutiveTimeFramer + + + ContinuousTimeFramer + + + + + ConsecutiveTimeFramer + + + ContinuousTimeFramer + + + + + + + ConsecutiveTimeFramer + + + ContinuousTimeFramer + + + + + ConsecutiveTimeFramer + + + ContinuousTimeFramer + + + + + + + + oneMinuteTrain + 1 + oneMinuteTrain + + CriticalWordCountReporter + + + ClusteringContextScore + + + + + + + + + + + + + + + + FullBernoulliClusterAwareScore + + + NGINX01 + + + LastSeenLoggingScorerContinuous + + + LastSeenScorer + NGINX11 + + + BestOfTwoScorer + + + NGINX02 + NGINX12 + + + SeverityScore + + + LogNormalScore + + + + + AdeWeightedMessageAnomalyScorerLogNormal + + + + + NGINX01 + NGINX31 + NGINX21 + NGINX41 + + + AdeAnomalyIntervalScorer + NGINX51 + + + org.openmainframe.ade.ext.output.ExtendedAnalyzedIntervalDbStorer + + + org.openmainframe.ade.ext.output.ExtJaxbAnalyzedPeriodV2XmlStorer + + + + + + org.openmainframe.ade.ext.output.ExtAnalyzedIntervalV2FullXmlStorer + + + + + org.openmainframe.ade.ext.output.ExtJaxbAnalyzedIntervalV2XmlStorer + + + + NGINX51 + NGINX52 + + \ No newline at end of file diff --git a/ade-assembly/src/main/conf/xml/FlowLayoutNginx.xsd b/ade-assembly/src/main/conf/xml/FlowLayoutNginx.xsd new file mode 100644 index 0000000..aac5564 --- /dev/null +++ b/ade-assembly/src/main/conf/xml/FlowLayoutNginx.xsd @@ -0,0 +1,175 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + \ No newline at end of file diff --git a/ade-assembly/src/main/test/nginx_analysis_comp_test.sh b/ade-assembly/src/main/test/nginx_analysis_comp_test.sh new file mode 100644 index 0000000..64291c8 --- /dev/null +++ b/ade-assembly/src/main/test/nginx_analysis_comp_test.sh @@ -0,0 +1,298 @@ +#!/bin/bash +#************************************************************************ +# Copyright Contributors to the ADE Project. +# SPDX-License-Identifier: GPL-3.0-or-later +# +# This file is part of Anomaly Detection Engine for Linux Logs (ADE). * +# * +# ADE is free software: you can redistribute it and/or modify * +# it under the terms of the GNU General Public License as published by * +# the Free Software Foundation, either version 3 of the License, or * +# (at your option) any later version. * +# * +# ADE is distributed in the hope that it will be useful, * +# but WITHOUT ANY WARRANTY; without even the implied warranty of * +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +# GNU General Public License for more details. * +# * +# You should have received a copy of the GNU General Public License * +# along with ADE. If not, see . * +#************************************************************************ + +#************************************************************************ +# Script: analysis_comp_test.sh +# +# Usage: analysis_comp_test.sh +# +# This script will upload/train/analyze a baseline set of syslog files +# so that the resulting analysis ouput (xml files) can be compared to the +# baseline. The script is intended to be run after any change to the +# analytics code to point any changes from the existing baseline. Changes +# to the local constants below should be made to customize to your +# environment. +# +#******************************************************************************** + +ADE_HOME=`dirname "$0"`/../.. 
# assumes the script lives in <ADE_HOME>/bin/test/
ADE_HOME=`cd "$ADE_HOME" && pwd`

#***********************************************
# local constants
#***********************************************
BASELINE_DIR="$ADE_HOME/baseline"
BASELINE_UPLOAD_DIR="$BASELINE_DIR/nginx/upload"
BASELINE_ANALYZE_DIR="$BASELINE_DIR/nginx/analyze"
BASELINE_OUTPUT_DIR="$BASELINE_DIR/output"

BIN_DIR="$ADE_HOME/bin"

ANALYSIS_COMPARE_LOG="/tmp/nginx_compare_`date "+%Y%m%d%H%M%S"`.out"

#***********************************************
# analysis group constants
#***********************************************
AG_PREFIX="regression_ag_"
AG_NAME="$AG_PREFIX`date "+%Y%m%d%H%M%S"`"
AG_JSON_DEF_FILENAME="/tmp/reg_ag.json"
AG_JSON_DEF="{ \
    \"groups\":{\"modelgroups\":[{\"name\" : \"$AG_NAME\", \"dataType\": \"syslog\", \"evaluationOrder\" : 1, \"ruleName\" : \"default\"}]}, \
    \"rules\":[{\"name\" : \"default\", \"description\" : \"regression test rule to match all systems\", \"membershipRule\" : \"*\" }] \
}"

#***********************************************
# Constants pointing to properties in setup
# file (conf/setup.props)
#***********************************************
DB_URL_PROP="ade.databaseUrl"
OUTDIR_PROP="ade.outputPath"
ANALYSIS_OUTDIR_PROP="ade.analysisOutputPath"

#***********************************************
# sub-routines
#***********************************************

# Runs the command line given as arguments through eval, echoes the command,
# its return code and its combined stdout/stderr, and returns the command's
# return code. The captured output is also left in COMMAND_OUT.
issue_command() {
    cmd="$*"

    # Per-invocation scratch file instead of a fixed /tmp/cmdout, so that
    # concurrent runs cannot overwrite each other's output.
    cmd_out_file=$(mktemp /tmp/cmdout.XXXXXX) || return 1

    echo "**********************************"
    echo "CMD = $cmd"

    eval "$cmd >\"$cmd_out_file\" 2>&1"
    rc=$?

    COMMAND_OUT=$(cat "$cmd_out_file")
    echo "RC: $rc"
    echo "$COMMAND_OUT"
    echo "**********************************"
    rm -f "$cmd_out_file"

    return $rc
}

# Prints the value of property $1 as found in $ADE_SETUP_FILE.
# Returns 1 when no property name is given, 2 when no value was found.
# Diagnostics go to stderr because callers capture stdout as the value.
get_current_prop_val() {
    prop_name=$1

    if [ -z "$prop_name" ]; then
        echo "get_current_prop_val: no property name given" >&2
        return 1
    fi

    prop_val=`grep "$prop_name=" "$ADE_SETUP_FILE" | cut -d \= -f 2`

    if [ -z "$prop_val" ]; then
        echo "get_current_prop_val: unable to get property value" >&2
        return 2
    fi

    echo "$prop_val"
}

# Rewrites property $1 in $ADE_SETUP_FILE to value $2, creating a one-time
# backup ($ADE_SETUP_FILE.bak) first. Returns 0 on success, 1 otherwise.
update_setup_file() {
    # if backup not created yet do it
    if [ ! -f "$ADE_SETUP_FILE.bak" ]; then
        cp "$ADE_SETUP_FILE" "$ADE_SETUP_FILE.bak"
    fi

    prop_name=$1
    prop_val=$2

    if [ -z "$prop_name" ]; then
        echo "update_setup_file: no property name given."
        return 1
    elif [ -z "$prop_val" ]; then
        echo "update_setup_file: no property value."
        return 1
    fi

    if ! grep "$prop_name=" "$ADE_SETUP_FILE"; then
        echo "update_setup_file: property $prop_name not found in $ADE_SETUP_FILE"
        return 1
    fi

    echo "update_setup_file: changing $prop_name to $prop_val"
    tmpfilename="/tmp/`basename "$ADE_SETUP_FILE"`"
    # '~' is the sed delimiter, so a value containing '~' (or '&' / '\')
    # would need escaping before being passed here.
    if ! sed "s~\(${prop_name}=\).*\$~\1${prop_val}~" "$ADE_SETUP_FILE" > "$tmpfilename"; then
        echo "update_setup_file: failed rewriting $ADE_SETUP_FILE"
        rm -f "$tmpfilename"
        return 1
    fi
    mv -f "$tmpfilename" "$ADE_SETUP_FILE" || return 1

    return 0
}

# The database name and output directory are defined in
# the setup file (conf/setup.props). In order to prevent
# contaminating the current database and output directory
# while running the test this method will change the
# values in the setup file.
change_dbname_and_output_dir() {
    curr_db_val=$( get_current_prop_val "$DB_URL_PROP" )
    if [ -z "$curr_db_val" ]; then
        echo "Failed retrieving current database name value"
        return 1
    fi
    echo "current database value: $curr_db_val"

    curr_outdir_val=$( get_current_prop_val "$OUTDIR_PROP" )
    if [ -z "$curr_outdir_val" ]; then
        echo "Failed retrieving current output directory value"
        return 1
    fi
    echo "current output directory value: $curr_outdir_val"

    # in setup.props change ade.databaseUrl to temp value
    test_db_name="${curr_db_val}_regtest`date "+%Y%m%d%H%M%S"`"
    update_setup_file "$DB_URL_PROP" "$test_db_name" || return 1

    # in setup.props change ade.outputPath value to temp value
    test_outdir_name="${curr_outdir_val}regtest`date "+%Y%m%d%H%M%S"`"
    update_setup_file "$OUTDIR_PROP" "$test_outdir_name" || return 1

    # in setup.props change ade.analysisOutputPath to temp value
    test_analysis_outdir_name="$test_outdir_name/continuous"
    update_setup_file "$ANALYSIS_OUTDIR_PROP" "$test_analysis_outdir_name" || return 1

    return 0
}

# Verifies that every directory the test needs exists; exits with 1 on the
# first one that is missing.
check_test_env() {
    if [ ! -d "$BASELINE_DIR" ]; then
        echo "ERROR: Unable to locate baseline directory: $BASELINE_DIR"
        exit 1
    fi

    if [ ! -d "$BASELINE_UPLOAD_DIR" ]; then
        echo "ERROR: Unable to locate baseline test nginx upload directory: $BASELINE_UPLOAD_DIR"
        exit 1
    fi

    if [ ! -d "$BASELINE_ANALYZE_DIR" ]; then
        echo "ERROR: Unable to locate baseline test nginx analyze directory: $BASELINE_ANALYZE_DIR"
        exit 1
    fi

    if [ ! -d "$BASELINE_OUTPUT_DIR" ]; then
        echo "ERROR: Unable to locate baseline output directory: $BASELINE_OUTPUT_DIR"
        exit 1
    fi

    if [ ! -d "$BIN_DIR" ]; then
        echo "ERROR: Unable to find location of bin directory: $BIN_DIR"
        exit 1
    fi

    return 0
}

# Restores conf/setup.props from its backup (keeping the modified copy as
# $ADE_SETUP_FILE.regtest) and exits with code $1 (0 when omitted).
cleanup_and_exit() {
    exit_code=${1:-0}

    echo "Performing test cleanup..."

    # reset db name and output directory (restore conf/setup.props)
    if [ -f "$ADE_SETUP_FILE.bak" ]; then
        mv "$ADE_SETUP_FILE" "$ADE_SETUP_FILE.regtest"
        mv "$ADE_SETUP_FILE.bak" "$ADE_SETUP_FILE"
    fi

    echo "Completed with RC=$exit_code"
    exit $exit_code
}

###################
# main
###################
# change to ADE_HOME dir because setup.props contains relative paths
cd "$ADE_HOME" || { echo "ERROR: Unable to change to directory: $ADE_HOME"; exit 1; }

. bin/env.sh

check_test_env

# decompress any compressed baseline files
find "$BASELINE_OUTPUT_DIR" -maxdepth 1 -type f -name "*.tgz" -exec tar -zxf {} --directory="$BASELINE_OUTPUT_DIR" \;

# Change the database name in setup.props to prevent contaminating anything in current database.
change_dbname_and_output_dir
if [ $? -ne 0 ]; then
    echo "Failed to perform temporary change to db name and output directory. Exiting"
    cleanup_and_exit 1
fi

# Create the database specified in setup.props
issue_command "$BIN_DIR/controldb create"
if [ $? -ne 0 ]; then
    echo "ERROR: Failed to create database"
    cleanup_and_exit 1
fi

## create test group for analysis
# quoted so the JSON is written as-is (no word splitting / globbing of "*")
echo "$AG_JSON_DEF" > "$AG_JSON_DEF_FILENAME" # copy group definition into a file
issue_command "$BIN_DIR/updategroups -j $AG_JSON_DEF_FILENAME"
if [ $? -ne 0 ]; then
    echo "ERROR: Failed to define analysis group. Exiting"
    cleanup_and_exit 1
fi

## upload logfile for system
issue_command "$BIN_DIR/upload -d $BASELINE_UPLOAD_DIR"
if [ $? -ne 0 ]; then
    echo "ERROR: Failed to upload data from $BASELINE_UPLOAD_DIR. Exiting"
    cleanup_and_exit 1
fi

## train analysis group
issue_command "$BIN_DIR/train $AG_NAME"
if [ $? -ne 0 ]; then
    echo "ERROR: Failed to train analysis group $AG_NAME. Exiting"
    cleanup_and_exit 1
fi

## analyze logfile
issue_command "$BIN_DIR/analyze -f $BASELINE_ANALYZE_DIR/nginx_analyze.tar.gz"
if [ $? -ne 0 ]; then
    echo "ERROR: Failed to analyze data from $BASELINE_ANALYZE_DIR. Exiting"
    cleanup_and_exit 1
fi

echo
echo "Performing compare of baseline to new analysis results..."

# ADE_JAVA / ADE_CLASSPATH / ADE_SETUP_FILE are not set in this script
# (presumably by bin/env.sh - confirm); left unquoted as in the original
# because their exact shape is not visible here.
$ADE_JAVA -cp $ADE_CLASSPATH -Dade.setUpFilePath=$ADE_SETUP_FILE org.openmainframe.ade.ext.regression.AdeAnalysisOutputCompare -b "$BASELINE_OUTPUT_DIR" >"$ANALYSIS_COMPARE_LOG"
rc=$?

echo "Analysis comparison output written to $ANALYSIS_COMPARE_LOG"

cleanup_and_exit $rc

diff --git a/ade-core/src/main/java/org/openmainframe/ade/IAdeConfigProperties.java b/ade-core/src/main/java/org/openmainframe/ade/IAdeConfigProperties.java index 306c3e3..3a9cfd6 100644 --- a/ade-core/src/main/java/org/openmainframe/ade/IAdeConfigProperties.java +++ b/ade-core/src/main/java/org/openmainframe/ade/IAdeConfigProperties.java @@ -55,9 +55,14 @@ public interface IAdeConfigProperties { * Speciifies if running ADE on Spark logs. * @return boolean : True if running on Spark logs. */ - Boolean getUseSparkLogs(); + /** + * Speciifies if running ADE on Nginx logs. + * @return boolean : True if running on Nginx logs. 
+ */ + Boolean getUseNginxLogs(); + /** * @return the mode of the period, which is an enum * describing the duration (and alignment) of the period diff --git a/ade-core/src/main/java/org/openmainframe/ade/impl/AdeConfigPropertiesImpl.java b/ade-core/src/main/java/org/openmainframe/ade/impl/AdeConfigPropertiesImpl.java index 7c00709..4ae47c1 100644 --- a/ade-core/src/main/java/org/openmainframe/ade/impl/AdeConfigPropertiesImpl.java +++ b/ade-core/src/main/java/org/openmainframe/ade/impl/AdeConfigPropertiesImpl.java @@ -96,6 +96,9 @@ public class AdeConfigPropertiesImpl implements IAdeConfigProperties { @Property(key = ADE_PREFIX + "useSparkLogs", help = "Type of logs to run ade on") private boolean m_useSparkLogs; + @Property(key = ADE_PREFIX + "useNginxLogs", help = "Type of logs to run ade on") + private boolean m_useNginxLogs; + @Property(key = ADE_PREFIX + "flowLayoutFile", help = "Path to Flow Layout file") private String m_flowLayoutFile; @@ -103,6 +106,10 @@ public class AdeConfigPropertiesImpl implements IAdeConfigProperties { help = "Path to Flow Layout file for spark (matters only when ade.useSparkLogs=true)") private String m_flowLayoutFileSpark; + @Property(key = ADE_PREFIX + "flowLayoutFileNginx", + help = "Path to Flow Layout file for nginx (matters only when ade.useNginxLogs=true)") + private String m_flowLayoutFileNginx; + @Property(key = ADE_PREFIX + "userRulesFile", required = false, help = "Optional path to User Rules file") private String m_userRulesFile = null; @@ -204,6 +211,12 @@ public class AdeConfigPropertiesImpl implements IAdeConfigProperties { private Class m_analysisGroupToFlowNameMapperSpark = AnalysisGroupToFlowNameUnityMapper.class; + @Property(key = ADE_PREFIX + "analysisGroupToFlowNameMapperClassNginx", required = false, + factory = FlowMapperClassFactory.class, help = "Optional class for mapping analysis groups to flow names.(Nginx)" + + "Must extend AnalysisGroupToFlowNameMapper. 
Used only when ade.useSparkLogs=true") + private Class m_analysisGroupToFlowNameMapperNginx + = AnalysisGroupToFlowNameUnityMapper.class; + @Property(key = ADE_OVERRIDE_VERSION_CHECK, required = false, help = "Allow Ade to run with a database version different from the JAR version") private boolean m_overrideVersionCheck = false; @@ -273,6 +286,9 @@ private void validateProps() throws AdeUsageException { if (m_useSparkLogs){ FileUtils.assertExists(new File(m_criticalWordsFile), new File(m_flowLayoutFileSpark)); } + if (m_useNginxLogs){ + FileUtils.assertExists(new File(m_criticalWordsFile), new File(m_flowLayoutFileNginx)); + } } catch (FileNotFoundException e) { throw new AdeUsageException("File specified in setup properties not found!", e); @@ -308,6 +324,9 @@ public final String getFlowLayoutFile() { if (m_useSparkLogs){ return m_flowLayoutFileSpark; } + if (m_useNginxLogs){ + return m_flowLayoutFileNginx; + } return m_flowLayoutFile; } @@ -316,6 +335,11 @@ public final Boolean getUseSparkLogs() { return m_useSparkLogs; } + @Override + public final Boolean getUseNginxLogs() { + return m_useNginxLogs; + } + @Override public final TimeZone getOutputTimeZone() { return m_outputTimeZone; @@ -386,6 +410,9 @@ public final Class getAnalysisGroupToFl if (m_useSparkLogs){ return m_analysisGroupToFlowNameMapperSpark; } + if (m_useNginxLogs){ + return m_analysisGroupToFlowNameMapperNginx; + } return m_analysisGroupToFlowNameMapper; } diff --git a/ade-core/src/main/java/org/openmainframe/ade/impl/flow/factory/FlowFactory.java b/ade-core/src/main/java/org/openmainframe/ade/impl/flow/factory/FlowFactory.java index ea81601..5544d8d 100644 --- a/ade-core/src/main/java/org/openmainframe/ade/impl/flow/factory/FlowFactory.java +++ b/ade-core/src/main/java/org/openmainframe/ade/impl/flow/factory/FlowFactory.java @@ -81,7 +81,10 @@ public FlowFactory() throws AdeException { if (Ade.getAde().getConfigProperties().getUseSparkLogs()){ FLOW_LAYOUT_XSD_File_Name = File.separator + 
"FlowLayoutSpark.xsd"; } - else{ + else if (Ade.getAde().getConfigProperties().getUseNginxLogs()) { + FLOW_LAYOUT_XSD_File_Name = File.separator + "FlowLayoutNginx.xsd"; + } + else { FLOW_LAYOUT_XSD_File_Name = File.separator + "FlowLayout.xsd"; } diff --git a/ade-ext/src/main/java/org/openmainframe/ade/ext/main/AdeMaskLog.java b/ade-ext/src/main/java/org/openmainframe/ade/ext/main/AdeMaskLog.java index b617d0b..a09c3f7 100644 --- a/ade-ext/src/main/java/org/openmainframe/ade/ext/main/AdeMaskLog.java +++ b/ade-ext/src/main/java/org/openmainframe/ade/ext/main/AdeMaskLog.java @@ -54,9 +54,11 @@ This file is part of Anomaly Detection Engine for Linux Logs (ADE). import org.openmainframe.ade.ext.os.parser.LinuxSyslog3164ParserFreeForm; import org.openmainframe.ade.ext.os.parser.LinuxSyslog3164ParserWithCompAndPid; import org.openmainframe.ade.ext.os.parser.LinuxSyslog3164ParserWithMark; +import org.openmainframe.ade.ext.os.parser.NginxLogParser; import org.openmainframe.ade.ext.os.parser.SparklogParser; import org.openmainframe.ade.ext.os.parser.LinuxSyslog5424ParserBase; import org.openmainframe.ade.ext.os.parser.LinuxSyslogLineParser; +import org.openmainframe.ade.ext.os.parser.NginxLogLineParser; import org.openmainframe.ade.ext.os.parser.SparklogLineParser; import org.openmainframe.ade.ext.service.AdeExtMessageHandler; import org.openmainframe.ade.ext.os.AdeExtPropertiesFactory; @@ -79,6 +81,8 @@ public class AdeMaskLog extends ExtControlProgram { private static SparklogLineParser[] mSparkLineParsers; + private static NginxLogLineParser[] mNginxLineParsers; + private static Pattern validIPV4Pattern; private static Pattern validIPV6Pattern; private static Pattern validEmailPattern; @@ -235,6 +239,12 @@ private static boolean isSpark() throws AdeException{ return AdeExt.getAdeExt().getConfigProperties().isSparkLog(); } + /** + * Check if we're using Nginx logs + */ + private static boolean isNginx() throws AdeException{ + return 
AdeExt.getAdeExt().getConfigProperties().isNginxLog(); + } /** * Read and write file specified by input and output file name mask system @@ -340,6 +350,12 @@ private static void createParsers() throws AdeInternalException { }; SparklogParser.setAdeExtProperties((LinuxAdeExtProperties) linuxProperties); } + else if (isNginx()) { + mNginxLineParsers = new NginxLogLineParser[] { + new NginxLogParser(), + }; + NginxLogParser.setAdeExtProperties((LinuxAdeExtProperties) linuxProperties); + } else{ mLineParsers = new LinuxSyslogLineParser[] { new LinuxSyslog5424ParserBase(), @@ -382,6 +398,17 @@ private String generateMaskedLine(String currentLine) throws AdeException{ return outline; } + if (isNginx()){ + for (NginxLogLineParser lineParser : mNginxLineParsers) { + gotLine = lineParser.parseLine(currentLine); + if (gotLine) { + String oldSystemName = lineParser.getRemoteAddress(); + String oldText = lineParser.getRequest(); + return createNewLine(currentLine, oldSystemName, oldText); + } + } + } + // Linux Syslogs for (LinuxSyslogLineParser lineParser : mLineParsers) { gotLine = lineParser.parseLine(currentLine); diff --git a/ade-ext/src/main/java/org/openmainframe/ade/ext/main/helper/AdeInputStreamHandlerExt.java b/ade-ext/src/main/java/org/openmainframe/ade/ext/main/helper/AdeInputStreamHandlerExt.java index 0ae25f8..c76ae19 100644 --- a/ade-ext/src/main/java/org/openmainframe/ade/ext/main/helper/AdeInputStreamHandlerExt.java +++ b/ade-ext/src/main/java/org/openmainframe/ade/ext/main/helper/AdeInputStreamHandlerExt.java @@ -101,7 +101,7 @@ public final void incomingStreamFromFile(File file) throws AdeException { a_adeInputStream = new AdeInputStreamExt(is, props, m_adeExtProperties, parseReportFilename); - /* Indicate this is a new file, this will allow an interval broken into + /* Indicate this is a new file, this will allow an interval broken into * to log files. 
*/ incomingSeparator(new FileSeperator(file.getName())); @@ -112,7 +112,7 @@ public final void incomingStreamFromFile(File file) throws AdeException { /** * Get the parse report filename based on the logFileName. * - * @param logfileName + * @param name * @throws AdeException */ protected final String getParseReportFilename(String name) throws AdeException { diff --git a/ade-ext/src/main/java/org/openmainframe/ade/ext/main/helper/AdeInputStreamHandlerLinux.java b/ade-ext/src/main/java/org/openmainframe/ade/ext/main/helper/AdeInputStreamHandlerLinux.java index a827644..fb7467a 100644 --- a/ade-ext/src/main/java/org/openmainframe/ade/ext/main/helper/AdeInputStreamHandlerLinux.java +++ b/ade-ext/src/main/java/org/openmainframe/ade/ext/main/helper/AdeInputStreamHandlerLinux.java @@ -31,6 +31,8 @@ This file is part of Anomaly Detection Engine for Linux Logs (ADE). import org.openmainframe.ade.ext.os.AdeExtProperties; import org.openmainframe.ade.ext.os.parser.LinuxSyslogLineParser; import org.openmainframe.ade.ext.os.parser.LinuxSyslogMessageReader; +import org.openmainframe.ade.ext.os.parser.NginxLogLineParser; +import org.openmainframe.ade.ext.os.parser.NginxLogMessageReader; import org.openmainframe.ade.ext.os.parser.SparklogLineParser; import org.openmainframe.ade.ext.os.parser.SparklogMessageReader; import org.openmainframe.ade.ext.stats.MessageRateStats; @@ -48,6 +50,9 @@ public AdeInputStreamHandlerLinux(AdeExtProperties adeExtProperties) throws AdeE public static boolean isSpark() throws AdeException{ return AdeExt.getAdeExt().getConfigProperties().isSparkLog(); } + public static boolean isNginx() throws AdeException{ + return AdeExt.getAdeExt().getConfigProperties().isNginxLog(); + } /** * Handling a stream @@ -79,6 +84,10 @@ protected final void beforeSendMessage(IMessageInstance mi) throws AdeException // Keep statistics for this MI msgRateStats.addMessage(mi.getMessageId(), mi.getDateTime().getTime(), sparkReader.isWrapperMessage()); } + else if (isNginx()) 
{ + final NginxLogMessageReader nginxReader = (NginxLogMessageReader) a_adeInputStream.getReader(); + msgRateStats.addMessage(mi.getMessageId(), mi.getDateTime().getTime(), nginxReader.isWrapperMessage()); + } else{ final LinuxSyslogMessageReader linuxReader = (LinuxSyslogMessageReader) a_adeInputStream.getReader(); msgRateStats.addMessage(mi.getMessageId(), mi.getDateTime().getTime(), linuxReader.isWrapperMessage()); @@ -95,7 +104,7 @@ protected final void beforeSendMessage(IMessageInstance mi) throws AdeException * @throws AdeFlowException */ private void handleLoggerUnavailable(IMessageInstance mi) throws AdeFlowException, AdeException { - if (!isSpark()){ + if (!isSpark() && !isNginx()){ if (LinuxSyslogLineParser.isSyslogNgRestarted(mi)) { /* Indicate the SysLogNg has restarted. */ incomingSeparator(new FileSeperator(mi.getSourceId(), "syslog-ng starting")); @@ -103,5 +112,4 @@ private void handleLoggerUnavailable(IMessageInstance mi) throws AdeFlowExceptio } } } - } diff --git a/ade-ext/src/main/java/org/openmainframe/ade/ext/os/NginxAnalysisGroupToFlowNameConstantMapper.java b/ade-ext/src/main/java/org/openmainframe/ade/ext/os/NginxAnalysisGroupToFlowNameConstantMapper.java new file mode 100644 index 0000000..ea313ac --- /dev/null +++ b/ade-ext/src/main/java/org/openmainframe/ade/ext/os/NginxAnalysisGroupToFlowNameConstantMapper.java @@ -0,0 +1,37 @@ +/* + + Copyright Contributors to the ADE Project. + + SPDX-License-Identifier: GPL-3.0-or-later + + This file is part of Anomaly Detection Engine for Linux Logs (ADE). + + ADE is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + ADE is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ADE. If not, see . + +*/ +package org.openmainframe.ade.ext.os; + +import org.openmainframe.ade.flow.AnalysisGroupToFlowNameConstantMapper; + +/** + * This class provide the mapping between AnalysisGroup (defined in the Reader, such as LinuxReader) + * and the FlowName defined in the FlowLayout.xml file. + */ +public class NginxAnalysisGroupToFlowNameConstantMapper extends AnalysisGroupToFlowNameConstantMapper { + + public NginxAnalysisGroupToFlowNameConstantMapper() { + super("NGINX"); + } + +} diff --git a/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/NginxLogLineParser.java b/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/NginxLogLineParser.java new file mode 100644 index 0000000..ab109e9 --- /dev/null +++ b/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/NginxLogLineParser.java @@ -0,0 +1,283 @@ +/* + + Copyright Contributors to the ADE Project. + + SPDX-License-Identifier: GPL-3.0-or-later + + This file is part of Anomaly Detection Engine for Linux Logs (ADE). + + ADE is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + ADE is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ADE. If not, see . 
+ +*/ +package org.openmainframe.ade.ext.os.parser; + +import java.util.Date; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.openmainframe.ade.actions.IParsingQualityReporter; +import org.openmainframe.ade.ext.os.LinuxAdeExtProperties; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.joda.time.DateTime; + +/** + * An abstract class for extracting data from a Nginx log message. + * Subclasses are expected to implement the parseLine() method to parse + * a line and set the instance variable values as appropriate. A typical + * subclass will call the parseLine method with a regex pattern and capturing + * groups for each of the instance variables it wants to extract. + * The features we consider for Nginx logs are: + * 1. m_timestamp : Timestamp on the message + * 2. m_remoteAddress : Remote Address of the message + * 3. m_remoteUser : Remote User of the message + * 4. m_request : The request field on the message line + * 5. m_status: The status of the request + * 6. m_bytes: The number of bytes sent + */ +public abstract class NginxLogLineParser { + + /** + * Default logger for this class. + */ + private static final Logger logger = LoggerFactory.getLogger(NginxLogLineParser.class); + + /** + * UTF8_BOM regex. + */ + protected static final String UTF8_BOM = "\\xEF\\xBB\\xBF"; + + /** + * The optional BOM and PRI, this will be used in pattern searching. + */ + protected static final String BOM_AND_PRI = "(?:" + UTF8_BOM + ")?" + "(?:<\\p{Digit}{1,2}>)?"; + + /** + * Component of the message + */ + protected String m_component; + + /** + * Whether the hostname truncation has already been logged. + */ + private boolean isHostnameTruncationLogged = false; + + /** + * The LinuxAdeExtProperties that contains configurations and properties from + * the start of AdeExt main class. + */ + protected LinuxAdeExtProperties m_LinuxAdeExtProperties; + + /** + * An object used for monitoring parsing quality, or null if none. 
+ */ + protected IParsingQualityReporter m_parsingQualityReport = null; + + /** + * Time of the message. + */ + protected Date m_timestamp; + + /** + * The remote address of the request. + */ + protected String m_remoteAddress; + + /** + * The remote user of the request. + */ + protected String m_remoteUser; + + /** + * The message request. + */ + protected String m_request; + + /** + * Status code of the request. + */ + protected int m_status; + + /** + * Number of bytes of the request. + */ + protected int m_bytes; + + /** + * Parses a line and sets the instance variables from it. + * + * @param line The line to parse. + * @return false if the line could not be parsed. + */ + public abstract boolean parseLine(String line); + + /** + * Returns the property object containing configurations from the start of + * AdeExt main class. + * + * @return The AdeExtProperties object. + */ + public final LinuxAdeExtProperties getLinuxAdeExtProperties() { + return m_LinuxAdeExtProperties; + } + + /** + * Converts a date from String format to a Date object. + * + * @param source the source name. + * @param dateTimeString the date and time string value. + * @return Date object with date/time-stamp of the Linux log. + */ + public abstract Date toDate(String source, String dateTimeString); + + /** + * Returns the DateTimeZone determined from the toDate(String source, String dateTimeString) + * method. + * + * @return The date object with date/time-stamp of the Linux log. + */ + public abstract DateTime getLastDeterminedDateTime(); + + /** + * Parses a line based on a regex Pattern. For each capturing group + * number that is non-zero, the corresponding instance variable + * is set. (Assigns m_component = master, remove this once we have newer logs) + * NOTE: There is no pid present in nginx logs. + * + * @param pattern - The pattern to parse. + * @param timestamp - Capturing group number https://quest.squadcast.tech/api/RA1911003010323/emailsfor the timestamp. 
+ * @param remoteAddress - Capturing group number for the remote address. + * @param remoteUser - Capturing group number for the remote user. + * @param request - Capturing group for the request. + * @param status - Capturing group for the status code. + * @param bytes - Capturing group for the number of bytes sent. + * @return false if the line could not be parsed. + */ + protected final boolean parseLine(Pattern pattern, int remoteAddress, + int remoteUser, int timestamp, int request, int status, int bytes, String line) { + final Matcher matcher = pattern.matcher(line); + if (matcher.matches()) { + try { + String msgTimeString = toString(matcher, timestamp); + m_timestamp = toDate("m_remoteAddress", msgTimeString); + m_remoteAddress = toString(matcher, remoteAddress); + m_remoteUser = toString(matcher, remoteUser); + m_request = toString(matcher, request); + m_status = Integer.parseInt(toString(matcher, status)); + m_bytes = Integer.parseInt(toString(matcher, bytes)); + // m_component = toString(matcher, comp); + m_component = "master"; + return true; + } catch (IllegalArgumentException e) { + e.printStackTrace(); + return false; + } + } + return false; + } + + /** + * Captures the group passed in by matching against a pattern. + * + * @param m Matcher to compare against a pattern. + * @param group The capturing group value. + * @return empty string if the capturing group is 0 otherwise the pattern + * captured by the passed in group. + */ + private String toString(Matcher m, int group) { + return (group == 0) ? "" : m.group(group); + } + + /** + * Returns the message time-stamp + * + * @return the time-stamp. + */ + public final Date getMsgTime() { + return m_timestamp; + } + + /** + * Returns the component of the message. + * + * @return the component + */ + public final String getComponent() { + return m_component; + } + + /** + * Returns the remote address. + * + * @return the remote address string value. 
+ */ + public final String getRemoteAddress() { + return m_remoteAddress; + } + + /** + * Returns the remote user. + * + * @return the remote user string value. + */ + public final String getRemoteUser() { + return m_remoteUser; + } + + /** + * Returns the status code. + * + * @return the status code int value. + */ + public final int getStatus() { + return m_status; + } + + /** + * Returns the number of bytes. + * + * @return the number of bytes. + */ + public final int getBytes() { + return m_bytes; + } + + /** + * Returns the request text. + * + * @return the request text string value. + */ + public final String getRequest() { + return m_request; + } + + /** + * Sets the parsingQualityReport for monitoring parsing quality of Linux logs. + * + * @param parsingQualityReport the ParsingQualityReporter object to be used. + */ + public final void setParseQualityReport(IParsingQualityReporter parsingQualityReport) { + m_parsingQualityReport = parsingQualityReport; + } + + /** + * The overridden toString method for this class. Prints out the captured groups + * from the message. + */ + @Override + public String toString() { + return String.format("timestamp=(%s) remote_address=(%s) remote_user=(%s) request=(%s) status=(%s) bytes=(%s)", + m_timestamp, m_remoteAddress, m_remoteUser, m_request, m_status, m_bytes); + } +} diff --git a/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/NginxLogMessageReader.java b/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/NginxLogMessageReader.java new file mode 100644 index 0000000..d20d633 --- /dev/null +++ b/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/NginxLogMessageReader.java @@ -0,0 +1,627 @@ +/* + + Copyright Contributors to the ADE Project. + + SPDX-License-Identifier: GPL-3.0-or-later + + This file is part of Anomaly Detection Engine for Linux Logs (ADE). 
+ + ADE is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + ADE is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ADE. If not, see . + +*/
package org.openmainframe.ade.ext.os.parser;

import java.io.File;
import java.io.IOException;
import java.text.Format;
import java.text.SimpleDateFormat;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;

import org.openmainframe.ade.Ade;
import org.openmainframe.ade.AdeInputStream;
import org.openmainframe.ade.AdeMessageReader;
import org.openmainframe.ade.actions.IParsingQualityReporter;
import org.openmainframe.ade.data.IDataFactory;
import org.openmainframe.ade.data.DataType;
import org.openmainframe.ade.data.IMessageInstance;
import org.openmainframe.ade.data.ISource;
import org.openmainframe.ade.dataStore.IDataStoreSources;
import org.openmainframe.ade.exceptions.AdeException;
import org.openmainframe.ade.exceptions.AdeInternalException;
import org.openmainframe.ade.exceptions.AdeParsingException;
import org.openmainframe.ade.exceptions.AdeUsageException;
import org.openmainframe.ade.ext.AdeExt;
import org.openmainframe.ade.ext.data.GroupsQueryImpl;
import org.openmainframe.ade.ext.data.ManagedSystemInfo;
import org.openmainframe.ade.ext.main.helper.AdeExtRequestType;
import org.openmainframe.ade.ext.os.LinuxAdeExtProperties;
import org.openmainframe.ade.ext.stats.MessageRateStats;
import org.openmainframe.ade.ext.stats.MessagesWithParseErrorStats;
import org.openmainframe.ade.ext.stats.MessagesWithUnexpectedSource;
import org.openmainframe.ade.ext.utils.ExtFileUtils;
import org.openmainframe.ade.impl.data.TextClusteringComponentModel;
import org.openmainframe.ade.impl.data.TextClusteringModel;
import org.openmainframe.ade.impl.data.IThresholdSetter;
import org.openmainframe.ade.utils.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.joda.time.DateTime;

import static org.openmainframe.ade.ext.os.parser.ReaderLoggerMessages.*;

/**
 * The reader for Nginx Logs.
 * Note: ParseQualityReport infrastructure is defined in this class. But, it's not being
 * used to output any parse error messages. Parse Error messages are replaced by the
 * MessagesWithParseErrorStats class.
 */
public class NginxLogMessageReader extends AdeMessageReader {

    /**
     * The default UNASSIGNED analysis group. Note: This is NOT the internal id of
     * the UNASSIGNED analysis group.
     */
    public static final int UNASSIGNED_ANALYSIS_GROUP_ID = -1;
    /**
     * Default value for when a component doesn't exist in the message.
     */
    public static final String LINUX_LINE_NO_COMPONENT_NAME = "(NO_COMPONENT)";
    /**
     * The ASCII controlled characters, 0x00-0x1F and 0x7F.
     */
    public static final String ASCII_CONTROLLED_CHARACTERS = "\\p{Cntrl}";

    /**
     * Pattern of IO Exception messages that indicates the reading from the input stream
     * should be terminated gracefully. The pattern is upper-cased, so it must be matched
     * against an upper-cased exception message.
     * Example of syntax:
     * "(Connection reset by peer|.*connection terminated.*)"
     */
    final static String IOEXCEPTION_TERMINATE_GRACEFULLY_STRING = "(.*Connection reset by peer.*|.*Connection timed out.*)".toUpperCase();

    /**
     * The default value for when a GMT offset is invalid.
     */
    public static final long GMT_OFFSET_INVALID = 362340;

    /**
     * The threshold ratio (0.05, i.e. 5%) used to determine if the log data was
     * successfully parsed.
     */
    private static final double goodPercentThreshold = .05;
    /**
     * The default logger for this class.
     */
    private static final Logger logger = LoggerFactory.getLogger(NginxLogMessageReader.class);
    /**
     * Object to create and keep track of textual clusters.
     */
    private TextClusteringComponentModel m_textClusteringComponentModel;
    /**
     * Keep track of message instances that are waiting to be read.
     */
    private IMessageInstance m_messageInstanceWaiting = null;
    /**
     * The previous message instance to be read.
     */
    private IMessageInstance m_prevMessageInstance = null;
    /**
     * DataFactory to create message instances.
     */
    private IDataFactory m_dataFactory;
    /**
     * For preprocessing Linux messages.
     */
    private LinuxMessageTextPreprocessor m_messageTextPreprocessor;

    /**
     * Number of lines that have parsing errors.
     */
    private int m_errorLineCount = 0;
    /**
     * Number of lines whose source was not one of the expected sources.
     */
    private int m_unexpectedSourceLineCount = 0;

    /**
     * Number of lines where the component name is missing.
     */
    private int m_componentMissingLineCount = 0;

    /**
     * The starting time of the parser (System.nanoTime() value).
     */
    private long m_parserStartTime;
    /**
     * The starting date of the parser.
     */
    private Date m_parserStartDate = new Date();

    /**
     * Whether the message returned from readMessageInstance() is the 2nd message
     * generated from a wrapper message.
     */
    private boolean m_isWrapperMessage = false;
    /**
     * The number of wrapper messages.
     */
    private long m_wrapperMessageCount = 0;
    /**
     * The number of non-wrapper messages.
     */
    private long m_nonWrapperMessageCount = 0;

    /**
     * Number of suppressed messages remaining.
     */
    private int m_suppressedMessagesRemaining = 0;
    /**
     * Number of suppressed non-wrapper messages.
     */
    private long m_suppressedNonWrapperMessageCount = 0;

    /**
     * Name of the last newly seen source.
     */
    private String m_lastNewlySeenSourceId = null;

    /**
     * The parsers tried, in order, on each line of the Nginx log.
     */
    private NginxLogLineParser[] m_lineParsers;

    /**
     * Hashmap mapping the (lower-cased) source to its Source ID.
     */
    private Map<String, String> sourceToSourceIdMap = new HashMap<String, String>();

    /**
     * The Linux specific properties to be used containing configurations from start of AdeExt main class.
     */
    private LinuxAdeExtProperties m_adeExtProperties;

    /**
     * Holds system specific information.
     */
    private ManagedSystemInfo m_info = null;

    /**
     * An object used for monitoring parsing quality, or null if none.
     */
    private IParsingQualityReporter m_parsingQualityReport = null;

    /**
     * Constructs a reader for a given input stream and initializes member variables.
     * @param stream Input stream for parsing.
     * @param parseReportFilename the name of the parse report.
     * @param adeExtProperties Configuration flags used to specify time zone and whether to use debug parser codes.
     * @throws AdeException
     */
    public NginxLogMessageReader(AdeInputStream stream, String parseReportFilename,
            LinuxAdeExtProperties adeExtProperties) throws AdeException {
        super(stream);
        m_dataFactory = Ade.getAde().getDataFactory();

        m_textClusteringComponentModel = Ade.getAde().getActionsFactory().getTextClusteringModel(true);
        m_messageTextPreprocessor = new LinuxMessageTextPreprocessor();
        m_textClusteringComponentModel.setMessageTextPreprocessor(m_messageTextPreprocessor);

        initializeOtherInformation(adeExtProperties, parseReportFilename);
    }

    /**
     * Main logic for this class. Reads the next message and stores the information extracted
     * from it in a MessageInstance object. A waiting (wrapper) message or a remaining suppressed
     * message is returned first. Otherwise lines are read until one can be parsed: control
     * characters are stripped, then each line parser is tried in order. For a parsed line a
     * message id is generated and the remote address is processed as the source id. A line whose
     * source is rejected is recorded as an unexpected-source line and skipped; a non-empty line
     * that no parser accepts is recorded as a parse error and skipped.
     * @return MessageInstance object that stores all the necessary information of a message,
     *         or null once the end of the input stream has been reached.
     */
    @Override
    public final IMessageInstance readMessageInstance() throws IOException, AdeException {
        if (m_messageInstanceWaiting != null) {
            return getMessageInstanceWaiting();
        }
        if (m_suppressedMessagesRemaining > 0) {
            updateSuppressedMessageStats();
            return m_prevMessageInstance;
        }

        String currentLine;
        boolean gotLine = false;
        boolean unexpectedSource = false;
        while (!gotLine) {
            currentLine = getCurrentLine();
            if (currentLine == null) {
                handleEndOfStream();
                return null;
            }
            currentLine = currentLine.replaceAll(ASCII_CONTROLLED_CHARACTERS, "");

            for (NginxLogLineParser lineParser : m_lineParsers) {
                gotLine = lineParser.parseLine(currentLine);
                if (gotLine) {
                    String msgId = getMessageId(lineParser);
                    DateTime dateTime = handleDateTime(lineParser);
                    final String sourceId = getAndProcessSourceId(lineParser.getRemoteAddress());
                    if (sourceId == null) {
                        /* The source is not one of the requested sources: record the
                         * line and move on to the next one instead of aborting. */
                        gotLine = false;
                        unexpectedSource = true;
                        MessagesWithUnexpectedSource.addMessage(lineParser.getRemoteAddress(),
                                lineParser.m_timestamp.getTime(), currentLine);
                        break;
                    }
                    m_isWrapperMessage = false;
                    m_nonWrapperMessageCount++;
                    InputTimeZoneManager.updateTimezone(sourceId, dateTime);
                    m_prevMessageInstance = m_dataFactory.newMessageInstance(
                            sourceId,
                            lineParser.getMsgTime(),
                            msgId,
                            lineParser.getRequest(),
                            lineParser.getRemoteAddress(),
                            IMessageInstance.Severity.UNKNOWN); // Severity = null for Nginx
                    /* Setting the messageInstanceWaiting to null, which would stop wrappers such as SUDO or CRON
                       to be passed to ade. */
                    m_messageInstanceWaiting = null;
                    return m_prevMessageInstance;
                }
            }

            if (!gotLine) {
                if (unexpectedSource) {
                    m_unexpectedSourceLineCount++;
                    unexpectedSource = false;
                } else if (currentLine.length() != 0) {
                    final MessagesWithParseErrorStats stats = MessagesWithParseErrorStats.getParserErrorStats();
                    stats.addMessage(currentLine);
                    m_errorLineCount++;
                }
            }
        }
        return null;
    }

    /**
     * Returns the waiting message instance and records message diagnostics.
     * @return the message instance waiting to be read.
     */
    private IMessageInstance getMessageInstanceWaiting() {
        final IMessageInstance tmp = m_messageInstanceWaiting;
        m_messageInstanceWaiting = null;
        m_isWrapperMessage = true;
        m_wrapperMessageCount++;
        return tmp;
    }

    /**
     * Records diagnostics for a suppressed message and asks the text preprocessor for an
     * extra (wrapper) message derived from the previous message instance, which then becomes
     * the waiting message instance.
     * @throws AdeException
     */
    private void updateSuppressedMessageStats() throws AdeException {
        m_suppressedNonWrapperMessageCount++;
        m_suppressedMessagesRemaining--;
        m_messageInstanceWaiting = m_messageTextPreprocessor.getExtraMessage(m_prevMessageInstance);
    }

    /**
     * Retrieves the current line using this reader.
     * @return the line that was read. If null, then we have reached the end of the input stream
     *         or the stream was terminated by an IOException that is treated as a normal end.
     * @throws AdeException if reading fails with any other IOException.
     */
    private String getCurrentLine() throws AdeException {
        String currentLine;
        try {
            currentLine = this.readLine();
        } catch (IOException e) {
            /* getMessage() may be null; such an exception is never a graceful termination. */
            final String exceptionMsg = e.getMessage();
            if (exceptionMsg != null
                    && exceptionMsg.toUpperCase().matches(IOEXCEPTION_TERMINATE_GRACEFULLY_STRING)) {
                logger.warn("Normal IOException: " + NginxLogMessageReader.class.getSimpleName()
                        + " will be terminated gracefully.", e);
                currentLine = null;
            } else {
                throw new AdeParsingException("Failed reading from log file", e);
            }
        }
        return currentLine;
    }

    /**
     * Fetches the last determined date time from the parser and, when it is available and
     * the GMT offset was not defined through the properties, updates the GMT offset from it.
     * When the parser has no date time, the offset keeps whatever value it already had.
     * @param lineParser the parser being used to parse the line.
     * @return the last determined date time of the parser, possibly null.
     */
    private DateTime handleDateTime(NginxLogLineParser lineParser) {
        final DateTime dateTime = lineParser.getLastDeterminedDateTime();
        if (dateTime != null && !m_adeExtProperties.isGmtOffsetDefined()) {
            updateGmtOffset(dateTime);
        }
        return dateTime;
    }

    /**
     * Updates the time offset and sets it in the ManagedSystemInfo object.
     * The offset is converted to whole hours, so a fractional-hour offset is truncated.
     * @param dateTime the date/time-stamp of the current message.
     */
    private void updateGmtOffset(DateTime dateTime) {
        final long gmtOffsetInMillis = dateTime.getZone().getOffset(dateTime.getMillis());
        final long gmtOffset = TimeUnit.MILLISECONDS.toHours(gmtOffsetInMillis);
        m_info.setGmtOffset(gmtOffset);
    }

    /**
     * Lets the message text preprocessor update the parser's component, then generates the
     * message id based on the component's clustering model.
     * @param lineParser the parser being used to parse the line.
     * @return the generated message id.
     * @throws AdeException
     */
    private String getMessageId(NginxLogLineParser lineParser) throws AdeException {
        final Pair<String, IThresholdSetter> p =
                m_messageTextPreprocessor.updateComponent(lineParser.m_component, lineParser.m_request);
        lineParser.m_component = p.m_first;
        final IThresholdSetter thresholdSetter = p.m_second;
        return generateMessageId(lineParser, thresholdSetter);
    }

    /**
     * Generates message id based on the component's clustering model.
     * @param lineParser the parser being used to parse the line.
     * @param thresholdSetter threshold for comparing two strings; a SimpleThresholdSetter
     *        is used when this is null.
     * @return the generated message id: the component name and the cluster id joined by "_".
     * @throws AdeException
     */
    private String generateMessageId(NginxLogLineParser lineParser, IThresholdSetter thresholdSetter) throws AdeException {
        if (thresholdSetter == null) {
            thresholdSetter = new TextClusteringComponentModel.SimpleThresholdSetter();
        }
        final TextClusteringModel model = m_textClusteringComponentModel.getTextClusteringModel(lineParser.m_component, thresholdSetter);
        return model.getComponentName() + "_" + model.getOrAddCluster(lineParser.m_request, lineParser.m_timestamp).getClusterId();
    }

    /**
     * Whether the message returned from readMessageInstance() is the generated message of a wrapper message.
     * @return true if the message is a generated wrapper message.
     */
    public final boolean isWrapperMessage() {
        return m_isWrapperMessage;
    }

    /**
     * Returns the sourceID for a source (for Nginx logs the caller passes the remote address).
     * A source seen before is answered from the cache. Otherwise, when the source option was
     * provided and the request type is ANALYZE, the source must be one of the requested
     * sources. A new source is added to the data store, mapped to the UNASSIGNED analysis
     * group, has its analysis group evaluated, and is then cached.
     * @param source the source of the current message.
     * @return the source id, or null if the source is not one of the requested sources.
     * @throws AdeException
     */
    private String getAndProcessSourceId(String source) throws AdeException {
        /* Make source case insensitive. */
        source = source.toLowerCase();

        String sourceId = sourceToSourceIdMap.get(source);
        if (sourceId != null) {
            /* This source has already been processed. Don't need to
             * perform the rest of the processing. */
            return sourceId;
        }

        /* If option is provided, then need to make sure the only sources being
         * analyzed is from the sources in the -s option. */
        final AdeExtRequestType requestType = m_adeExtProperties.getRequestType();
        if (m_adeExtProperties.isSourceOptionProvided() && (requestType.name()).equalsIgnoreCase("ANALYZE")) {
            final Collection<ISource> sources = m_adeExtProperties.getSources();
            boolean isValidSource = false;
            for (ISource s : sources) {
                if ((s.getSourceId()).equalsIgnoreCase(source)) {
                    isValidSource = true;
                    break;
                }
            }

            /* Return null, and do not update the sourceToSourceIdMap */
            if (!isValidSource) {
                return null;
            }
        }

        /* Update database to add the source, then update m_info with the source. */
        final IDataStoreSources dataStoreSources = Ade.getAde().getDataStore().sources();

        /* The sourceId is the same as source. */
        sourceId = source;
        m_lastNewlySeenSourceId = sourceId;
        m_adeExtProperties.setLastNewlySeenSourceId(m_lastNewlySeenSourceId);

        /* Read the RuntimeModelData from file. */
        final RuntimeModelDataManager runtimeModelDataManager = new RuntimeModelDataManager();
        runtimeModelDataManager.readModelDataFromFile(source);

        /* Provide a mapping between source to analysisGroup. */
        dataStoreSources.addSourceAndAnalysisGroup(sourceId, UNASSIGNED_ANALYSIS_GROUP_ID);

        final ISource S = dataStoreSources.getOrAddSource(source);
        if (m_info != null) {
            /* Add m_info to the database. It's only added when SysInfo is available. */
            m_info.updateDataStore(S);
        }

        /* Update the analysis group by calling an atomic method that evaluates analysis group rules,
         * and commits to the database in an atomic transaction. */
        final String analysisGroup = GroupsQueryImpl.updateSourcesAnalysisGroup(sourceId);
        MessageRateStats.addSourceAndAnalysisGroup(sourceId, analysisGroup);

        logger.trace("Datastore updated for Linux system: " + source
                + ", that maps to sourceId: " + sourceId);

        /* Add the source to the mapping */
        sourceToSourceIdMap.put(source, sourceId);

        return sourceId;
    }

    /**
     * Private method to initialize other information: the parsing start time, the line
     * parsers (currently a single NginxLogParser), the AdeExt properties, the
     * parsingQualityReporter and the managed system information.
     * @param adeExtProperties Properties file that contains AdeExt configurations.
     * @param parseReportFilename the file name of the parse report.
     * @throws AdeException
     */
    private void initializeOtherInformation(LinuxAdeExtProperties adeExtProperties, String parseReportFilename)
            throws AdeException {
        m_parserStartTime = System.nanoTime();
        /* NOTE(review): the parser is created before setAdeExtProperties() is called, so a
         * parser built with the default constructor picks up whatever properties were set
         * statically before this point - confirm that this ordering is intended. */
        m_lineParsers = new NginxLogLineParser[] {
                new NginxLogParser(),
        };
        m_adeExtProperties = adeExtProperties;
        NginxLogParserBase.setAdeExtProperties(m_adeExtProperties);
        setParsingQualityReporterIfRequested(parseReportFilename);
        try {
            if (m_adeExtProperties.isGmtOffsetDefined()) {
                m_info = new ManagedSystemInfo(m_adeExtProperties.getGmtOffset(), "linux");
            } else {
                m_info = new ManagedSystemInfo(GMT_OFFSET_INVALID, "linux");
            }
        } catch (IllegalArgumentException e) {
            throw new AdeUsageException("Invalid SysInfo argument(s)", e);
        }
    }

    /**
     * Sets the parse quality report. First it checks to see if it was requested, if not then we
     * return. Otherwise, we initialize the parse report output directory and the parse report
     * itself, and hand the report to every line parser.
     * @param parseReportFilename The name of the parse report file.
     * @throws AdeException
     */
    public final void setParsingQualityReporterIfRequested(String parseReportFilename) throws AdeException {
        if (!m_adeExtProperties.isParseReportRequested()) {
            return;
        }
        final File sumLogDirectory = AdeExt.getAdeExt().getOutputDirectoryManager().getOutputHome();
        ExtFileUtils.createDir(sumLogDirectory);
        final File resultFile = new File(sumLogDirectory, parseReportFilename);
        resultFile.getParentFile().mkdirs();
        m_parsingQualityReport = Ade.getAde().getActionsFactory().createParsingQualityReporter();
        m_parsingQualityReport.open(resultFile.getPath());

        for (NginxLogLineParser lineParser : m_lineParsers) {
            lineParser.setParseQualityReport(m_parsingQualityReport);
        }
    }

    /**
     * Logs summary messages indicating how the parsing went. If more than 5% of the raw lines
     * produced messages we write a success message, otherwise we write an error message.
     * When no raw lines were read at all the ratio is treated as 0.
     * @throws AdeException
     */
    private void printStatEof() throws AdeException {
        final Format formatter = new SimpleDateFormat("E, dd MMM yyyy HH:mm:ss Z");
        final int rawLineCount = getLineNumber();
        final long elapsedTime = System.nanoTime() - m_parserStartTime;
        final double seconds = Math.ceil(elapsedTime / 1.0E09);
        final AdeExtRequestType requestType = m_adeExtProperties.getRequestType();

        String message = String.format(PARSED_DATA_STATS_MSG, rawLineCount, m_nonWrapperMessageCount,
                m_suppressedNonWrapperMessageCount, m_wrapperMessageCount, m_errorLineCount,
                m_componentMissingLineCount, m_unexpectedSourceLineCount, seconds);
        logger.info(message);

        final String StartDate = formatter.format(m_parserStartDate);
        String sourceId = "Unknown";
        if (m_prevMessageInstance != null) {
            sourceId = m_prevMessageInstance.getSourceId();
        } else if (m_adeExtProperties.isSourceOptionProvided()) {
            final Collection<ISource> sources = m_adeExtProperties.getSources();
            /* An empty source collection keeps the "Unknown" placeholder. */
            if (!sources.isEmpty()) {
                sourceId = (sources.iterator().next()).getSourceId();
            }
        }
        /* Avoid 0/0 = NaN, which would be reported as "bad" rather than "nothing parsed". */
        final double goodPercent = (rawLineCount == 0)
                ? 0.0
                : ((double) m_nonWrapperMessageCount) / ((double) rawLineCount);

        switch (requestType) {
            case UPLOAD:
                try {
                    if (goodPercent > goodPercentThreshold) {
                        message = String.format(GOOD_UPLOAD_MSG, StartDate, rawLineCount, sourceId,
                                m_nonWrapperMessageCount, DataType.SYSLOG.name());
                    } else {
                        message = String.format(BAD_UPLOAD_MSG, StartDate, rawLineCount, sourceId,
                                m_nonWrapperMessageCount, DataType.SYSLOG.name());
                    }
                } catch (Throwable t) {
                    logger.error("An error occured - Internal Error: Building Notification ", t);
                }
                break;
            case ANALYZE:
                try {
                    if (goodPercent > goodPercentThreshold) {
                        message = String.format(GOOD_ANALYZE_MSG, StartDate, rawLineCount, DataType.SYSLOG.name(),
                                sourceId, m_nonWrapperMessageCount);
                    } else if (goodPercent == 0) {
                        message = String.format(NO_MSGS_PARSED_MSG, StartDate, rawLineCount, DataType.SYSLOG.name(),
                                sourceId);
                    } else {
                        message = String.format(BAD_ANALYZE_MSG, StartDate, rawLineCount, DataType.SYSLOG.name(),
                                sourceId, m_nonWrapperMessageCount);
                    }
                } catch (Throwable t) {
                    logger.error("An error occured - Internal Error: Building Notification ", t);
                }
                break;
            default:
                /* Other request types have no dedicated summary; the statistics message is logged again below. */
                break;
        }
        logger.info(message);
    }

    /**
     * Perform actions at the end of stream: close the parse quality report, if any,
     * and log the parsing statistics.
     * @throws AdeException
     */
    private void handleEndOfStream() throws AdeException {
        if (m_parsingQualityReport != null) {
            m_parsingQualityReport.close();
        }

        printStatEof();
    }

}
diff --git a/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/NginxLogParser.java b/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/NginxLogParser.java new file mode 100644 index 0000000..df7df06 --- /dev/null +++ b/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/NginxLogParser.java @@ -0,0 +1,82 @@ +/* + + Copyright Contributors to the ADE Project. + + SPDX-License-Identifier: GPL-3.0-or-later + + This file is part of Anomaly Detection Engine for Linux Logs (ADE). + + ADE is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + ADE is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ADE. If not, see .
+ +*/ + +package org.openmainframe.ade.ext.os.parser; + +import java.util.regex.Pattern; + +import org.openmainframe.ade.exceptions.AdeException; +import org.openmainframe.ade.ext.os.LinuxAdeExtProperties; + +/** + * An RFC3164 syslog parser that looks for a component id and process id + * in the message body (Note, this class also handles log messages with only + * the component id and not the process id). If present, they are + * extracted and the remainder of the message is returned as the message body. + */ +public class NginxLogParser extends NginxLogParserBase { + + /** + * Pattern object for matching against the base RFC3164 header, the optional BOM_AND_PRI which gets the + * option UTF-8 Byte Order mark and priority values, and regex that finds the component id, process id, + * and message body. + */ + private static final Pattern pattern = Pattern.compile(NGINX_LOG); +// private static final Pattern pattern = Pattern.compile(NGINX_LOG +// + "([-_./a-zA-Z0-9]*[-_./a-zA-Z])?: (.*)$"); + + /** + * Default constructor to call its parent constructor. + * @throws AdeException + */ + public NginxLogParser() throws AdeException { + super(); + } + /** + * Explicit-value constructor for getting the properties file and passing it to the parent explicit value + * constructor. + * @param linuxAdeExtProperties Contains property and configuration information from the start of AdeExt main class. + * @throws AdeException + */ + public NginxLogParser(LinuxAdeExtProperties linuxAdeExtProperties) + throws AdeException { + super(linuxAdeExtProperties); + } + + /** + * Parses the line by calling the super class's parseLine with the positions of the captured groups. + * As an example, it looks at the text following the RFC3164 header and tries to match text like: + * process-name[1234]: message body. + * If found, the component is set to "process-name" and the pid is set to "1234". The message body is + * set to "message body." + * @param line A line from the Linux syslog file. 
+ * @return true if the line was parsed successfully. + */ + @Override + public boolean parseLine(String line) { + return parseLine(pattern, NGINX_LOG_REMOTE_ADDRESS_GROUP, NGINX_LOG_REMOTE_USER_GROUP, + NGINX_LOG_TIMESTAMP_GROUP, NGINX_LOG_REQUEST_GROUP, NGINX_LOG_STATUS_GROUP, + NGINX_LOG_BYTES_GROUP, line); + } + +} diff --git a/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/NginxLogParserBase.java b/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/NginxLogParserBase.java new file mode 100644 index 0000000..d535e0e --- /dev/null +++ b/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/NginxLogParserBase.java @@ -0,0 +1,242 @@ +/* + + Copyright Contributors to the ADE Project. + + SPDX-License-Identifier: GPL-3.0-or-later + + This file is part of Anomaly Detection Engine for Linux Logs (ADE). + + ADE is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + ADE is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ADE. If not, see . + +*/ +package org.openmainframe.ade.ext.os.parser; + +import java.util.Date; +import java.util.TimeZone; + +import org.openmainframe.ade.Ade; +import org.openmainframe.ade.IAdeConfigProperties; +import org.openmainframe.ade.exceptions.AdeException; +import org.openmainframe.ade.ext.os.LinuxAdeExtProperties; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; +import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; + +/** + * An abstract base class for Nginx log parsers. 
This class defines regular expressions + * for the Nginx log header fields while leaving additional parsing of the message body to + * concrete subclasses. + */ +public abstract class NginxLogParserBase extends NginxLogLineParser { + /** + * Main logger for this class. + */ + static final Logger s_logger = LoggerFactory.getLogger(NginxLogParserBase.class); + + /** + * The end of today, when the parser was first loaded. + */ + private static DateTime END_OF_TODAY = null; + + /** + * The input time-zone specified in setup.props. + */ + private static DateTimeZone INPUT_TIME_ZONE; + + /** + * The output time-zone specified in setup.props. + */ + private static DateTimeZone OUTPUT_TIME_ZONE; + + /** + * LinuxAdeExtProperties object that contains properties and configurations information from the start + * of AdeExt main class. + */ + private static LinuxAdeExtProperties s_linuxAdeExtProperties = null; + + /** + * Regular expression to extract the time-stamp from the header.17/May/2015:08:05:57 +0000 + */ + public static final String NGINX_TIMESTAMP = "(\\d{2}/.{3}/\\d{4}:\\d{2}:\\d{2}:\\d{2} [+-]\\d{4})"; + /** + * Regular expression to extract header information. (The priority, time-stamp, and host name) + */ + public static final String NGINX_LOG = "^" + "(.*) - (.*) \\[" + NGINX_TIMESTAMP + "\\] \"(.*)\" (.*) (.*) \".*\" \".*\""; + + /* + * Within the NGINX_LOG regex string above, identify the regex + * capturing groups for the parts that we want to extract. + */ + protected static final int NGINX_LOG_REMOTE_ADDRESS_GROUP = 1; + protected static final int NGINX_LOG_REMOTE_USER_GROUP = 2; + protected static final int NGINX_LOG_TIMESTAMP_GROUP = 3; + protected static final int NGINX_LOG_REQUEST_GROUP = 4; + protected static final int NGINX_LOG_STATUS_GROUP = 5; + protected static final int NGINX_LOG_BYTES_GROUP = 6; + + /** + * The current year. 
(Nginx logs already contain the year) + */ + private final int curYear; + + /* + * Setup an array of DateTimeFormatter objects that toDate() tries in order. + * Nginx time-stamps have a single fixed layout, for example + * 17/May/2015:08:05:57 +0000 (day, short month name, year, time and a + * numeric UTC offset), so one pattern is listed. + */ + protected static final DateTimeFormatter[] dt_formatters = { + DateTimeFormat.forPattern("dd/MMM/yyyy:HH:mm:ss Z") + }; + /** + * Constructor for initializing the properties object and the shared time-zone settings. + * @param linuxAdeExtProperties Contains property and configuration information; when null, the properties registered through setAdeExtProperties are used. + * @throws AdeException propagated from the time-zone initialization. + */ + public NginxLogParserBase(LinuxAdeExtProperties linuxAdeExtProperties) throws AdeException { + this.curYear = new DateTime().getYear(); + + if (linuxAdeExtProperties == null) { + m_LinuxAdeExtProperties = s_linuxAdeExtProperties; + } else { + m_LinuxAdeExtProperties = linuxAdeExtProperties; + } + + /* Set the end of today and the time-zones (only the first call has an effect) */ + initializeTimeZoneAndStartOfToday(); + } + + /** + * Default constructor; passes null so the properties registered through setAdeExtProperties are used. + * @throws AdeException propagated from the time-zone initialization. + */ + public NginxLogParserBase() throws AdeException { + this(null); + } + + /** + * Set the AdeExt properties used by parsers that are constructed without their own properties. + * @param linuxAdeExtProperties The properties object that contains the configuration and properties information. + */ + public static void setAdeExtProperties(LinuxAdeExtProperties linuxAdeExtProperties) { + s_linuxAdeExtProperties = linuxAdeExtProperties; + } + + /** + * Returns the year stored in the AdeExt properties registered through setAdeExtProperties (there is no null check, so they must be set first). + * @return the year as an int value. + */ + public static int getAdeExtPropertiesYear() { + return s_linuxAdeExtProperties.getYear(); + } + /** + * Returns the input time zone specified in setup.props + * @return The input time zone.
+ */ + public static DateTimeZone getInputTimeZone() { + return INPUT_TIME_ZONE; + } + /** + * Returns the output time zone specified in setup.props + * @return The output time zone. + */ + public static DateTimeZone getOutputTimeZone() { + return OUTPUT_TIME_ZONE; + } + + /** + * Converts the time-stamp captured from an Nginx log line into a Date. + * + * Unlike Syslog, Nginx logs come with the year and a numeric UTC offset, so the + * instant is fully defined by the string itself: + * - no year is inferred from the current date or taken from the setup file, and + * - the configured input time-zone is not applied. + * + * Processing: + * - each formatter in dt_formatters is tried in order; + * - the first successful parse is moved to the output time-zone and + * returned as a java.util.Date. + * + * Moving to the output time-zone keeps the same instant, so it does not + * alter the returned Date. + * + * When no formatter matches, the last parse failure is attached as the + * cause of the exception that is thrown. + * + * @param source the source name string value; not used by the conversion. + * @param dateTimeString the date and time string value, for example 17/May/2015:08:05:57 +0000. + * @return Date object with the date/time-stamp of the Nginx log line. + * @throws IllegalArgumentException if no formatter can parse dateTimeString. + */ + @Override + public final Date toDate(String source, String dateTimeString) { + IllegalArgumentException parseFailure = null; + DateTime dt = null; + for (DateTimeFormatter fmt : dt_formatters) { + try { + dt = fmt.parseDateTime(dateTimeString); + /* The time-stamp carries its own UTC offset, so INPUT_TIME_ZONE is not applied. */ + dt = dt.withZone(OUTPUT_TIME_ZONE); + /* AdeCore will take the Java Date object, and convert + * it to the output time-zone, then extract the hour.
*/ + return dt.toDate(); + } catch (IllegalArgumentException e) { + /* A formatter that does not match is expected while iterating, so it is + * only logged when dt is already set (the failure came after a parse). + * The exception is kept so the final failure can carry its cause. */ + parseFailure = e; + if (dt != null) { + s_logger.error("Invalid argument encountered.", e); + } + } + } + throw new IllegalArgumentException("Failed to parse date " + dateTimeString, parseFailure); + } + + /** + * Set the END_OF_TODAY value and time-zone values. The time-zone values are taken from the Ade + * configuration properties as fixed offsets (raw offsets, which do not include daylight saving). + * END_OF_TODAY is the current date-time in the output time-zone, plus one day, at the start of that day. + * Note: These are set only once; the check runs while holding the class lock. + * @throws AdeException presumably from the Ade configuration lookup (the throwing call is not visible here). + */ + private static void initializeTimeZoneAndStartOfToday() throws AdeException { + synchronized (NginxLogParserBase.class) { + if (END_OF_TODAY == null) { + final IAdeConfigProperties adeConfig = Ade.getAde().getConfigProperties(); + final TimeZone timeZone = adeConfig.getInputTimeZone(); + final TimeZone outputTimezone = adeConfig.getOutputTimeZone(); + INPUT_TIME_ZONE = DateTimeZone.forOffsetMillis(timeZone.getRawOffset()); + OUTPUT_TIME_ZONE = DateTimeZone.forOffsetMillis(outputTimezone.getRawOffset()); + END_OF_TODAY = DateTime.now(); + END_OF_TODAY = END_OF_TODAY.withZone(OUTPUT_TIME_ZONE); + END_OF_TODAY = END_OF_TODAY.plusDays(1); + END_OF_TODAY = END_OF_TODAY.withTimeAtStartOfDay(); + } + } + } + + /** + * Return the DateTime determined by the toDate(String source, String dateTimeString) method. + * This parser does not record the parsed value, so nothing is available here. + * @return always null.
+ */ + public final DateTime getLastDeterminedDateTime() { + return null; + } + +} diff --git a/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/ReaderFactory.java b/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/ReaderFactory.java index fde4be2..e268b78 100644 --- a/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/ReaderFactory.java +++ b/ade-ext/src/main/java/org/openmainframe/ade/ext/os/parser/ReaderFactory.java @@ -49,6 +49,10 @@ public AdeMessageReader getReader(AdeInputStream stream, String parseReportFilen return new SparklogMessageReader(stream, parseReportFilename, (LinuxAdeExtProperties) adeExtProperties); } + if (AdeExt.getAdeExt().getConfigProperties().isNginxLog()){ + return new NginxLogMessageReader(stream, parseReportFilename, + (LinuxAdeExtProperties) adeExtProperties); + } return new LinuxSyslogMessageReader(stream, parseReportFilename, (LinuxAdeExtProperties) adeExtProperties); } else { diff --git a/ade-ext/src/main/java/org/openmainframe/ade/ext/utils/AdeExtConfigProperties.java b/ade-ext/src/main/java/org/openmainframe/ade/ext/utils/AdeExtConfigProperties.java index b4e57d4..0b55111 100644 --- a/ade-ext/src/main/java/org/openmainframe/ade/ext/utils/AdeExtConfigProperties.java +++ b/ade-ext/src/main/java/org/openmainframe/ade/ext/utils/AdeExtConfigProperties.java @@ -49,6 +49,7 @@ public class AdeExtConfigProperties { private static final String MSG_RATE_MERGE_SOURCE = "adeext.msgRateMergeSource"; private static final String STATS_ROOT_DIR = "adeext.statsRootDir"; private static final String USE_SPARK = "adeext.useSparkLogs"; + private static final String USE_NGINX = "adeext.useNginxLogs"; /* Constants for config property default values */ private static final String DEFAULT_STATS_ROOT_DIR = "output/ade-stats"; @@ -68,6 +69,7 @@ public class AdeExtConfigProperties { private final boolean m_isMsgRateMergeSource; private final String m_statsRootDir; private final boolean m_useSparkLogs; + private final boolean 
m_useNginxLogs; /** * Set the AdeExtConfigProperties from the specified property file. @@ -163,13 +165,21 @@ public AdeExtConfigProperties(String propertyFile) throws AdeException { m_statsRootDir = DEFAULT_STATS_ROOT_DIR; } - /* Type of logs to use. True: Spark logs. Defaults to Linux Syslogs */ + /* Type of logs to use. Defaults to Linux Syslogs */ - if (m_props.containsKey(USE_SPARK)){ + if (m_props.containsKey(USE_SPARK) && m_props.getBooleanProperty(USE_SPARK)){ + /* Spark logs take precedence when both log types are enabled. */ m_useSparkLogs = m_props.getBooleanProperty(USE_SPARK); + m_useNginxLogs = false; + } + else if (m_props.containsKey(USE_NGINX) && m_props.getBooleanProperty(USE_NGINX)) { + /* Nginx logs are selected only when Spark logs are not enabled. */ + m_useNginxLogs = m_props.getBooleanProperty(USE_NGINX); + m_useSparkLogs = false; } else{ m_useSparkLogs = false; + m_useNginxLogs = false; } m_props.verifyAllPropertiesUsed(); @@ -240,6 +250,11 @@ public final Boolean isSparkLog(){ return m_useSparkLogs; } + /** Return if we're using Nginx logs or Linux Syslogs. (true implies Nginx logs) */ + public final Boolean isNginxLog(){ + return m_useNginxLogs; + } + /** * Returns the root directory where statistics are written. * diff --git a/ade-ext/src/test/java/org/openmainframe/ade/ext/os/parser/TestNginxLogLineParser.java b/ade-ext/src/test/java/org/openmainframe/ade/ext/os/parser/TestNginxLogLineParser.java new file mode 100644 index 0000000..b4e338b --- /dev/null +++ b/ade-ext/src/test/java/org/openmainframe/ade/ext/os/parser/TestNginxLogLineParser.java @@ -0,0 +1,106 @@ +/* + + Copyright Contributors to the ADE Project. + + SPDX-License-Identifier: GPL-3.0-or-later + + This file is part of Anomaly Detection Engine for Linux Logs (ADE). + + ADE is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version.
+ + ADE is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ADE. If not, see . + +*/ +package org.openmainframe.ade.ext.os.parser; + +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; +import org.openmainframe.ade.exceptions.AdeException; + +import java.util.regex.Pattern; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.openmainframe.ade.ext.os.parser.NginxLogParserBase.NGINX_LOG; + +public class TestNginxLogLineParser { + NginxLogLineParser slp; + String longString; + @Before + public void setup() throws AdeException { + slp = Mockito.spy(NginxLogLineParser.class); + longString = "(usernameusernameusernameusernameusernameusernameusernameusernameusernameusernameusernameusernameusernameusername" + + "usernameusernameusernameusernameusernameusernameusernameusernameusernameusernameusernameusernameusername" + + "usernameusernameusernameusernameusernameusernameusernameusernameusernameusernameusernameusernameusername)"; + } + + @Test + public void testWithRealLog() { + final Pattern pattern = Pattern.compile(NGINX_LOG); + final String line = "93.180.71.3 - - [17/May/2015:08:05:32 +0000] \"GET /downloads/product_1 HTTP/1.1\" 304 0 \"-\" \"Debian APT-HTTP/1.3 (0.8.16~exp12ubuntu10.21)\""; + assertEquals(true, slp.parseLine(pattern, 1,2,3,4,5,6,line)); + } + + @Test + public void testParseLineWithMatchingPattern() { + final Pattern pattern = Pattern.compile(NGINX_LOG); + final String line = "address - - [17/May/2015:08:05:32 +0000] \"GET\" 0 0 \"-\" \"-\""; + assertEquals("Pattern matches for all parameters ",true, slp.parseLine(pattern,1,2,3,4,5,6,line)); + } + + @Test + public void testParseLineWith255CharacterHostname() { + final 
Pattern pattern = Pattern.compile(NGINX_LOG); + final String line = "address - - [17/May/2015:08:05:32 +0000] \"GET\" 0 0 \"-\" \"-\""; + assertTrue("Pattern matches but hostname has over 255 chars ", slp.parseLine(pattern, 1, 2, 3, 4, 5, 6, longString + line)); + } + + @Test + public void testParseLineWith255CharacterHostnameSecondTime() { + final Pattern pattern = Pattern.compile(NGINX_LOG); + final String line = "address - - [17/May/2015:08:05:32 +0000] \"GET\" 0 0 \"-\" \"-\""; + slp.parseLine(pattern,1,2,3,4,5,6,longString + line); + + assertEquals("Hostname over 255 characters but we go through parseLine twice to skip the logging " + ,true,slp.parseLine(pattern,1,2,3,4,5,6,longString + line)); + } + + @Test + public void testGettersGetCorrectInfoAfterRunningParseLine() { + final Pattern pattern = Pattern.compile(NGINX_LOG); + final String line = "address - - [17/May/2015:08:05:32 +0000] \"GET\" 0 0 \"-\" \"-\""; + slp.parseLine(pattern,1,2,3,4,5,6,line); + + assertEquals(null, slp.getMsgTime()); + assertEquals("address",slp.getRemoteAddress()); + assertEquals("GET",slp.getRequest()); + assertEquals("-", slp.getRemoteUser()); + assertEquals(0, slp.getBytes()); + assertEquals(0, slp.getStatus()); + } + + @Test + public void testToString() { + final Pattern pattern = Pattern.compile(NGINX_LOG); + final String line = "nub - nub [17/May/2015:08:05:32 +0000] \"nub\" 0 0 \"-\" \"nub\""; + slp.parseLine(pattern,1,2,3,4,5,6,line); + assertEquals("Testing to String works correctly " + , "timestamp=(null) " + + "remote_address=(nub) " + + "remote_user=(nub) " + + "request=(nub) " + + "status=(0) " + + "bytes=(0)" + ,slp.toString()); + } +} diff --git a/ade-ext/src/test/java/org/openmainframe/ade/ext/os/parser/TestNginxLogParserBase.java b/ade-ext/src/test/java/org/openmainframe/ade/ext/os/parser/TestNginxLogParserBase.java new file mode 100644 index 0000000..43a76b4 --- /dev/null +++ b/ade-ext/src/test/java/org/openmainframe/ade/ext/os/parser/TestNginxLogParserBase.java 
@@ -0,0 +1,116 @@ +/* + + Copyright Contributors to the ADE Project. + + SPDX-License-Identifier: GPL-3.0-or-later + + This file is part of Anomaly Detection Engine for Linux Logs (ADE). + + ADE is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + ADE is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ADE. If not, see . + +*/ +package org.openmainframe.ade.ext.os.parser; + +import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.RETURNS_DEEP_STUBS; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.util.TimeZone; +import java.util.Calendar; +import java.util.TimeZone; + +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; +import org.joda.time.format.DateTimeFormat; +import org.junit.Before; +import org.junit.Test; +import org.openmainframe.ade.Ade; +import org.openmainframe.ade.exceptions.AdeException; +import org.openmainframe.ade.ext.os.LinuxAdeExtProperties; +import org.openmainframe.ade.ext.os.parser.SparklogParserBase; +import org.openmainframe.ade.ext.os.parser.SparklogParser; +import org.openmainframe.ade.utils.patches.Version; + +public class TestNginxLogParserBase { + Ade ade; + + public void setup() throws AdeException{ + ade = mock(Ade.class, RETURNS_DEEP_STUBS); + when(ade.getConfigProperties().database().getDatabaseDriver()).thenReturn("derby"); + when(ade.getConfigProperties().getOverrideVersionCheck()).thenReturn(true); + when(ade.getDbVersion()).thenReturn(new Version(1, 0)); + Ade.create(ade); + } + + @Test + public void 
testNginxLogParserBaseConstructor() throws AdeException { + TimeZone tz= ade.getAde().getConfigProperties().getInputTimeZone(); + LinuxAdeExtProperties laep = mock(LinuxAdeExtProperties.class); + NginxLogParserBase pid = new NginxLogParser(laep); + + assertEquals("Making a new constructor. It sets the timezone " + ,DateTimeZone.forOffsetMillis(tz.getRawOffset()),pid.getInputTimeZone()); + assertEquals("Making a new constructor. It sets the timezone " + ,DateTimeZone.forOffsetMillis(tz.getRawOffset()),pid.getOutputTimeZone()); + } + + @Test + public void testNginxLogParserBaseConstructorWithNullInput() throws AdeException { + TimeZone tz= ade.getAde().getConfigProperties().getInputTimeZone(); + NginxLogParserBase pid = new NginxLogParser(null); + assertEquals("Making a new constructor wiht null value so LinuxAdeExtProperties is made. It sets the timezone" + ,DateTimeZone.forOffsetMillis(tz.getRawOffset()),pid.getInputTimeZone()); + } + + @Test + public void testToDate() throws AdeException { + LinuxAdeExtProperties laep = mock(LinuxAdeExtProperties.class, RETURNS_DEEP_STUBS); + NginxLogParserBase pid = new NginxLogParser(laep); + + when(laep.isYearDefined()).thenReturn(true); + + pid.setAdeExtProperties(laep); + DateTime date = DateTimeFormat.forPattern("dd/MMM/yyyy:HH:mm:ss Z").withZoneUTC().parseDateTime("17/May/2015:08:05:32 +0000"); + + assertEquals("toDate with good input.
Since yearSetter is null the year will be 1 " + ,date.toDate(),pid.toDate("","17/May/2015:08:05:32 +0000")); + } + + + @Test + public void testRegexPatternsTimeStamp() throws AdeException{ + setup(); + String line = "17/06/08 14:37:39 INFO ExecutorRunnable: Starting Executor Container"; + SparklogParser s = new SparklogParser(null); + s.parseLine(line); + Calendar c = Calendar.getInstance(TimeZone.getTimeZone("UTC")); + + // Time Stamp checks + c.setTime(s.getMsgTime()); + assertEquals(c.get(Calendar.YEAR), 2017); + assertEquals(c.get(Calendar.MONTH), 5); + assertEquals(c.get(Calendar.DAY_OF_MONTH), 8); + assertEquals(c.get(Calendar.HOUR_OF_DAY), 14); + assertEquals(c.get(Calendar.MINUTE), 37); + assertEquals(c.get(Calendar.SECOND), 39); + + // Tests for source , component and message body + assertEquals("info", s.getSource()); + //assertEquals("ExecutorRunnable", s.getComponent()); + assertEquals("master", s.getComponent()); + assertEquals("Starting Executor Container", s.getMessageBody()); + } + +}