-
Notifications
You must be signed in to change notification settings - Fork 0
/
parser.py
114 lines (88 loc) · 4.58 KB
/
parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import csv
import re
import sys
import os
import pandas as pd
from typing import Any
# ---------------------------------------------------------------------------
# nginx access-log -> CSV converter with optional interactive post-processing.
#
# Steps performed by this script:
#   1. Parse every line of the nginx log with `pattern` and write the captured
#      groups as rows of a CSV file.
#   2. Load that CSV into a pandas DataFrame.
#   3. Ask the user whether to filter columns, sort rows, or do nothing, and
#      write the result to a new CSV file in the current directory.
# ---------------------------------------------------------------------------

# Keywords the user can type at the interactive "what do you want to do" prompt.
FILTERING = "filtering"
SORTING = "sorting"

if len(sys.argv) == 1:
    sys.stdout.write("Usage: %s <nginx.log> <accesslog.csv>\n" % sys.argv[0])
    sys.exit(0)

# The usage line promises "<nginx.log> <accesslog.csv>", so honour that form
# when both arguments are supplied.  With a single argument keep the historical
# behaviour: read ./nginx.log and treat the argument as the CSV output path.
if len(sys.argv) >= 3:
    path = os.path.abspath(sys.argv[1])
    csv_file_name = sys.argv[2]
else:
    path = os.path.abspath("nginx.log")
    csv_file_name = sys.argv[1]

pattern = re.compile(
    r'(?P<ip>\S+) (?P<http_auto>.) (?P<http_auto2>.) \[(?P<time>\S+ \+[0-9]{4})] \"(?P<request>.*)\" (?P<status>\S+) (?P<size>[0-9]+) \"(?P<referer>.*)\" \"(?P<user_agent>.*)\" (?P<somenumber>[0-9]+) (?P<somenumber2>\S+) (?P<ipadress_port>\S+) \[] (?P<host2>\S+) (?P<status2>[0-9]+) (?P<somenumber3>\S+) (?P<status3>[0-9]+) (?P<sometext>\S+)')

# Both files are managed by the `with` statement so the log handle is closed
# too.  newline='' is what the csv module requires for files it writes to;
# without it blank rows appear on platforms that translate "\n" to "\r\n".
skipped_lines = 0
with open(path) as file, open(csv_file_name, 'w', newline='') as out:
    csv_out = csv.writer(out)
    csv_out.writerow(['ip', 'http_auto', 'http_auto2', 'time', 'request', 'response_status', 'bytes_from_server', 'referer', 'user_agent', 'bytes_from_server_2',
                      'response_time', 'monitoring_system', 'ipadress_port', 'bytes_from_server_3', 'response_time_2', 'response_status_2', 'key'])
    for line in file:
        m = pattern.match(line)
        if m is None:
            # A line that does not follow the expected format (blank line,
            # truncated entry, ...) used to crash with AttributeError; skip it.
            skipped_lines += 1
            continue
        csv_out.writerow(m.groups())

if skipped_lines:
    sys.stderr.write(
        "Skipped %d log line(s) that did not match the expected format.\n" % skipped_lines)

# Load the freshly written CSV for the interactive part.
data_frame = pd.read_csv(csv_file_name)
# Column names shown in the prompts; the trailing newline keeps the prompt
# layout the same as when the raw header line of the file was printed.
header = list(data_frame.columns)
available_headers = ",".join(header) + "\n"

# Ask the user what to do with the parsed log.
method = input(
    f"Please write down what do you want to do with log file: {FILTERING}, {SORTING} or nothing(just press Enter)?\n").strip().lower()

if method == FILTERING:
    column_filtering = input(
        "Please write columns you want to filter, for example ip,response_status.\nHere are the available headers: " + available_headers)
    # Tolerate spaces around the commas and drop empty / repeated names.
    filtering_array = list(dict.fromkeys(
        name.strip() for name in column_filtering.split(",") if name.strip()))
    ignored = [name for name in filtering_array if name not in data_frame.columns]
    if ignored:
        print("Ignoring unknown column(s): " + ",".join(ignored))
    # DataFrame.filter(items=...) keeps only the requested columns that exist.
    data_filtered = data_frame.filter(items=filtering_array)
    # Sort only by columns that are really present, otherwise sort_values
    # raises KeyError.
    sort_columns = list(data_filtered.columns)

    value_filtering_answer = input(
        "Do you want to find exactly value in the filtered logs?\nY/y or N/n : ").strip().lower()
    if value_filtering_answer == "y":
        value_filter = input(
            "Please write what value you want to find in filtered columns: ")
        # Keep the rows in which at least one selected column contains the
        # text (case-sensitive substring match).  The previous
        # filter(like=..., axis=0) call matched against the row index labels
        # rather than the cell values.
        row_mask = pd.Series(False, index=data_filtered.index)
        for column in data_filtered.columns:
            cells = data_filtered[column]
            # notna() stops missing cells from matching via their "nan" text.
            row_mask |= cells.notna() & cells.astype(str).str.contains(
                value_filter, regex=False)
        value_filter_data = data_filtered[row_mask]

        question_filtering_and_finding = input(
            "Do you want to sort it?\nY/y or N/n : ").strip().lower()
        if question_filtering_and_finding == "y":
            if sort_columns:
                # Reassign instead of inplace=True: the frame is derived from
                # another one, and in-place sorting may trigger pandas'
                # SettingWithCopyWarning.
                value_filter_data = value_filter_data.sort_values(
                    by=sort_columns, axis=0, ascending=True, na_position='first')
            value_filter_data.to_csv(
                column_filtering + value_filter + '_filtered_and_sorted_and_finded.csv', index=False)
            print("Your log file is filtered by exact value and sorted successfully!")
        elif question_filtering_and_finding == "n":
            value_filter_data.to_csv(
                column_filtering + '_just_filtered_and_finded.csv', index=False)
            print("Your log file is filtered and finded the exact value successfully!")
    elif value_filtering_answer == "n":
        question_filtering = input(
            "Do you want to sort it?\nY/y or N/n : ").strip().lower()
        if question_filtering == "y":
            if sort_columns:
                data_filtered = data_filtered.sort_values(
                    by=sort_columns, axis=0, ascending=True, na_position='first')
            data_filtered.to_csv(
                column_filtering + '_just_filtered_and_sorted.csv', index=False)
            print("Your log file is filtered and sorted successfully!")
        elif question_filtering == "n":
            print("Your log file was filtered successfully!")
            data_filtered.to_csv(
                column_filtering + '_just_filtered.csv', index=False)
elif method == SORTING:
    # Sort the whole table by the columns the user names.
    value_sort = input(
        "Please input columns separated by comma which you want sort it by, for example status,ip : " + available_headers)
    sorting_array = list(dict.fromkeys(
        name.strip() for name in value_sort.split(",") if name.strip()))
    unknown = [name for name in sorting_array if name not in data_frame.columns]
    if unknown or not sorting_array:
        # Fail with a readable message instead of a KeyError traceback.
        sys.exit("Cannot sort: unknown or missing column(s): " + ",".join(unknown))
    data_frame = data_frame.sort_values(
        by=sorting_array, axis=0, ascending=True, na_position='first')
    # Save the sorted table under a name derived from the user's input.
    data_frame.to_csv(value_sort + '_sorted.csv', index=False)
    print("Your data after sorting was saved in new csv file successfully!")
elif method in ("", "nothing"):
    # The previous condition `method == "" or "nothing"` was always true, so
    # every unrecognised answer silently ended up here.
    # NOTE(review): the output name is hard-coded rather than csv_file_name;
    # kept as-is, confirm whether that is intended.
    data_frame.to_csv('accesslog.csv', index=False)
    print("Your log file is ready to read in csv format!")
else:
    print(f"Unknown option {method!r}: expected {FILTERING}, {SORTING} or nothing.")