-
Notifications
You must be signed in to change notification settings - Fork 2
Analytics
-- Per-client GET traffic: request count and total elapsed time per remote
-- address, busiest clients first.
-- 1.66667e-5 is approximately 1/60000; assumes elapsedTime is stored in
-- milliseconds, so the sum comes out in minutes -- TODO confirm the unit.
SELECT
    remoteAddr,
    COUNT(*) AS count,
    (SUM(elapsedTime) * 1.66667e-5) AS elapsed_sum
FROM requests_table
WHERE methodName = 'GET'
GROUP BY remoteAddr
ORDER BY count DESC;
Create and fill a calendar table to join with the requests table.
-- Calendar dimension: one row per day, used to LEFT JOIN against request
-- aggregates so that days with zero requests still appear in reports.
DROP PROCEDURE IF EXISTS FillCalendar;
DROP TABLE IF EXISTS calendar;
-- The PRIMARY KEY already indexes calendar_date; the extra
-- INDEX(calendar_date) in the original created a redundant duplicate index.
CREATE TABLE IF NOT EXISTS calendar (
    calendar_date DATE NOT NULL PRIMARY KEY
);
DELIMITER $$
-- Populate calendar with every date in [start_date, end_date], inclusive.
-- INSERT IGNORE makes the procedure safe to re-run over overlapping ranges.
CREATE PROCEDURE FillCalendar(start_date DATE, end_date DATE)
BEGIN
    DECLARE crt_date DATE;
    SET crt_date = start_date;
    WHILE crt_date <= end_date DO
        INSERT IGNORE INTO calendar VALUES (crt_date);
        SET crt_date = ADDDATE(crt_date, INTERVAL 1 DAY);
    END WHILE;
END$$
DELIMITER ;
CALL FillCalendar('2013-10-01', '2015-10-31');
Create a temporary table to hold the results; joining requests_table directly against the calendar table takes forever.
-- Staging table for per-day GET aggregates; results are materialized here
-- because joining requests_table directly to calendar is too slow.
-- Use DROP TEMPORARY (not plain DROP) so a permanent table that happens to
-- share this name can never be dropped by accident.
DROP TEMPORARY TABLE IF EXISTS requests_table_tmp;
CREATE TEMPORARY TABLE IF NOT EXISTS requests_table_tmp (
    uid SERIAL PRIMARY KEY,
    req_count INT,
    elapsed_sum DOUBLE,   -- total elapsed time for the day (see insert below)
    req_date TIMESTAMP,
    INDEX (req_date)
) ENGINE=InnoDB;
Run the aggregation query and insert the results into requests_table_tmp.
-- Aggregate one client's GET requests into per-day counts and elapsed time.
-- TRUNCATE first so the statement is safely re-runnable.
TRUNCATE TABLE requests_table_tmp;
INSERT INTO requests_table_tmp (req_count, elapsed_sum, req_date)
SELECT
    COUNT(*) AS req_count,
    -- 1.66667e-5 ~= 1/60000: converts a millisecond sum to minutes
    -- (assuming elapsedTime is in ms -- TODO confirm the unit).
    (SUM(elapsedTime) * 1.66667e-5) AS elapsed_sum,
    -- Select and group by the calendar day explicitly. The original selected
    -- the raw timeStamp while grouping by YEAR/MONTH/DAY, which only works
    -- with ONLY_FULL_GROUP_BY disabled and yields an arbitrary row's
    -- timestamp for each day.
    DATE(timeStamp) AS req_date
FROM requests_table
WHERE methodName = 'GET'
  AND userName != 'tester'
  AND remoteAddr = '192.168.100.1'
GROUP BY DATE(timeStamp);
Now do the join.
-- One row per calendar day; days with no requests come out as NULLs from
-- the LEFT JOIN. calendar_date is already a DATE, so the original DATE()
-- wrapper around it was redundant (and prevents use of the primary-key
-- index). SUM() makes the grouped output deterministic: it returns the
-- single day's value when one tmp row matches and NULL when none does,
-- instead of relying on ONLY_FULL_GROUP_BY being disabled.
SELECT
    c.calendar_date,
    SUM(pi.req_count) AS req_count,
    SUM(pi.elapsed_sum) AS elapsed_sum
FROM calendar c
LEFT JOIN requests_table_tmp pi
    ON c.calendar_date = DATE(pi.req_date)
GROUP BY c.calendar_date;
If you want to parse logs created by LOBCDER for logstash and store them in elasticsearch you can use the configuration file in https://github.com/skoulouzis/lobcder/blob/dev/lobcder-tests/etc/logstash-lobcder1.2.2.conf. You will need to edit the configuration file and set the paths that correspond to your setup.
# Query Elasticsearch
To export usage data to CSV, use these commands.
##Uploads
for((i=0;i<=2000000;i+=1000)); do curl "localhost:9200/_all/_search?q=rx_size:*&from=$i&size=1000" | jq --raw-output '.hits.hits | .[] | ._source | "\(.logdate)\t\(.rx_source)\t\(.rx_destination)\t\(.rx_speed)\t\(.rx_size)\t\(.rx_sourceLoc.country_code2)\t\(.rx_sourceLoc.country_name)"'; done >> rx_size.csv
##Downloads
for((i=0;i<=2000000;i+=1000)); do curl "http://localhost:9200/_all/_search?q=tx_size:*&from=$i&size=1000" | jq --raw-output '.hits.hits | .[] | ._source | "\(.logdate)\t\(.tx_source)\t\(.tx_destination)\t\(.tx_speed)\t\(.tx_size)\t\(.tx_destinationLoc.country_code2)\t\(.tx_destinationLoc.country_name)"'; done >> tx_size.csv
##Replication
for((i=0;i<=2000000;i+=1000)); do curl "http://localhost:9200/_all/_search?q=repl_source:*&from=$i&size=1000" | jq --raw-output '.hits.hits | .[] | ._source | "\(.logdate)\t\(.repl_source)\t\(.repl_destination)\t\(.repl_speed)\t\(.repl_size)"' && sleep 1; done >> repl_size.csv
##All client requests
for((i=0;i<=2000000;i+=1000)); do curl "http://localhost:9200/_all/_search?q=req_source:*&from=$i&size=1000" | jq --raw-output '.hits.hits | .[] | ._source | "\(.logdate)\t\(.req_sourceLoc.country_code2)\t\(.req_sourceLoc.country_name)\t\(.req_sourceLoc.city_name)\t\(.req_userAgent)\t\(.req_user)\t\(.req_verb)\t\(.req_contLen)\t\(.req_contType)\t\(.req_elapsed)\t\(.req_contType)\t\(.req_queryString)\t\(.req_URL)"'; done >> req_size.csv
For specific dates — for example, all of October — restrict the query to the matching daily indices:
for((i=0;i<=2000000;i+=1000)); do curl "http://localhost:9200/logstash-2015.10.*/_search?q=req_source:*&from=$i&size=1000" | jq --raw-output '.hits.hits | .[] | ._source | "\(.logdate)\t\(.req_sourceLoc.country_code2)\t\(.req_sourceLoc.country_name)\t\(.req_sourceLoc.city_name)\t\(.req_userAgent)\t\(.req_user)\t\(.req_verb)\t\(.req_contLen)\t\(.req_contType)\t\(.req_elapsed)\t\(.req_contType)\t\(.req_queryString)\t\(.req_URL)"'; done >> req_size.csv
To list all indices (dates, in our case) in Elasticsearch:
curl 'localhost:9200/_cat/indices?v'
To check the status of elasticsearch:
curl -XGET 'http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=5s&pretty=true'
## Import the exported CSV files into MySQL
Create the tables.
For uploads:
-- Upload statistics imported from rx_size.csv (one row per upload event).
-- VARCHAR(5240) is an unusually large cap, presumably just "big enough" for
-- long URLs/paths -- TODO confirm the intended limit.
CREATE TABLE rx_stats (
logdate DATE,
rx_source VARCHAR(5240),
rx_destination VARCHAR(5240),
rx_speed DOUBLE,
-- rx_SIZE casing is inconsistent with the other columns; MySQL matches
-- column names case-insensitively, so LOAD DATA's rx_size still binds to it.
rx_SIZE DOUBLE
) ENGINE=InnoDB;
For downloads:
-- Download statistics imported from tx_size.csv (one row per download).
-- Same layout conventions (and the same tx_SIZE casing quirk) as rx_stats.
CREATE TABLE tx_stats (
logdate DATE,
tx_source VARCHAR(5240),
tx_destination VARCHAR(5240),
tx_speed DOUBLE,
tx_SIZE DOUBLE
) ENGINE=InnoDB;
For replications:
-- Replication statistics imported from repl_size.csv (one row per
-- replication event). Same layout conventions as rx_stats/tx_stats.
CREATE TABLE repl_stats (
logdate DATE,
repl_source VARCHAR(5240),
repl_destination VARCHAR(5240),
repl_speed DOUBLE,
repl_SIZE DOUBLE
) ENGINE=InnoDB;
For requests:
-- Client-request statistics imported from req_size.csv; columns mirror the
-- tab-separated fields emitted by the jq export, in the same order.
CREATE TABLE req_stats (
logdate DATE,
req_source_country_code2 VARCHAR(5240),
req_source_country_name VARCHAR(5240),
req_source_city_name VARCHAR(5240),
req_userAgent VARCHAR(5240),
req_user VARCHAR(5240),
req_verb VARCHAR(5240),
req_contLen BIGINT,
req_contType VARCHAR(5240),
req_elapsed DOUBLE,
-- Second content-type column exists only because the jq export emits
-- .req_contType twice; it holds a duplicate of req_contType.
req_contType1 VARCHAR(5240),
req_queryString VARCHAR(5240),
req_URL VARCHAR(5240)
) ENGINE=InnoDB;
Import the CSV files. For uploads:
-- Load the uploads CSV. BUG FIX: the original statement loaded uploads into
-- repl_stats; they belong in rx_stats. The first field is parsed into
-- logdate via @var1 because the export's date format is not a native DATE.
LOAD DATA LOCAL INFILE 'rx_size.csv'
INTO TABLE rx_stats
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
(@var1, rx_source, rx_destination, rx_speed, rx_size)
SET logdate = STR_TO_DATE(@var1, '%M %d,%Y %h:%i:%s %p');
For downloads:
-- Load the downloads CSV. BUG FIX: the original statement loaded downloads
-- into repl_stats; they belong in tx_stats. A statement terminator is also
-- added so the mysql client executes it.
LOAD DATA LOCAL INFILE 'tx_size.csv'
INTO TABLE tx_stats
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
(@var1, tx_source, tx_destination, tx_speed, tx_size)
SET logdate = STR_TO_DATE(@var1, '%M %d,%Y %h:%i:%s %p');
For replications:
-- Load the replications CSV into repl_stats (this one targeted the right
-- table already). FIX: the statement was missing its ';' terminator, so the
-- mysql client would never execute it as written.
LOAD DATA LOCAL INFILE 'repl_size.csv'
INTO TABLE repl_stats
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
(@var1, repl_source, repl_destination, repl_speed, repl_size)
SET logdate = STR_TO_DATE(@var1, '%M %d,%Y %h:%i:%s %p');
For requests:
-- Load the client-requests CSV into req_stats. Column order must match the
-- jq export exactly, including the duplicated content-type field, which
-- lands in req_contType1. FIX: added the missing ';' terminator.
LOAD DATA LOCAL INFILE 'req_size.csv'
INTO TABLE req_stats
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
(@var1, req_source_country_code2, req_source_country_name,
 req_source_city_name, req_userAgent, req_user, req_verb, req_contLen,
 req_contType, req_elapsed, req_contType1, req_queryString, req_URL)
SET logdate = STR_TO_DATE(@var1, '%M %d,%Y %h:%i:%s %p');
It's not pretty but it works.
-- First copy of the per-request sequence used for the consecutive-request
-- self-join at the end of the file (MySQL cannot open the same TEMPORARY
-- table twice in one query, hence two identical copies).
-- NOTE(review): plain DROP TABLE here could drop a permanent table with the
-- same name; DROP TEMPORARY TABLE would be safer -- confirm intent.
DROP TABLE IF EXISTS requests_table_tmp1;
CREATE TEMPORARY TABLE IF NOT EXISTS requests_table_tmp1 ( uid SERIAL PRIMARY KEY, req_date TIMESTAMP , INDEX(req_date), folder varchar(512) ) ENGINE=InnoDB;
-- SUBSTRING_INDEX's count argument is the number of '/' characters in
-- requestURL (total length minus length with slashes removed), so the call
-- returns everything before the LAST slash, i.e. the folder part of the URL.
-- NOTE(review): selecting uid while grouping by timeStamp only works with
-- ONLY_FULL_GROUP_BY disabled and picks an arbitrary uid per timestamp --
-- confirm the server's sql_mode.
insert into requests_table_tmp1(uid,req_date,folder) select uid,
timeStamp, SUBSTRING_INDEX(requestURL, '/', LENGTH(requestURL) - LENGTH(REPLACE(requestURL, '/', '')) ) as folder from requests_table where methodName = 'GET' and userName !='tester' and remoteAddr like '10.%' group by timeStamp;
-- Second, identical copy of the sequence (see requests_table_tmp1): needed
-- because a TEMPORARY table cannot be referenced twice in a single query.
DROP TABLE IF EXISTS requests_table_tmp2;
CREATE TEMPORARY TABLE IF NOT EXISTS requests_table_tmp2 ( uid SERIAL PRIMARY KEY, req_date TIMESTAMP , INDEX(req_date), folder varchar(512) ) ENGINE=InnoDB;
-- Same folder extraction and GROUP BY caveats as for requests_table_tmp1.
insert into requests_table_tmp2(uid,req_date,folder) select uid,
timeStamp, SUBSTRING_INDEX(requestURL, '/', LENGTH(requestURL) - LENGTH(REPLACE(requestURL, '/', '')) ) as folder from requests_table where methodName = 'GET' and userName !='tester' and remoteAddr like '10.%' group by timeStamp;
select requests_table_tmp1.folder as fromFolder, requests_table_tmp2.folder as toFolder, (requests_table_tmp1.req_date - requests_table_tmp2.req_date) AS timedifference from requests_table_tmp1 JOIN requests_table_tmp2 on requests_table_tmp1.uid = requests_table_tmp2.uid+1