Merge pull request #688 from mew2057/json-fix
Reducing number of json files #679
John Dunham authored Jun 17, 2019
2 parents 04be4b2 + 9280605 commit 67cd5c6
Showing 3 changed files with 124 additions and 6 deletions.
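At a glance: rather than leaving one JSON file per table per day in the archive directory, the archive script now stages its dumps in a tmp subdirectory, and the new csm_db_rollup.py condenses them into one file per table per week. A minimal sketch of the naming scheme, derived from rollupDir below (the table name and date are illustrative):

    from datetime import datetime

    # Daily dumps are named <table>.archive.<date>.json (see rollupDir below).
    daily = "csm_allocation.archive.2019-06-17.json"
    table, _, date_str, _ = daily.split(".")

    # rollupDir buckets the date into a year-week pair via "%Y-%U".
    week = datetime.strptime(date_str, "%Y-%m-%d").strftime("%Y-%U")
    print("{0}-{1}.json".format(table, week))  # csm_allocation-2019-24.json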
10 changes: 9 additions & 1 deletion csm_big_data/beats/config/filebeat.yml
@@ -1,24 +1,32 @@
 filebeat.inputs:
 - type: log
   enabled: true
+  close_removed: 1h
+  close_inactive: 1h
   paths:
   - /var/log/ibm/csm/csm_transaction.log.*
   tags: ["transaction"]
 
 - type: log
   enabled: true
+  close_removed: 1h
+  close_inactive: 1h
   paths:
   - /var/log/ibm/csm/csm_allocation_metrics.log.*
   tags: ["allocation","metrics"]
 
 - type: log
   enabled: true
+  close_removed: 1h
+  close_inactive: 1h
   paths:
   - "/var/log/ibm/csm/archive/*.json"
   tags: ["archive"]
 
 - type: log
   enabled: true
+  close_removed: 1h
+  close_inactive: 1h
   paths:
   - /var/log/ibm/csm/csm_ras_events.log
   tags: ["csm","ras"]
@@ -27,8 +35,8 @@ filebeat.config.modules:
   path: ${path.config}/modules.d/*.yml
   reload.enabled: false
 
-name: "master"
 
+name: "master"
 setup.kibana:
   host: _KIBANA_HOST_PORT_
 
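A note on the new input options: Filebeat's close_inactive closes a harvester's file handle once the file has gone unchanged for the given interval, and close_removed closes it when the file is deleted, so rotated logs and rolled-up archive files do not pin open file descriptors. The name: "master" line itself is unchanged; the move simply places it directly above setup.kibana.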
34 changes: 29 additions & 5 deletions csmdb/sql/csm_db_history_archive.py
@@ -36,6 +36,7 @@
 from datetime import date
 import threading
 from multiprocessing.dummy import Pool as ThreadPool
+from csm_db_rollup import rollupDir
 
 DEFAULT_LOG='''/var/log/ibm/csm/db/csm_db_archive_script.log'''
 DEFAULT_TARGET='''/var/log/ibm/csm/archive'''
@@ -47,6 +48,19 @@
 # Additional Formatting style
 line1 = "---------------------------------------------------------------------------------------------------------"
 
+def sanitize_string(v):
+    return v.decode('utf-8', 'ignore')
+
+def sanitize_dict(d):
+    for k, v in d.iteritems():
+        if isinstance(v, dict):
+            d[k] = sanitize_dict(v)
+        elif isinstance(v, str):
+            d[k] = sanitize_string(v)
+        else:
+            d[k] = v
+    return d
+
 # username defined
 username = commands.getoutput("whoami")
 
@@ -168,11 +182,11 @@ def dump_table( db, user, table_name, count, target_dir, is_ras=False ):
     # Append the logs to the file.
     try:
         with open(file_name, 'a') as file:
-
             colnames = [desc[0] for desc in cursor.description]
             for row in cursor:
                 file.write('{{ "type":"db-{0}", "data":{1} }}\n'.format(
-                    table_name, json.dumps(dict(zip(colnames, row)), default=str)))
+                    table_name, json.dumps(sanitize_dict(dict(zip(colnames, row))),
+                        default=str)))
     except Exception as e:
         print "[INFO] Exception caught: {0}".format(e)
         logger.info("Exception caught: {0}".format(e))
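Each row dumped by dump_table becomes one line of JSON tagged with its source table, with sanitize_dict stripping bytes that are not valid UTF-8 before serialization. A hedged sketch of one such line (the table and column names here are invented for illustration):

    import json

    # Hypothetical cursor output for a single row.
    colnames = ["allocation_id", "state"]
    row = (42, "complete")

    line = '{{ "type":"db-{0}", "data":{1} }}\n'.format(
        "csm_allocation_history", json.dumps(dict(zip(colnames, row)), default=str))
    # -> { "type":"db-csm_allocation_history", "data":{"allocation_id": 42, "state": "complete"} }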
@@ -226,6 +240,13 @@ def main(args):
     # Verifies path exists.
     if not os.path.exists(args.target):
         os.makedirs(args.target)
+
+    # Make the temp directory for staging the tables for dumping.
+    # This is used before writing to mitigate potential data loss.
+    temp_dir = "{0}/tmp".format(args.target)
+    if not os.path.exists(temp_dir):
+        os.makedirs(temp_dir)
+
 
     # Process the script detail info. for screen and logging.
     logging.info("DB Name: | {0}".format(args.db))
@@ -244,18 +265,21 @@
 
     pool = ThreadPool(int(args.threads))
 
-    tmp_list = pool.map( lambda table: dump_table( args.db, args.user, table, args.count, args.target ), TABLES )
+    tmp_list = pool.map( lambda table: dump_table( args.db, args.user, table, args.count, temp_dir ), TABLES )
 
     for entry in tmp_list:
         if entry is None:0
         else:
             print entry
 
     for table in RAS_TABLES:
-        entry = dump_table( args.db, args.user, table, args.count, args.target, True)
+        entry = dump_table( args.db, args.user, table, args.count, temp_dir, True)
         if entry is None:0
         else:
-            print entry
+            print (entry, args.target)
 
+    # After the tables are dumped, it's time to merge them into the weekly report.
+    rollupDir(temp_dir,"..")
+
     # Process the finishing info. for screen and logging.
     ft = datetime.datetime.now()
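The staging flow in main() writes every dump under <target>/tmp first, then hands that directory to rollupDir with ".." as the rollup target, so the weekly files land one level up in the real archive directory that Filebeat watches. A small sketch of the path arithmetic, assuming the default target:

    import os

    target = "/var/log/ibm/csm/archive"        # args.target default
    temp_dir = "{0}/tmp".format(target)        # staging area for fresh dumps

    # rollupDir builds its output directory as "<directory>/<archiveTarget>".
    archive_dir = "{0}/{1}".format(temp_dir, "..")
    print(os.path.normpath(archive_dir))       # /var/log/ibm/csm/archive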
86 changes: 86 additions & 0 deletions csmdb/sql/csm_db_rollup.py
@@ -0,0 +1,86 @@
#!/bin/python
# encoding: utf-8
#================================================================================
#
# csm_db_rollup.py
#
# © Copyright IBM Corporation 2015-2019. All Rights Reserved
#
# This program is licensed under the terms of the Eclipse Public License
# v1.0 as published by the Eclipse Foundation and available at
# http://www.eclipse.org/legal/epl-v10.html
#
# U.S. Government Users Restricted Rights: Use, duplication or disclosure
# restricted by GSA ADP Schedule Contract with IBM Corp.
#
#================================================================================

#================================================================================
# usage: ./csm_db_rollup.py <archive directory>    # DEFAULT: /var/log/ibm/csm/archive
# current_version 1.0
# date_created: 05-17-2019
# date_modified: 05-17-2019
#================================================================================
import os
import sys
from datetime import datetime

DEFAULT_TARGET='''/var/log/ibm/csm/archive'''

def rollupDir(directory, archiveTarget="old"):
    ''' Performs a rollup operation on a directory, condensing daily archive files into weekly files.
    This requires that the files provided are in the format `<table>.archive.<date>.json`.
    directory -- A directory containing a collection of archive files to condense (string).
    archiveTarget -- A directory to store the rollups in.
    '''
    # If the directory is not present, return early.
    if not os.path.exists(directory):
        print("{0} does not exist".format(directory))
        return

    # Create an archive directory as needed.
    archiveDir = "{0}/{1}".format(directory, archiveTarget)
    if not os.path.exists(archiveDir):
        os.makedirs(archiveDir)

    # Iterate over the regular files in the directory.
    files = [f for f in os.listdir(directory) if os.path.isfile("{0}/{1}".format(directory, f))]
    for f in files:
        # Skip hidden files.
        if f[0] == '.':
            continue

        iFile = "{0}/{1}".format(directory, f)

        # If the file doesn't match the `<table>.archive.<date>.json` pattern, ignore it.
        try:
            sFile = f.split(".")
            weekStr = datetime.strptime(sFile[2], "%Y-%m-%d").strftime("%Y-%U")
            oFile = "{0}/{1}-{2}.json".format(archiveDir, sFile[0], weekStr)
        except (IndexError, ValueError):
            continue

        # Get the contents first.
        contents = ""
        with open(iFile, 'r') as inputFile:
            contents = inputFile.read()

        # Remove and skip empty files.
        if len(contents) == 0:
            os.remove(iFile)
            continue

        # Archive the contents.
        with open(oFile, 'a') as ofile:
            ofile.write(contents)
        os.remove(iFile)  # Remove only if the write was good!

def main(args):
    target = DEFAULT_TARGET
    if len(args) > 1:
        target = args[1]

    rollupDir(target)

if __name__ == "__main__":
    sys.exit(main(sys.argv))
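A quick usage sketch for the new module (paths are illustrative):

    from csm_db_rollup import rollupDir

    # Condense staged daily dumps into weekly files one directory up,
    # mirroring the call in csm_db_history_archive.py.
    rollupDir("/var/log/ibm/csm/archive/tmp", "..")

    # With the default archiveTarget, rollups land in an "old" subdirectory.
    rollupDir("/var/log/ibm/csm/archive")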
