Merge pull request #688 from mew2057/json-fix
Reducing number of json files #679
John Dunham authored Jun 17, 2019
2 parents 04be4b2 + 9280605 commit 67cd5c6
Showing 3 changed files with 124 additions and 6 deletions.
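At a glance: rather than leaving one JSON file per table per day in the archive directory, the archive script now stages its dumps in a tmp subdirectory, and the new csm_db_rollup.py condenses them into one file per table per week. A minimal sketch of the naming scheme, derived from rollupDir below (the table name and date are illustrative):

    from datetime import datetime

    # Daily dumps are named <table>.archive.<date>.json (see rollupDir below).
    daily = "csm_allocation.archive.2019-06-17.json"
    table, _, date_str, _ = daily.split(".")

    # rollupDir buckets the date into a year-week pair via "%Y-%U".
    week = datetime.strptime(date_str, "%Y-%m-%d").strftime("%Y-%U")
    print("{0}-{1}.json".format(table, week))  # csm_allocation-2019-24.json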
10 changes: 9 additions & 1 deletion csm_big_data/beats/config/filebeat.yml
@@ -1,24 +1,32 @@
 filebeat.inputs:
 - type: log
   enabled: true
+  close_removed: 1h
+  close_inactive: 1h
   paths:
   - /var/log/ibm/csm/csm_transaction.log.*
   tags: ["transaction"]
 
 - type: log
   enabled: true
+  close_removed: 1h
+  close_inactive: 1h
   paths:
   - /var/log/ibm/csm/csm_allocation_metrics.log.*
   tags: ["allocation","metrics"]
 
 - type: log
   enabled: true
+  close_removed: 1h
+  close_inactive: 1h
   paths:
   - "/var/log/ibm/csm/archive/*.json"
   tags: ["archive"]
 
 - type: log
   enabled: true
+  close_removed: 1h
+  close_inactive: 1h
   paths:
   - /var/log/ibm/csm/csm_ras_events.log
   tags: ["csm","ras"]
@@ -27,8 +35,8 @@ filebeat.config.modules:
   path: ${path.config}/modules.d/*.yml
   reload.enabled: false
 
-name: "master"
 
+name: "master"
 setup.kibana:
   host: _KIBANA_HOST_PORT_
 
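A note on the new input options: Filebeat's close_inactive closes a harvester's file handle once the file has gone unchanged for the given interval, and close_removed closes it when the file is deleted, so rotated logs and rolled-up archive files do not pin open file descriptors. The name: "master" line itself is unchanged; the move simply places it directly above setup.kibana.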
34 changes: 29 additions & 5 deletions csmdb/sql/csm_db_history_archive.py
@@ -36,6 +36,7 @@
 from datetime import date
 import threading
 from multiprocessing.dummy import Pool as ThreadPool
+from csm_db_rollup import rollupDir
 
 DEFAULT_LOG='''/var/log/ibm/csm/db/csm_db_archive_script.log'''
 DEFAULT_TARGET='''/var/log/ibm/csm/archive'''
@@ -47,6 +48,19 @@
 # Additional Formatting style
 line1 = "---------------------------------------------------------------------------------------------------------"
 
+def sanitize_string(v):
+    return v.decode('utf-8', 'ignore')
+
+def sanitize_dict(d):
+    for k, v in d.iteritems():
+        if isinstance(v, dict):
+            d[k] = sanitize_dict(v)
+        elif isinstance(v, str):
+            d[k] = sanitize_string(v)
+        else:
+            d[k] = v
+    return d
+
 # username defined
 username = commands.getoutput("whoami")
 
@@ -168,11 +182,11 @@ def dump_table( db, user, table_name, count, target_dir, is_ras=False ):
     # Append the logs to the file.
     try:
         with open(file_name, 'a') as file:
-
             colnames = [desc[0] for desc in cursor.description]
             for row in cursor:
                 file.write('{{ "type":"db-{0}", "data":{1} }}\n'.format(
-                    table_name, json.dumps(dict(zip(colnames, row)), default=str)))
+                    table_name, json.dumps(sanitize_dict(dict(zip(colnames, row))),
+                        default=str)))
     except Exception as e:
         print "[INFO] Exception caught: {0}".format(e)
         logger.info("Exception caught: {0}".format(e))
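Each row dumped by dump_table becomes one line of JSON tagged with its source table, with sanitize_dict stripping bytes that are not valid UTF-8 before serialization. A hedged sketch of one such line (the table and column names here are invented for illustration):

    import json

    # Hypothetical cursor output for a single row.
    colnames = ["allocation_id", "state"]
    row = (42, "complete")

    line = '{{ "type":"db-{0}", "data":{1} }}\n'.format(
        "csm_allocation_history", json.dumps(dict(zip(colnames, row)), default=str))
    # -> { "type":"db-csm_allocation_history", "data":{"allocation_id": 42, "state": "complete"} }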
@@ -226,6 +240,13 @@ def main(args):
     # Verifies path exists.
     if not os.path.exists(args.target):
         os.makedirs(args.target)
+
+    # Make the temp directory for staging the tables for dumping.
+    # This is used before writing to mitigate potential data loss.
+    temp_dir = "{0}/tmp".format(args.target)
+    if not os.path.exists(temp_dir):
+        os.makedirs(temp_dir)
+
 
     # Process the script detail info. for screen and logging.
     logging.info("DB Name: | {0}".format(args.db))
@@ -244,18 +265,21 @@
 
     pool = ThreadPool(int(args.threads))
 
-    tmp_list = pool.map( lambda table: dump_table( args.db, args.user, table, args.count, args.target ), TABLES )
+    tmp_list = pool.map( lambda table: dump_table( args.db, args.user, table, args.count, temp_dir ), TABLES )
 
     for entry in tmp_list:
         if entry is None:0
         else:
             print entry
 
     for table in RAS_TABLES:
-        entry = dump_table( args.db, args.user, table, args.count, args.target, True)
+        entry = dump_table( args.db, args.user, table, args.count, temp_dir, True)
         if entry is None:0
         else:
-            print entry
+            print (entry, args.target)
 
+    # After the tables are dumped, it's time to merge them into the weekly report.
+    rollupDir(temp_dir,"..")
+
     # Process the finishing info. for screen and logging.
     ft = datetime.datetime.now()
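The staging flow in main() writes every dump under <target>/tmp first, then hands that directory to rollupDir with ".." as the rollup target, so the weekly files land one level up in the real archive directory that Filebeat watches. A small sketch of the path arithmetic, assuming the default target:

    import os

    target = "/var/log/ibm/csm/archive"        # args.target default
    temp_dir = "{0}/tmp".format(target)        # staging area for fresh dumps

    # rollupDir builds its output directory as "<directory>/<archiveTarget>".
    archive_dir = "{0}/{1}".format(temp_dir, "..")
    print(os.path.normpath(archive_dir))       # /var/log/ibm/csm/archive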
86 changes: 86 additions & 0 deletions csmdb/sql/csm_db_rollup.py
@@ -0,0 +1,86 @@
#!/bin/python
# encoding: utf-8
#================================================================================
#
# csm_db_rollup.py
#
# © Copyright IBM Corporation 2015-2019. All Rights Reserved
#
# This program is licensed under the terms of the Eclipse Public License
# v1.0 as published by the Eclipse Foundation and available at
# http://www.eclipse.org/legal/epl-v10.html
#
# U.S. Government Users Restricted Rights: Use, duplication or disclosure
# restricted by GSA ADP Schedule Contract with IBM Corp.
#
#================================================================================

#================================================================================
# usage: ./csm_db_rollup.py <archive directory>    # DEFAULT: /var/log/ibm/csm/archive
# current_version 1.0
# date_created: 05-17-2019
# date_modified: 05-17-2019
#================================================================================
import os
import sys
from datetime import datetime

DEFAULT_TARGET='''/var/log/ibm/csm/archive'''

def rollupDir(directory, archiveTarget="old"):
    ''' Performs a rollup operation on a directory, condensing daily archive files into weekly files.
    This requires that the files provided are in the format `<table>.archive.<date>.json`.
    directory -- A directory containing a collection of archive files to condense (string).
    archiveTarget -- A directory to store the rollups in.
    '''
    # If the directory is not present, return early.
    if not os.path.exists(directory):
        print("{0} does not exist".format(directory))
        return

    # Create an archive directory as needed.
    archiveDir = "{0}/{1}".format(directory, archiveTarget)
    if not os.path.exists(archiveDir):
        os.makedirs(archiveDir)

    # Iterate over the regular files in the directory.
    files = [f for f in os.listdir(directory) if os.path.isfile("{0}/{1}".format(directory, f))]
    for f in files:
        # Skip hidden files.
        if f[0] == '.':
            continue

        iFile = "{0}/{1}".format(directory, f)

        # If the file doesn't match the `<table>.archive.<date>.json` pattern, ignore it.
        try:
            sFile = f.split(".")
            weekStr = datetime.strptime(sFile[2], "%Y-%m-%d").strftime("%Y-%U")
            oFile = "{0}/{1}-{2}.json".format(archiveDir, sFile[0], weekStr)
        except (IndexError, ValueError):
            continue

        # Get the contents first.
        contents = ""
        with open(iFile, 'r') as inputFile:
            contents = inputFile.read()

        # Remove and skip empty files.
        if len(contents) == 0:
            os.remove(iFile)
            continue

        # Archive the contents.
        with open(oFile, 'a') as ofile:
            ofile.write(contents)
        os.remove(iFile)  # Remove only if the write was good!

def main(args):
    target = DEFAULT_TARGET
    if len(args) > 1:
        target = args[1]

    rollupDir(target)

if __name__ == "__main__":
    sys.exit(main(sys.argv))
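A quick usage sketch for the new module (paths are illustrative):

    from csm_db_rollup import rollupDir

    # Condense staged daily dumps into weekly files one directory up,
    # mirroring the call in csm_db_history_archive.py.
    rollupDir("/var/log/ibm/csm/archive/tmp", "..")

    # With the default archiveTarget, rollups land in an "old" subdirectory.
    rollupDir("/var/log/ibm/csm/archive")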
