Merge pull request #29 from nens/reinout-friendlier-geoserver
Better geoserver parsing
reinout authored Jan 5, 2024
2 parents 2f59539 + f17ce1b commit b98c033
Showing 14 changed files with 66 additions and 585 deletions.
7 changes: 6 additions & 1 deletion CHANGES.rst
@@ -5,7 +5,12 @@ Changelog of serverscripts
 2.11 (unreleased)
 -----------------

-- Nothing changed yet.
+- Removed pbis and rabbitmq: both are unused.
+
+- Sped up geoserver log extraction and made sure it no longer consumes
+  so much memory.
+
+- Added JNDI info for geoserver.


 2.10 (2023-04-05)
2 changes: 1 addition & 1 deletion Dockerfile
@@ -1,4 +1,4 @@
-FROM fkrull/multi-python:focal
+FROM ghcr.io/fkrull/docker-multi-python:bionic
 ENV LC_ALL=C.UTF-8
 ENV LANG=C.UTF-8

31 changes: 0 additions & 31 deletions README.rst
@@ -77,37 +77,6 @@ to run as root)::
     */5 * * * * /usr/local/bin/checkout-info > /dev/null 2>&1


-Rabbitmq-checker
-----------------
-
-``bin/rabbitmq-checker`` checks the number of messages per queue and the
-number of queues per vhost. When a queue or message limit is exceeded, it
-writes a warning to ``/var/local/serverscripts/nens.rabbitmq.message`` and the
-number of warnings to ``/var/local/serverscripts/nens.num_rabbitmq_too_big.warnings``.
-An optional configuration file can be placed in ``/etc/serverscripts/rabbitmq_zabbix.json``;
-see ``tests/example_rabbitmq_zabbix.json`` for an example. Without a configuration
-file, the script uses the default values queues=100 and messages=200.
-
-Configuration::
-
-    {
-        "lizard-nxt": {  // vhost in rabbitmq
-            "queues_limit": 10,
-            "messages_limit": 300
-        },
-        ...
-    }
-
-Retrieve the vhosts on the rabbitmq-server::
-
-    $ sudo rabbitmqctl list_vhosts
-
-
-Before taking it into production, run the script manually in debug mode::
-
-    $ sudo bin/rabbitmq-checker -v
-
-
 Docker-info
 ------------

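The removed section's defaults-plus-override behaviour is worth a concrete
illustration. Below is a minimal sketch of reading such a JSON configuration
with fallback defaults; the paths and limit values come from the README text
above, but the function name and structure are illustrative, not the deleted
script's actual code::

    import json
    import logging

    logger = logging.getLogger(__name__)

    CONFIG_FILE = "/etc/serverscripts/rabbitmq_zabbix.json"
    DEFAULT_QUEUES_LIMIT = 100
    DEFAULT_MESSAGES_LIMIT = 200


    def load_limits(vhost):
        """Return (queues_limit, messages_limit) for a vhost, with defaults."""
        try:
            with open(CONFIG_FILE) as f:
                config = json.load(f)
        except FileNotFoundError:
            logger.debug("No %s found, using default limits", CONFIG_FILE)
            config = {}
        vhost_config = config.get(vhost, {})
        queues_limit = vhost_config.get("queues_limit", DEFAULT_QUEUES_LIMIT)
        messages_limit = vhost_config.get("messages_limit", DEFAULT_MESSAGES_LIMIT)
        return queues_limit, messages_limit
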
82 changes: 44 additions & 38 deletions serverscripts/geoserver.py
@@ -3,12 +3,12 @@
 """
 from collections import Counter
 from serverscripts.clfparser import CLFParser
-from serverscripts.utils import get_output
 from urllib.parse import parse_qs
 from urllib.parse import urlparse

 import argparse
 import glob
+import gzip
 import json
 import logging
 import os
@@ -122,26 +122,23 @@ def extract_from_line(line):

 def extract_from_logfiles(logfile):
     if not os.path.exists(logfile):
-        return []
-
-    logfile_pattern = logfile + "*"
-    cmd = "zcat --force %s" % logfile_pattern
-    logger.debug("Grabbing logfile output with: %s", cmd)
-    output, _ = get_output(cmd)
-    lines = output.split("\n")
-    logger.debug("Grabbed %s lines", len(lines))
-
-    results = []
-    for line in lines:
-        if "/geoserver/" not in line:
-            continue
-        if "GetMap" not in line:
-            continue
-        result = extract_from_line(line)
-        if result:
-            results.append(result)
-    logger.debug("After filtering, we have %s lines", len(results))
-    return results
+        return
+
+    logfiles = glob.glob(logfile + "*")
+    for logfile in logfiles:
+        if logfile.endswith(".gz"):
+            f = gzip.open(logfile, "rt")
+        else:
+            f = open(logfile, "rt")
+        for line in f:
+            if "/geoserver/" not in line:
+                continue
+            if "GetMap" not in line:
+                continue
+            result = extract_from_line(line)
+            if result:
+                yield result
+        f.close()


 def get_text_or_none(element, tag):
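
This rewrite is what the changelog's speed and memory claims refer to: instead
of capturing the entire ``zcat`` output as one string and splitting it, the
function is now a generator that streams each (possibly gzipped) logfile line
by line, so only one line is held in memory at a time. A small usage sketch;
the logfile path is illustrative, and per the code above each yielded item is
a dict with at least ``workspace`` and ``referer`` keys::

    from collections import Counter

    # Count GetMap hits per workspace without loading whole logfiles:
    # extract_from_logfiles() lazily yields one parsed dict per matching line.
    per_workspace = Counter(
        hit["workspace"]
        for hit in extract_from_logfiles("/var/log/nginx/access.log")
    )
    print(per_workspace.most_common(5))
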
@@ -168,9 +165,11 @@ def extract_datastore_info(datastore_file):
         connection, "./entry[@key='database']"
     )
     result["database_user"] = get_text_or_none(connection, "./entry[@key='user']")
-    # result["database_namespace"] = get_text_or_none(
-    #     connection, "./entry[@key='namespace']"
-    # )
+    jndi_connection = get_text_or_none(
+        connection, "./entry[@key='jndiReferenceName']"
+    )
+    if jndi_connection:
+        result["database_name"] = jndi_connection

     return result

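For background on the JNDI addition: a GeoServer ``datastore.xml`` keeps its
connection settings as ``<entry key="...">`` elements, and a JNDI-backed store
carries a ``jndiReferenceName`` entry instead of a direct database name. A
self-contained sketch of the lookup follows; the sample XML is abbreviated and
illustrative, not a complete GeoServer file::

    from xml.etree import ElementTree

    # Abbreviated, illustrative datastore.xml for a JNDI-backed store.
    sample = """
    <dataStore>
      <name>my_store</name>
      <connectionParameters>
        <entry key="dbtype">postgis</entry>
        <entry key="jndiReferenceName">java:comp/env/jdbc/gisdb</entry>
      </connectionParameters>
    </dataStore>
    """

    root = ElementTree.fromstring(sample)
    connection = root.find("connectionParameters")
    # Same xpath style as the "./entry[@key='...']" lookups in the diff above.
    jndi = connection.find("./entry[@key='jndiReferenceName']")
    print(jndi.text if jndi is not None else None)  # java:comp/env/jdbc/gisdb
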
@@ -213,32 +212,39 @@ def extract_from_dirs(data_dir):

 def extract_workspaces_info(geoserver_configuration):
     """Return list of workspaces with all info"""
-    log_lines = extract_from_logfiles(geoserver_configuration["logfile"])
     workspaces = {}
     datastores_info = extract_from_dirs(geoserver_configuration["data_dir"])

-    workspace_names = Counter(
-        [log_line["workspace"] for log_line in log_lines]
+    workspace_names_and_referers = Counter(
+        (
+            (log_line["workspace"], log_line["referer"])
+            for log_line in extract_from_logfiles(geoserver_configuration["logfile"])
+        )
     ).most_common()

+    workspace_names_counter = Counter()
+    for (workspace_name, referer), workspace_count in workspace_names_and_referers:
+        workspace_names_counter.update({workspace_name: workspace_count})
+    workspace_names = workspace_names_counter.most_common()
+
     for workspace_name, workspace_count in workspace_names:
         if workspace_name not in datastores_info:
             logger.warn(
                 "Workspace %s from nginx logfile is missing in workspaces dir.",
-                workspace_name
+                workspace_name,
             )
             continue
-        workspaces[workspace_name] = {}
-        workspace_lines = [
-            log_line
-            for log_line in log_lines
-            if log_line["workspace"] == workspace_name
+
+        referers = Counter()
+        for (found_workspace_name, referer), count in workspace_names_and_referers:
+            if found_workspace_name != workspace_name:
+                continue
+            referers.update({referer: count})
+        common_referers = [
+            "%s (%d)" % (referer, count) for (referer, count) in referers.most_common(5)
         ]
-        referers = Counter(
-            [log_line["referer"] for log_line in workspace_lines if log_line["referer"]]
-        )
-        common_referers = [referer for (referer, count) in referers.most_common(5)]
         workspaces[workspace_name] = {
-            "usage": len(workspace_lines),
+            "usage": workspace_count,
             "referers": " + ".join(common_referers),
         }
+
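
The double Counter pass above deserves a gloss: (workspace, referer) pairs are
counted in a single streaming pass over the logs, after which per-workspace
totals and per-workspace top-5 referers are both derived from those pair
counts, instead of re-filtering a large ``log_lines`` list once per workspace
as before. The same idea in miniature, with invented data::

    from collections import Counter

    pairs = Counter(
        [
            ("ws_a", "https://one.example.org"),
            ("ws_a", "https://one.example.org"),
            ("ws_a", "https://two.example.org"),
            ("ws_b", "https://one.example.org"),
        ]
    ).most_common()

    # Collapse pair counts into per-workspace totals, as the diff does.
    totals = Counter()
    for (workspace, referer), count in pairs:
        totals.update({workspace: count})
    print(totals.most_common())  # [('ws_a', 3), ('ws_b', 1)]
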
93 changes: 0 additions & 93 deletions serverscripts/pbis.py

This file was deleted.
