feat: add prometheus metrics #346

Merged: 1 commit, Aug 22, 2024
12 changes: 11 additions & 1 deletion .github/workflows/develop.yml
@@ -68,13 +68,21 @@ jobs:
username: ${{ env.REGISTRY_USERNAME }}
password: ${{ env.REGISTRY_PASSWORD }}

- name: 'Build and push image'
- name: 'Build and push main image'
run: |
IMAGE_NAME=$REGISTRY_LOGIN_SERVER/$NAME-$ENVIRONMENT:$TAG
echo "IMAGE_NAME=$IMAGE_NAME" >> $GITHUB_ENV
docker build . -f Dockerfile -t $IMAGE_NAME
docker push $IMAGE_NAME

- name: 'Build and push nginx reverse proxy image'
run: |
htpasswd -c -b ./nginx-reverse-proxy/htpasswd prom "${{ secrets.PROM_NGINX_REVERSE_PROXY_PASSWORD }}"
NGINX_IMAGE_NAME=$REGISTRY_LOGIN_SERVER/prom-nginx-reverse-proxy-$ENVIRONMENT:$TAG
echo "NGINX_IMAGE_NAME=$NGINX_IMAGE_NAME" >> $GITHUB_ENV
docker build nginx-reverse-proxy -f nginx-reverse-proxy/Dockerfile -t $NGINX_IMAGE_NAME
docker push $NGINX_IMAGE_NAME

- name: 'Delete existing container group'
uses: 'azure/CLI@v1'
with:
@@ -91,6 +99,8 @@

sed -i ''s^#IMAGE_NAME#^$IMAGE_NAME^g'' ./deployment/deployment.yml

sed -i ''s^#NGINX_IMAGE_NAME#^$NGINX_IMAGE_NAME^g'' ./deployment/deployment.yml

sed -i ''s^#REGISTRY_LOGIN_SERVER#^$REGISTRY_LOGIN_SERVER^g'' ./deployment/deployment.yml
sed -i ''s^#REGISTRY_USERNAME#^$REGISTRY_USERNAME^g'' ./deployment/deployment.yml
sed -i ''s^#REGISTRY_PASSWORD#^$REGISTRY_PASSWORD^g'' ./deployment/deployment.yml
38 changes: 36 additions & 2 deletions deployment/deployment.yml
@@ -18,7 +18,7 @@ properties: # Properties of container group
properties: # Properties of an instance
resources: # Resource requirements of the instance
requests:
memoryInGB: 4
memoryInGB: 3.7
cpu: 0.5
image: '#IMAGE_NAME#' # Container image used to create the instance
command:
@@ -61,7 +61,7 @@ properties: # Properties of container group
properties:
resources:
requests:
memoryInGB: 4
memoryInGB: 3.7
cpu: 0.1
image: '#IMAGE_NAME#'
command:
@@ -292,3 +292,37 @@ properties: # Properties of container group
secureValue: '#COMMSHUB_KEY#'
- name: LOG_LEVEL
secureValue: '#LOG_LEVEL#'
- name: 'nginx-proxy-for-prometheus'
properties:
image: "#NGINX_IMAGE_NAME#"
ports:
- port: 9158
protocol: TCP
- port: 9159
protocol: TCP
- port: 9160
protocol: TCP
- port: 9161
protocol: TCP
- port: 9162
protocol: TCP
- port: 9163
protocol: TCP
resources:
requests:
cpu: 0.1
memoryInGB: 0.6
ipAddress:
type: "public"
dnsNameLabel: "#NAME#-#ENVIRONMENT#-1"
ports:
- port: 9158
- port: 9159
- port: 9160
ipAddress: # split across two IPs because there is a hard limit of 5 public ports per IP
type: "public"
dnsNameLabel: "#NAME#-#ENVIRONMENT#-2"
ports:
- port: 9161
- port: 9162
- port: 9163
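
For orientation, the six public proxy ports exposed above map one-to-one onto the per-stage exporter ports configured in nginx.conf and src/constants/config.py below; a small illustrative Python lookup (not part of the PR itself) summarises the mapping:

# Public nginx proxy port -> (pipeline stage, internal prometheus_client exporter port),
# as derived from nginx.conf and the PROM_PORT values in src/constants/config.py.
PROXY_PORTS = {
    9158: ("refresh", 9091),
    9159: ("validate", 9092),
    9160: ("clean", 9093),
    9161: ("flatten", 9094),
    9162: ("lakify", 9095),
    9163: ("solrize", 9096),
}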
3 changes: 3 additions & 0 deletions nginx-reverse-proxy/Dockerfile
@@ -0,0 +1,3 @@
FROM nginx
COPY nginx.conf /etc/nginx/nginx.conf
COPY htpasswd /etc/nginx/htpasswd
120 changes: 120 additions & 0 deletions nginx-reverse-proxy/nginx.conf
@@ -0,0 +1,120 @@
user nginx;
worker_processes auto;

error_log /var/log/nginx/error.log notice;
pid /var/run/nginx.pid;


events {
worker_connections 1024;
}


http {
default_type application/octet-stream;

log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for"';

keepalive_timeout 65;

gzip on;

server {
listen 9158;
listen [::]:9158;

root /var/www/html;

server_name _;

location / {
proxy_pass http://localhost:9091;
}

auth_basic "Unified Pipelne Metrics Exporter - Refresh";
auth_basic_user_file htpasswd;
}

server {
listen 9159;
listen [::]:9159;

root /var/www/html;

server_name _;

location / {
proxy_pass http://localhost:9092;
}

auth_basic "Unified Pipelne Metrics Exporter - Validate";
auth_basic_user_file htpasswd;
}

server {
listen 9160;
listen [::]:9160;

root /var/www/html;

server_name _;

location / {
proxy_pass http://localhost:9093;
}

auth_basic "Unified Pipelne Metrics Exporter - Clean";
auth_basic_user_file htpasswd;
}

server {
listen 9161;
listen [::]:9161;

root /var/www/html;

server_name _;

location / {
proxy_pass http://localhost:9094;
}

auth_basic "Unified Pipelne Metrics Exporter - Flatten";
auth_basic_user_file htpasswd;
}

server {
listen 9162;
listen [::]:9162;

root /var/www/html;

server_name _;

location / {
proxy_pass http://localhost:9095;
}

auth_basic "Unified Pipelne Metrics Exporter - Lakify";
auth_basic_user_file htpasswd;
}

server {
listen 9163;
listen [::]:9163;

root /var/www/html;

server_name _;

location / {
proxy_pass http://localhost:9096;
}

auth_basic "Unified Pipelne Metrics Exporter - Solrize";
auth_basic_user_file htpasswd;
}
}
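
Once the container group is deployed, each exporter can be spot-checked through the proxy with the requests library already pinned in requirements.in. A hedged smoke test; the hostname, port and password below are placeholders, not values from this PR:

# Hypothetical smoke test for one proxied exporter; host and credentials are placeholders.
import requests

resp = requests.get(
    "http://localhost:9158/",           # refresh exporter, proxied by nginx to localhost:9091
    auth=("prom", "example-password"),  # the basic-auth user created with htpasswd in CI
    timeout=10,
)
resp.raise_for_status()
print(resp.text.splitlines()[:5])       # first few Prometheus exposition lines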

1 change: 1 addition & 0 deletions requirements.in
@@ -9,3 +9,4 @@ requests
pysolr
chardet
python-dateutil
prometheus-client
6 changes: 4 additions & 2 deletions requirements.txt
@@ -1,8 +1,8 @@
#
# This file is autogenerated by pip-compile with Python 3.11
# This file is autogenerated by pip-compile with Python 3.12
# by the following command:
#
# pip-compile requirements.in
# pip-compile
#
azure-core==1.15.0
# via
@@ -39,6 +39,8 @@ msrest==0.6.21
# azure-storage-queue
oauthlib==3.2.2
# via requests-oauthlib
prometheus-client==0.20.0
# via -r requirements.in
psycopg2==2.9.6
# via -r requirements.in
pycparser==2.21
18 changes: 4 additions & 14 deletions requirements_dev.txt
@@ -1,5 +1,5 @@
#
# This file is autogenerated by pip-compile with Python 3.10
# This file is autogenerated by pip-compile with Python 3.12
# by the following command:
#
# pip-compile --output-file=requirements_dev.txt requirements_dev.in
@@ -35,8 +35,6 @@ cryptography==42.0.8
# via
# azure-storage-blob
# azure-storage-queue
exceptiongroup==1.2.1
# via pytest
flake8==7.0.0
# via
# -r requirements_dev.in
@@ -82,6 +80,8 @@ platformdirs==4.2.2
# via black
pluggy==1.5.0
# via pytest
prometheus-client==0.20.0
# via -r requirements.in
psycopg2==2.9.9
# via -r requirements.in
pycodestyle==2.11.1
@@ -114,24 +114,14 @@ six==1.16.0
# azure-core
# isodate
# python-dateutil
tomli==2.0.1
# via
# black
# build
# flake8-pyproject
# mypy
# pip-tools
# pytest
types-psycopg2==2.9.21.20240417
# via -r requirements_dev.in
types-python-dateutil==2.9.0.20240316
# via -r requirements_dev.in
types-requests==2.32.0.20240712
# via -r requirements_dev.in
typing-extensions==4.12.2
# via
# black
# mypy
# via mypy
urllib3==2.2.2
# via
# requests
35 changes: 32 additions & 3 deletions src/constants/config.py
@@ -45,6 +45,13 @@
DOCUMENT_SAFETY_PERCENTAGE=50,
# Maximum number of blobs to delete in a single request when cleaning up blob containers
MAX_BLOB_DELETE=250,
PROM_PORT=9091,
PROM_METRIC_DEFS=[
("registered_publishers", "The number of publishers on the CKAN Registry"),
("registered_datasets", "The number of datasets on the CKAN Registry"),
("datasets_changed", "The number of changed datasets that have been changed"),
("datasets_to_download", "The number of datasets that need re-downloading"),
],
),
VALIDATION=dict(
# Number of parallel processes to run the validation loop with
@@ -64,18 +71,36 @@
SAFETY_CHECK_THRESHOLD=100,
# Hours
SAFETY_CHECK_PERIOD=2,
PROM_PORT=9092,
PROM_METRIC_DEFS=[
("new_flagged_publishers", "The number of publishers that have been newly flagged"),
("datasets_to_validate", "The number of datasets that need validating"),
],
),
CLEAN=dict(
# Number of parallel processes to run the clean loop with
PARALLEL_PROCESSES=1
PARALLEL_PROCESSES=1,
PROM_PORT=9093,
PROM_METRIC_DEFS=[
("valid_datasets_to_progress", "The number of valid datasets to progress to flatten stage"),
("invalid_datasets_to_clean", "The number of invalid datasets that need cleaning"),
],
),
FLATTEN=dict(
# Number of parallel processes to run the flatten loop with
PARALLEL_PROCESSES=1
PARALLEL_PROCESSES=1,
PROM_PORT=9094,
PROM_METRIC_DEFS=[
("datasets_to_flatten", "The number of datasets that need flattening"),
],
),
LAKIFY=dict(
# Number of parallel processes to run the lakify loop with
PARALLEL_PROCESSES=10
PARALLEL_PROCESSES=10,
PROM_PORT=9095,
PROM_METRIC_DEFS=[
("datasets_to_lakify", "The number of datasets that need lakifying"),
],
),
SOLRIZE=dict(
# Number of parallel processes to run the solrize loop with
@@ -92,5 +117,9 @@
PYSOLR_TIMEOUT=600,
# Time in seconds to sleep after receiving a 5XX error from Solr
SOLR_500_SLEEP=os.getenv("SOLR_500_SLEEP"),
PROM_PORT=9096,
PROM_METRIC_DEFS=[
("datasets_to_solrize", "The number of datasets that need solrizing"),
],
),
)
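
Each stage's config section now carries a PROM_PORT and a list of PROM_METRIC_DEFS. As a short illustration (not part of the PR) of how another module can read these values:

from constants.config import config

# The refresher's exporter settings added above: port 9091, proxied publicly on 9158.
port = config["REFRESHER"]["PROM_PORT"]
for name, description in config["REFRESHER"]["PROM_METRIC_DEFS"]:
    print(f"{name}: {description}")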
22 changes: 22 additions & 0 deletions src/handler.py
@@ -7,13 +7,17 @@
import library.refresher as refresher
import library.solrize as solrize
import library.validate as validate
from constants.config import config
from library.logger import getLogger
from library.prometheus import initialise_prom_metrics_and_start_server

logger = getLogger("handler")


def main(args):
try:
initialise_prom_metrics(args.type)

if args.type == "refresh":
db.migrateIfRequired()
refresher.refresh()
@@ -58,6 +62,24 @@ def main(args):
logger.error("{} Failed. {}".format(args.type, str(e).strip()))


def initialise_prom_metrics(operation: str):
if not operation.endswith("loop"):
return

logger.info("Starting prometheus metrics exporter...")

if operation == "validateloop":
container_conf_name = "VALIDATION"
elif operation == "refreshloop":
container_conf_name = "REFRESHER"
else:
container_conf_name = operation[:-4].upper()

initialise_prom_metrics_and_start_server(
config[container_conf_name]["PROM_METRIC_DEFS"], config[container_conf_name]["PROM_PORT"]
)


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Refresh from IATI Registry")
parser.add_argument("-t", "--type", dest="type", default="refresh", help="Trigger 'refresh' or 'validate'")
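
The library.prometheus module imported above is not included in this diff view. A minimal sketch of what initialise_prom_metrics_and_start_server might look like, assuming it registers one Gauge per (name, description) pair and serves them with prometheus_client's built-in HTTP server; this is an assumption, not the PR's actual implementation:

# Hypothetical sketch of library/prometheus.py; the real module is not shown in this diff.
from prometheus_client import Gauge, start_http_server

_metrics: dict[str, Gauge] = {}


def initialise_prom_metrics_and_start_server(metric_defs, port):
    """Register a Gauge for each (name, description) pair and expose them on `port`."""
    for name, description in metric_defs:
        _metrics[name] = Gauge(name, description)
    start_http_server(port)


def set_prom_metric(name, value):
    """Assumed helper for stage loops to publish a gauge value."""
    _metrics[name].set(value)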