diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..b1cb3b1 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,42 @@ +--- +name: ci + +on: [push] + +jobs: + build: + runs-on: ubuntu-22.04 + strategy: + matrix: + goos: [linux] + goarch: [amd64] + steps: + - name: check out code + uses: actions/checkout@v3 + + - name: install updates and python packages + run: | + sudo apt-get update -qq + sudo apt-get upgrade -yqq + sudo apt-get install -y python3-pip + pip install pylint + + - name: verify code to confirm to python + run: | + pylint --rcfile .pylintrc src/* + + - name: tar python package for release + run: | + mkdir erhchecker + cp src/* erhchecker/ + python3 -m pip install -r files/requirements.txt --target erhchecker --upgrade + python3 -m zipapp -p "/usr/bin/python3" --compress --output erhchecker.pyz erhchecker + tar -cvzf erhchecker_${{matrix.goos}}_${{matrix.goarch}}.tar.gz erhchecker.pyz + + - name: upload a built artifact for testing + if: startsWith(github.ref, 'refs/heads/main') == false + uses: actions/upload-artifact@v3 + with: + name: erhchecker_${{matrix.goos}}_${{matrix.goarch}} + path: erhchecker_${{matrix.goos}}_${{matrix.goarch}}.tar.gz + retention-days: 5 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..5488796 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,58 @@ +--- +name: release + +on: + pull_request: + types: [closed] + +jobs: + build_release: + runs-on: ubuntu-22.04 + strategy: + matrix: + goos: [linux] + goarch: [amd64] + + steps: + - name: check out code + uses: actions/checkout@v3 + + - name: install updates and python packages + run: | + sudo apt-get update -qq + sudo apt-get upgrade -yqq + sudo apt-get install -y python3-pip + + - name: zip python package for release + run: | + mkdir erhchecker + cp src/* erhchecker/ + python3 -m pip install -r files/requirements.txt --target erhchecker --upgrade + python3 
-m zipapp -p "/usr/bin/python3" --compress --output erhchecker.pyz erhchecker + tar -cvzf erhchecker_${{matrix.goos}}_${{matrix.goarch}}.tar.gz erhchecker.pyz + + - name: get version + id: version + run: | + echo "version=$(./erhchecker.pyz -v)" >> $GITHUB_ENV + + - name: release + uses: actions/create-release@v1 + id: release + with: + draft: false + prerelease: false + release_name: v${{ env.version }} + tag_name: v${{ env.version }} + env: + GITHUB_TOKEN: ${{ github.token }} + + - name: upload artifact for release + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ github.token }} + with: + upload_url: ${{ steps.release.outputs.upload_url }} + asset_path: erhchecker_${{matrix.goos}}_${{matrix.goarch}}.tar.gz + asset_name: erhchecker_${{matrix.goos}}_${{matrix.goarch}}.tar.gz + asset_content_type: application/gzip diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..e73e01f --- /dev/null +++ b/.pylintrc @@ -0,0 +1,570 @@ +[MAIN] + +# Specify a configuration file. +#rcfile= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Files or directories to be skipped. They should be base names, not +# paths. +ignore=CVS + +# Add files or directories matching the regex patterns to the ignore-list. The +# regex matches against paths and can be in Posix or Windows format. +ignore-paths= + +# Files or directories matching the regex patterns are skipped. The regex +# matches against base names, not paths. +ignore-patterns=^\.# + +# Pickle collected data for later comparisons. +persistent=yes + +# List of plugins (as comma separated values of python modules names) to load, +# usually to register additional checkers. 
+load-plugins= + pylint.extensions.check_elif, + pylint.extensions.bad_builtin, + pylint.extensions.docparams, + pylint.extensions.for_any_all, + pylint.extensions.set_membership, + pylint.extensions.code_style, + pylint.extensions.overlapping_exceptions, + pylint.extensions.typing, + pylint.extensions.redefined_variable_type, + pylint.extensions.comparison_placement, + pylint.extensions.mccabe, + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use. +jobs=0 + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. +suggestion-mode=yes + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code +extension-pkg-allow-list= + +# Minimum supported python version +py-version = 3.10 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. +limit-inference-results=100 + +# Specify a score threshold to be exceeded before program exits with error. +fail-under=9.5 + +# Return non-zero exit code if any of these messages/categories are detected, +# even if score is above --fail-under value. Syntax same as enable. Messages +# specified are enabled, while categories only check already-enabled messages. +fail-on= + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED +# confidence= + +# Enable the message, report, category or checker with the given id(s). 
You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +enable= + use-symbolic-message-instead, + useless-suppression, + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once).You can also use "--disable=all" to +# disable everything first and then re-enable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use"--disable=all --enable=classes +# --disable=W" + +disable= + attribute-defined-outside-init, + invalid-name, + missing-docstring, + protected-access, + too-few-public-methods, + # handled by black + format, + # We anticipate #3512 where it will become optional + fixme, + cyclic-import, + import-error, + + +[REPORTS] + +# Set the output format. Available formats are text, parseable, colorized, msvs +# (visual studio) and html. You can also give a reporter class, eg +# mypackage.mymodule.MyReporterClass. +output-format=text + +# Tells whether to display a full report or only the messages +reports=no + +# Python expression which should return a note less than 10 (10 is the highest +# note). You have access to the variables 'fatal', 'error', 'warning', 'refactor', 'convention' +# and 'info', which contain the number of messages in each category, as +# well as 'statement', which is the total number of statements analyzed. This +# score is used by the global evaluation report (RP0004). 
+evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details +#msg-template= + +# Activate the evaluation score. +score=yes + + +[LOGGING] + +# Logging modules to check that the string format arguments are in logging +# function parameter format +logging-modules=logging + +# The type of string formatting that logging methods do. `old` means using % +# formatting, `new` is for `{}` formatting. +logging-format-style=old + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME,XXX,TODO + +# Regular expression of note tags to take in consideration. +#notes-rgx= + + +[SIMILARITIES] + +# Minimum lines number of a similarity. +min-similarity-lines=6 + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + +# Ignore imports when computing similarities. +ignore-imports=yes + +# Signatures are removed from the similarity computation +ignore-signatures=yes + + +[VARIABLES] + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# A regular expression matching the name of dummy variables (i.e. expectedly +# not used). +dummy-variables-rgx=_$|dummy + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_,_cb + +# Tells whether unused global variables should be treated as a violation. +allow-global-unused-variables=yes + +# List of names allowed to shadow builtins +allowed-redefined-builtins= + +# Argument names that match this expression will be ignored. 
Default to name +# with leading underscore. +ignored-argument-names=_.* + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io + + +[FORMAT] + +# Maximum number of characters on a single line. +max-line-length=120 + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. +single-line-class-stmt=no + +# Maximum number of lines in a module +max-module-lines=1000 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + + +[BASIC] + +# Good variable names which should always be accepted, separated by a comma +good-names=i,j,k,ex,Run,_ + +# Good variable names regexes, separated by a comma. If names match any regex, +# they will always be accepted +good-names-rgxs= + +# Bad variable names which should always be refused, separated by a comma +bad-names=foo,bar,baz,toto,tutu,tata + +# Bad variable names regexes, separated by a comma. If names match any regex, +# they will always be refused +bad-names-rgxs= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Include a hint for the correct naming format with invalid-name +include-naming-hint=no + +# Naming style matching correct function names. 
+function-naming-style=snake_case + +# Regular expression matching correct function names +function-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming style matching correct variable names. +variable-naming-style=snake_case + +# Regular expression matching correct variable names +variable-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Regular expression matching correct constant names +const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ + +# Naming style matching correct attribute names. +attr-naming-style=snake_case + +# Regular expression matching correct attribute names +attr-rgx=[a-z_][a-z0-9_]{2,}$ + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Regular expression matching correct argument names +argument-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Regular expression matching correct class attribute names +class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ + +# Naming style matching correct class constant names. +class-const-naming-style=UPPER_CASE + +# Regular expression matching correct class constant names. Overrides class- +# const-naming-style. +#class-const-rgx= + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names +inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Regular expression matching correct class names +class-rgx=[A-Z_][a-zA-Z0-9]+$ + + +# Naming style matching correct module names. +module-naming-style=snake_case + +# Regular expression matching correct module names +module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ + + +# Naming style matching correct method names. 
+method-naming-style=snake_case + +# Regular expression matching correct method names +method-rgx=[a-z_][a-z0-9_]{2,}$ + +# Regular expression which can overwrite the naming style set by typevar-naming-style. +#typevar-rgx= + +# Regular expression which should only match function or class names that do +# not require a docstring. Use ^(?!__init__$)_ to also check __init__. +no-docstring-rgx=__.*__ + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# List of decorators that define properties, such as abc.abstractproperty. +property-classes=abc.abstractproperty + + +[TYPECHECK] + +# Regex pattern to define which classes are considered mixins if ignore-mixin- +# members is set to 'yes' +mixin-class-rgx=.*MixIn + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis). It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=SQLObject, optparse.Values, thread._local, _thread._local + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members=REQUEST,acl_users,aq_parent,argparse.Namespace + +# List of decorators that create context managers from functions, such as +# contextlib.contextmanager. +contextmanager-decorators=contextlib.contextmanager + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. 
+ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. +missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices=1 + +[SPELLING] + +# Spelling dictionary name. Available dictionaries: none. To make it working +# install python-enchant package. +spelling-dict= + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# List of comma separated words that should be considered directives if they +# appear and the beginning of a comment and should not be checked. +spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:,pragma:,# noinspection + +# A path to a file that contains private dictionary; one word per line. +spelling-private-dict-file=.pyenchant_pylint_custom_dict.txt + +# Tells whether to store unknown words to indicated private dictionary in +# --spelling-private-dict-file option instead of raising a message. +spelling-store-unknown-words=no + +# Limits count of emitted suggestions for spelling mistakes. 
+max-spelling-suggestions=2 + + +[DESIGN] + +# Maximum number of arguments for function / method +max-args=10 + +# Maximum number of locals for function / method body +max-locals=25 + +# Maximum number of return / yield for function / method body +max-returns=11 + +# Maximum number of branch for function / method body +max-branches=27 + +# Maximum number of statements in function / method body +max-statements=100 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# List of qualified class names to ignore when counting class parents (see R0901). +ignored-parents= + +# Maximum number of attributes for a class (see R0902). +max-attributes=11 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=25 + +# Maximum number of boolean expressions in an if statement (see R0916). +max-bool-expr=5 + +# List of regular expressions of class ancestor names to +# ignore when counting public methods (see R0903). +exclude-too-few-public-methods= + +max-complexity=10 + +[CLASSES] + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__,__new__,setUp,__post_init__ + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=mcs + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict,_fields,_replace,_source,_make + +# Warn about protected attribute access inside special methods +check-protected-access-in-special-methods=no + +[IMPORTS] + +# List of modules that can be imported at any level, not just the top level +# one. +allow-any-import-level= + +# Allow wildcard imports from modules that define __all__. +allow-wildcard-with-all=no + +# Analyse import fallback blocks. 
This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Deprecated modules which should not be used, separated by a comma +deprecated-modules=regsub,TERMIOS,Bastion,rexec + +# Create a graph of every (i.e. internal and external) dependencies in the +# given file (report RP0402 must not be disabled) +import-graph= + +# Create a graph of external dependencies in the given file (report RP0402 must +# not be disabled) +ext-import-graph= + +# Create a graph of internal dependencies in the given file (report RP0402 must +# not be disabled) +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + +# Couples of modules and preferred modules, separated by a comma. +preferred-modules= + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when being caught. Defaults to +# "Exception" +overgeneral-exceptions=builtins.BaseException, + builtins.Exception + + +[TYPING] + +# Set to ``no`` if the app / library does **NOT** need to support runtime +# introspection of type annotations. If you use type annotations +# **exclusively** for type checking of an application, you're probably fine. +# For libraries, evaluate if some users what to access the type hints at +# runtime first, e.g., through ``typing.get_type_hints``. Applies to Python +# versions 3.7 - 3.9 +runtime-typing = no + + +[DEPRECATED_BUILTINS] + +# List of builtins function names that should not be used, separated by a comma +bad-functions=map,input + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + +# Complete name of functions that never returns. 
When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. +never-returning-functions=sys.exit,argparse.parse_error + + +[STRING] + +# This flag controls whether inconsistent-quotes generates a warning when the +# character used as a quote delimiter is used inconsistently within a module. +check-quote-consistency=no + +# This flag controls whether the implicit-str-concat should generate a warning +# on implicit string concatenation in sequences defined over several lines. +check-str-concat-over-line-jumps=no + + +[CODE_STYLE] + +# Max line length for which to still emit suggestions. Used to prevent optional +# suggestions which would get split by a code formatter (e.g., black). Will +# default to the setting for ``max-line-length``. +#max-line-length-suggestions= diff --git a/BUILD.md b/BUILD.md new file mode 100644 index 0000000..9cdc536 --- /dev/null +++ b/BUILD.md @@ -0,0 +1,5 @@ +## Build from source +--- +1. Create a folder called erhchecker and add the __main__.py and hcapp.py files to it +1. python3 -m pip install -r requirements.txt --target erhchecker --upgrade +1. python3 -m zipapp -p "/usr/bin/python3" erhchecker \ No newline at end of file diff --git a/README.md b/README.md index 0ff448b..b578d7b 100644 --- a/README.md +++ b/README.md @@ -1 +1,41 @@ -# edge-router-health-checker \ No newline at end of file +# edge-router-health-checker + +This is a Python script that can be used by high-availability processes like keepalived to analyze openziti router health checks and make informed decisions about when best to make protection switches/reversions. + +Every time this script is run, it searches the openziti router configuration file to find the health checks web endpoint setting, i.e. the listen port. Once found, it queries the web endpoint and analyzes the output it gets. 
The environment variable to pass the configuration file location is `ROUTER_CONFIG_FILE_PATH`. + +There are four states from which the decision is made: + +1. case_0 (`True`, `True`) +This is the default state. The `controllerPing` and `link.health` are both True. Return state is 0. +1. case_1 (`False`, `False`) +The `controllerPing` and `link.health` are both False. Return state is 1. +1. case_2 (`True`, `False`) +The `controllerPing` is True and `link.health` is False. Return state is 1. +1. case_3 (`False`, `True`) +The `controllerPing` is False and `link.health` is True. Return state is 1. In this state, a timer (environment variable is `SWITCH_TIMEOUT`, in seconds) was introduced to delay the return state change to 1 to allow the current sessions to drain. The default value in the script is 600 seconds (10 minutes). The timer can be adjusted. + +***Important Notes:*** + +1. To allow the decision algorithm to take non-traversable openziti routers into account, a file with the routerIds of these routers needs to be passed in. The environment variable to pass the file location is `NO_T_FLAG_ROUTERS_FILE_PATH`. The file is YAML; its expected structure, expressed as a JSON schema, is as follows: + +```json +{ + "type": "object", + "required": ["routerIds"], + "properties": { + "routerIds": { + "type": "array", + "items": { + "type": "string" + } + } + } +} +``` + +1. In CloudZiti networks, there is a fabric-only router deployed on the controller to allow the management channel to flow through to reach the salt master that is configured on the controller. The link from this router is ignored by the decision algorithm even if it is reported as being healthy. To find this link, the ip address of the controller is compared to the destination ip address of each link reported by the `link.health` check until a match is made. + +Here is the link to the vrrp keepalived setup guide that goes through the configuration setup steps. The section pertaining to configuration steps of the script is at the end of the article, i.e. 
`Ability to track loss of controller and/or fabric to trigger local switchover` + +[On-Prem HA](https://support.netfoundry.io/hc/en-us/articles/9962679994381-On-Prem-Ingress-High-Availability) diff --git a/files/keepalived.conf b/files/keepalived.conf new file mode 100644 index 0000000..7007981 --- /dev/null +++ b/files/keepalived.conf @@ -0,0 +1,32 @@ +global_defs { + script_user ziggy ziggy + enable_script_security +} + +vrrp_script wan_check { + script "/usr/bin/python3 /home/ziggy/erhchecker.pyz -c /home/ziggy/config.yml" + interval 10 + #timeout 90 + #weight 0 + rise 3 # times to wait before clear the failure, default 3 + fall 6 # times to wait to failover to standby, default 2 x rise + user ziggy ziggy +} + +vrrp_instance EN1 { + state MASTER + interface eth1 + virtual_router_id 10 + priority 200 + advert_int 1 + authentication { + auth_type PASS + auth_pass WaLLab01 + } + virtual_ipaddress { + 192.168.100.22 + } + track_script { + wan_check + } +} \ No newline at end of file diff --git a/files/requirements.txt b/files/requirements.txt new file mode 100644 index 0000000..3d90aaa --- /dev/null +++ b/files/requirements.txt @@ -0,0 +1 @@ +colorama \ No newline at end of file diff --git a/src/__main__.py b/src/__main__.py new file mode 100644 index 0000000..270a9b2 --- /dev/null +++ b/src/__main__.py @@ -0,0 +1,8 @@ +#!/usr/bin/python3 +""" +NetFoundry Edge Router Health Check +""" +import sys +import hcapp + +sys.exit(hcapp.main()) diff --git a/src/hcapp.py b/src/hcapp.py new file mode 100644 index 0000000..ec3c96c --- /dev/null +++ b/src/hcapp.py @@ -0,0 +1,267 @@ +""" +NetFoundry Edge Router Health Check +""" +import argparse +import logging +import os +import traceback +import ipaddress +import socket +from datetime import datetime +import yaml +import requests +import urllib3 +from jsonschema import validate, exceptions as jsonexcept +from colorama import Fore, Style, init +urllib3.disable_warnings(category = urllib3.exceptions.InsecureRequestWarning) + +# 
# Global Options
# Timeout (seconds) passed to the health-check HTTP request.
TIMEOUT = 60
HEADERS = {"content-type": "application/json"}
# JSON schema the "router ids" YAML file is validated against.
SCHEMA_ROUTERIDS = {
    "type": "object",
    "required": ["routerIds"],
    "properties": {
        "routerIds": {
            "type": "array",
            "items": {
                "type": "string"
            }
        }
    }
}


def get_arguments():
    """
    Create argparser Namespace
    :return: A Namespace containing arguments
    """
    __version__ = '1.0.0'
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--routerConfigFilePath', type=str,
                        help='Specify the edge router config file')
    parser.add_argument('-t', '--switchTimeout', type=int,
                        help='Time to pass to allow for sessions drainage')
    parser.add_argument('-r', '--noTFlagRoutersFilePath', type=str,
                        help='Specify yaml file containing list of router ids that have no-traversable flag set')
    parser.add_argument('-l', '--logLevel', type=str,
                        choices=['INFO', 'ERROR', 'WARNING', 'DEBUG', 'CRITICAL'],
                        help='Set the logging level')
    parser.add_argument('-f', '--logFile', type=str,
                        help='Specify the log file')
    parser.add_argument('-v', '--version',
                        action='version',
                        version=__version__)
    return parser.parse_args()


class CustomFormatter(logging.Formatter):
    """
    Return a custom color for the message based on the log level.
    """
    LEVEL_COLORS = {
        logging.CRITICAL: Fore.CYAN,
        logging.ERROR: Fore.RED,
        logging.WARNING: Fore.YELLOW,
        logging.INFO: Fore.GREEN,
        logging.DEBUG: Fore.MAGENTA
    }

    def format(self, record):
        """
        Format the record, then wrap its level name in the level's color.
        :param record: the logging.LogRecord to format
        :return: the formatted string with a colored level name
        """
        level_color = self.LEVEL_COLORS.get(record.levelno, "")
        colored_levelname = f"{level_color}{record.levelname}{Style.RESET_ALL}"
        formatted_msg = super().format(record)
        # Replace only the first match: with the formats used in setup_logging
        # that is the level field, so a level word that also appears inside the
        # message text is left uncolored.
        return formatted_msg.replace(record.levelname, colored_levelname, 1)


def setup_logging(logfile='program_name.log', loglevel=logging.INFO):
    """
    Set up logging to log messages to both the console and a file.
    Parameters:
    - logfile (string): The file to log messages to. Defaults to 'program_name.log'.
      A falsy value (e.g. "") disables the file handler.
    - loglevel (int or string): The minimum level of log messages to display. Defaults to logging.INFO.
    """
    # Initialize colorama
    init(autoreset=True)

    # Create a logger object
    logger = logging.getLogger()
    logger.setLevel(loglevel)

    # Create a file handler to log messages to a file
    if logfile:
        file_handler = logging.FileHandler(logfile)
        file_handler.setLevel(loglevel)
        file_formatter = CustomFormatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
        file_handler.setFormatter(file_formatter)
        logger.addHandler(file_handler)

    # Create a console handler to log messages to the console
    console_handler = logging.StreamHandler()
    console_handler.setLevel(loglevel)

    console_formatter = CustomFormatter('%(levelname)s - %(message)s')
    console_handler.setFormatter(console_formatter)
    logger.addHandler(console_handler)


def parse_variables(cmdVar, envVar, defaultVar):
    """
    Pick a setting by precedence: command line, then environment, then default.
    :param cmdVar: value from the command line, or None when not supplied
    :param envVar: name of the environment variable to fall back to
    :param defaultVar: value returned when neither source provides one
    :return: the selected value (environment values are returned as strings)
    """
    if cmdVar is not None:
        return cmdVar
    return os.environ.get(envVar, defaultVar)


def parse_yaml_file(file, logString):
    """
    Load a YAML file with yaml.safe_load.
    :param file: path of the file to read; a falsy path is reported and skipped
    :param logString: human-readable name of the file used in log messages
    :return: the parsed content, or None when the path is empty, the file is
             empty, missing or unreadable, or the content is not valid YAML
    """
    logging.debug("Parsing YAML File: %s", file)

    if not file:
        logging.warning("No File Path given for '%s' file", logString)
        return None

    try:
        # getsize raises OSError for a missing/inaccessible path, so it must be
        # inside the try block to be reported instead of aborting the script.
        if os.path.getsize(file) == 0:
            logging.warning("File has no content: %s", logString)
            return None
        with open(file, mode='r', encoding='utf-8') as newFile:
            return yaml.safe_load(newFile)
    except (yaml.YAMLError, OSError):
        logging.warning(traceback.format_exc(0))
        logging.debug(traceback.format_exc())

    return None


def list_comprehension_return_dict_if(keysValues, key):
    """
    Build a dict from the (key, value) pairs whose key equals ``key``.
    :param keysValues: iterable of (key, value) pairs, e.g. dict.items()
    :param key: the key to keep
    :return: dict holding only the matching entries
    """
    return {k: v for (k, v) in keysValues if k == key}


def list_comprehension_return_list_if(valueList, key, value):
    """
    Return the items of ``valueList`` for which ``item[key] == value``.
    """
    return [v for v in valueList if v[key] == value]


def nested_list_comprehension_return_list_if(valueList, key, value, inner_key):
    """
    For every item with ``item[key] == value`` return ``list(item[inner_key])``.
    :return: list of lists, one per matching item
    """
    return [list(v[inner_key]) for v in valueList if v[key] == value]


def is_ipv4(string):
    """
    Tell whether ``string`` is accepted by ipaddress.IPv4Network.
    :return: True when it parses, False when ValueError is raised
    """
    try:
        ipaddress.IPv4Network(string)
        return True
    except ValueError:
        return False


def case_0(**kwargs):
    """
    Controller ping healthy and at least one usable link: report healthy.
    :return: 0
    """
    logging.debug("All healthchecks are healthy and at least one link is active")
    logging.debug("Control Ping is %s", kwargs["controlPingData"]["healthy"])
    logging.debug("Link Ping is %s", kwargs["linkHealthData"]["healthy"])
    return 0


def case_1(**kwargs):
    """
    Controller ping failed and no usable link: report failure.
    :return: 1
    """
    logging.debug("Number of consecutive controller check failures is %d",
                  kwargs["controlPingData"]["consecutiveFailures"])
    logging.info("Failure start time is %s", kwargs["controlPingData"]["failingSince"].split("+")[0])
    logging.debug("Current time is %s", kwargs["controlPingData"]["lastCheckTime"])
    return 1


def case_2(**kwargs):
    """
    Controller ping healthy but no usable link: report failure.
    :return: 1
    """
    logging.debug("All links are down, details are %s.", kwargs["linkHealthData"]["details"])
    return 1


def case_3(**kwargs):
    """
    Controller ping failed while a usable link remains.
    Failure is reported only once the time between ``failingSince`` and
    ``lastCheckTime`` exceeds ``switchTimeout`` seconds.
    :return: 1 after the timeout has been exceeded, otherwise 0
    """
    # Switch after delay timeout reached to allow long live sessions
    # to drain if only control channel is failed
    timeFormat = '%Y-%m-%dT%H:%M:%SZ'
    lastCheck = datetime.strptime(kwargs["controlPingData"]["lastCheckTime"], timeFormat)
    failingSince = datetime.strptime(kwargs["controlPingData"]["failingSince"], timeFormat)
    delaySwitch = (lastCheck - failingSince).total_seconds()
    logging.debug("Time since Controller channel has gone down is over %ds", delaySwitch)
    if delaySwitch > kwargs["switchTimeout"]:
        logging.debug("Switch to slave due to timeout of %ds has been triggered", kwargs["switchTimeout"])
        return 1
    return 0


def main():
    """
    Main Function
    :return: 0 when the router config cannot be parsed, the health-check URL
             cannot be derived from it, or the controller address cannot be
             resolved; 1 when the health-check data cannot be fetched or lacks
             the expected checks; otherwise the result of the matching case_*
             function.
    """

    # Get command line arguments or environment variables
    args = get_arguments()
    routerConfigFilePath = parse_variables(args.routerConfigFilePath, 'ROUTER_CONFIG_FILE_PATH',
                                           '/opt/netfoundry/ziti/ziti-router/config.yml')
    switchTimeout = int(parse_variables(args.switchTimeout, 'SWITCH_TIMEOUT', 600))
    noTFlagRoutersFilePath = parse_variables(args.noTFlagRoutersFilePath,
                                             'NO_T_FLAG_ROUTERS_FILE_PATH', "")
    logFile = parse_variables(args.logFile, 'LOG_FILE', "")
    logLevel = parse_variables(args.logLevel, 'LOG_LEVEL', "INFO")

    # Set up initial variables' states/values
    setup_logging(logFile, logLevel)
    config = parse_yaml_file(routerConfigFilePath, "router config")
    if not config:
        return 0
    nonTraversableRouters = []
    if ids := parse_yaml_file(noTFlagRoutersFilePath, "router ids"):
        try:
            validate(instance=ids, schema=SCHEMA_ROUTERIDS)
            nonTraversableRouters = ids.get("routerIds")
        except jsonexcept.ValidationError:
            logging.warning(traceback.format_exc(0))
            nonTraversableRouters = []
    logging.debug("Routers list is %s", nonTraversableRouters)

    # Get HC Url from config file
    try:
        web_config = list_comprehension_return_dict_if(config.items(), "web")["web"]
        [[hcPort]] = nested_list_comprehension_return_list_if(web_config, "name", "health-check", "bindPoints")
        [[hcPath]] = nested_list_comprehension_return_list_if(web_config, "name", "health-check", "apis")
        url = f'https://127.0.0.1:{hcPort["address"].split(":")[1]}/{hcPath["binding"]}'
        ctrlAddr = config["ctrl"]["endpoint"].split(":")[1]
    except (KeyError, ValueError, IndexError, TypeError, AttributeError):
        # IndexError: an address/endpoint without a second ":"-separated field.
        # TypeError/AttributeError: config sections that are not the expected
        # mapping/list shape.
        logging.warning(traceback.format_exc(0))
        logging.debug(traceback.format_exc())
        return 0

    # Resolve ctrl dns name if needed
    try:
        ctrlIp = [ctrlAddr] if is_ipv4(ctrlAddr) else [socket.gethostbyname(ctrlAddr)]
        logging.debug("ctrl address is %s", ctrlIp)
    except (socket.gaierror, socket.herror, OSError):
        logging.warning(traceback.format_exc(0))
        logging.debug(traceback.format_exc())
        return 0

    # Get Healthcheck data
    try:
        # NOTE: certificate verification is disabled for this loopback request.
        response = requests.get(url, timeout=TIMEOUT, headers=HEADERS, verify=False)
        hcData = response.json()["data"]
        # Exactly one entry per check id is expected; a missing "checks" key or
        # a missing/duplicated check is handled like any other malformed reply.
        [controlPingData] = list_comprehension_return_list_if(hcData["checks"], "id", "controllerPing")
        [linkHealthData] = list_comprehension_return_list_if(hcData["checks"], "id", "link.health")
    except (requests.RequestException, ValueError, KeyError, TypeError):
        logging.error(traceback.format_exc(0))
        logging.debug(traceback.format_exc())
        return 1
    logging.debug("HC = %s", hcData)
    logging.debug("Overall Ping is %s", hcData.get("healthy"))

    # Evaluate all active links and remove links with no-traversal flag
    # as well as links whose remote address is the controller address
    newLinkDetails = []

    if linkHealthData.get("details"):
        newLinkDetails = [
            d for d in linkHealthData["details"]
            if d["destRouterId"] not in nonTraversableRouters
            if d["addresses"]["ack"]["remoteAddr"].split(":")[1] not in ctrlIp
        ]
    logging.debug("New link data after filtering %s", newLinkDetails)

    newLinkHealthy = bool(newLinkDetails)

    # Evaluate the various conditions and execute the corresponding function
    condition = (controlPingData["healthy"], newLinkHealthy)
    # Create a switch table mapping conditions to corresponding functions
    switch_table = {
        (True, True): case_0,
        (False, False): case_1,
        (True, False): case_2,
        (False, True): case_3
    }
    # Default to case 0; the fallback must accept the keyword arguments that
    # every handler is called with.
    handler = switch_table.get(condition, lambda **_kwargs: 0)
    return handler(controlPingData=controlPingData,
                   linkHealthData=linkHealthData,
                   switchTimeout=switchTimeout)