Revert "Refactor code for consistency and readability."
This reverts commit 731c306.
dvershinin committed Jan 3, 2025
1 parent 731c306 commit f1d0361
Showing 5 changed files with 185 additions and 241 deletions.
114 changes: 51 additions & 63 deletions gixy/cli/argparser.py
@@ -1,61 +1,59 @@
# flake8: noqa

from configargparse import *
from io import StringIO
from six.moves import StringIO

from gixy.core.plugins_manager import PluginsManager

# used while parsing args to keep track of where they came from
_COMMAND_LINE_SOURCE_KEY = "command_line"
_ENV_VAR_SOURCE_KEY = "environment_variables"
_CONFIG_FILE_SOURCE_KEY = "config_file"
_DEFAULTS_SOURCE_KEY = "defaults"
_COMMAND_LINE_SOURCE_KEY = 'command_line'
_ENV_VAR_SOURCE_KEY = 'environment_variables'
_CONFIG_FILE_SOURCE_KEY = 'config_file'
_DEFAULTS_SOURCE_KEY = 'defaults'


class GixyConfigParser(DefaultConfigFileParser):
def get_syntax_description(self):
return ""
return ''

def parse(self, stream):
"""Parses the keys + values from a config file."""

items = OrderedDict()
prefix = ""
prefix = ''
for i, line in enumerate(stream):
line = line.strip()
if not line or line[0] in ["#", ";"] or line.startswith("---"):
if not line or line[0] in ['#', ';'] or line.startswith('---'):
continue
if line[0] == "[":
prefix = "%s-" % line[1:-1].replace("_", "-")
if line[0] == '[':
prefix = '%s-' % line[1:-1].replace('_', '-')
continue

white_space = "\\s*"
key = "(?P<key>[^:=;#\s]+?)"
value = white_space + "[:=\s]" + white_space + "(?P<value>.+?)"
comment = white_space + "(?P<comment>\\s[;#].*)?"
white_space = '\\s*'
key = '(?P<key>[^:=;#\s]+?)'
value = white_space + '[:=\s]' + white_space + '(?P<value>.+?)'
comment = white_space + '(?P<comment>\\s[;#].*)?'

key_only_match = re.match("^" + key + comment + "$", line)
key_only_match = re.match('^' + key + comment + '$', line)
if key_only_match:
key = key_only_match.group("key")
items[key] = "true"
key = key_only_match.group('key')
items[key] = 'true'
continue

key_value_match = re.match("^" + key + value + comment + "$", line)
key_value_match = re.match('^' + key + value + comment + '$', line)
if key_value_match:
key = key_value_match.group("key")
value = key_value_match.group("value")
key = key_value_match.group('key')
value = key_value_match.group('value')

if value.startswith("[") and value.endswith("]"):
if value.startswith('[') and value.endswith(']'):
# handle special case of lists
value = [elem.strip() for elem in value[1:-1].split(",")]
value = [elem.strip() for elem in value[1:-1].split(',')]

items[prefix + key] = value
continue

raise ConfigFileParserException(
"Unexpected line %s in %s: %s"
% (i, getattr(stream, "name", "stream"), line)
)
raise ConfigFileParserException('Unexpected line %s in %s: %s' % (i,
getattr(stream, 'name', 'stream'), line))
return items

def serialize(self, items):
@@ -65,28 +63,24 @@ def serialize(self, items):
r = StringIO()
for key, value in items.items():
if type(value) == OrderedDict:
r.write("\n[%s]\n" % key)
r.write('\n[%s]\n' % key)
r.write(self.serialize(value))
else:
value, help = value
if help:
r.write("; %s\n" % help)
r.write("%s = %s\n" % (key, value))
r.write('; %s\n' % help)
r.write('%s = %s\n' % (key, value))
return r.getvalue()


class GixyHelpFormatter(HelpFormatter):
def format_help(self):
manager = PluginsManager()
help_message = super(GixyHelpFormatter, self).format_help()
if "plugins options:" in help_message:
# Print available plugins _only_ if we print options for it
plugins = "\n".join(
"\t" + plugin.__name__ for plugin in manager.plugins_classes
)
help_message = "{orig}\n\navailable plugins:\n{plugins}\n".format(
orig=help_message, plugins=plugins
)
if 'plugins options:' in help_message:
# Print available blugins _only_ if we prints options for it
plugins = '\n'.join('\t' + plugin.__name__ for plugin in manager.plugins_classes)
help_message = '{orig}\n\navailable plugins:\n{plugins}\n'.format(orig=help_message, plugins=plugins)
return help_message


@@ -98,14 +92,15 @@ def get_possible_config_keys(self, action):
"""
keys = []
for arg in action.option_strings:
if arg in ["--config", "--write-config", "--version"]:
if arg in ['--config', '--write-config', '--version']:
continue
if any([arg.startswith(2 * c) for c in self.prefix_chars]):
keys += [arg[2:], arg] # eg. for '--bla' return ['bla', '--bla']

return keys

def get_items_for_config_file_output(self, source_to_settings, parsed_namespace):
def get_items_for_config_file_output(self, source_to_settings,
parsed_namespace):
"""Converts the given settings back to a dictionary that can be passed
to ConfigFormatParser.serialize(..).
@@ -119,36 +114,29 @@ def get_items_for_config_file_output(self, source_to_settings, parsed_namespace)
config_file_items = OrderedDict()
for source, settings in source_to_settings.items():
if source == _COMMAND_LINE_SOURCE_KEY:
_, existing_command_line_args = settings[""]
_, existing_command_line_args = settings['']
for action in self._actions:
config_file_keys = self.get_possible_config_keys(action)
if (
config_file_keys
and not action.is_positional_arg
and already_on_command_line(
existing_command_line_args, action.option_strings
)
):
if config_file_keys and not action.is_positional_arg and \
already_on_command_line(existing_command_line_args,
action.option_strings):
value = getattr(parsed_namespace, action.dest, None)
if value is not None:
if type(value) is bool:
value = str(value).lower()
if ":" in action.dest:
section, key = action.dest.split(":", 2)
key = key.replace("_", "-")
if ':' in action.dest:
section, key = action.dest.split(':', 2)
key = key.replace('_', '-')
if section not in config_file_items:
config_file_items[section] = OrderedDict()
config_file_items[section][key] = (value, action.help)
else:
config_file_items[config_file_keys[0]] = (
value,
action.help,
)
config_file_items[config_file_keys[0]] = (value, action.help)
elif source.startswith(_CONFIG_FILE_SOURCE_KEY):
for key, (action, value) in settings.items():
if ":" in action.dest:
section, key = action.dest.split(":", 2)
key = key.replace("_", "-")
if ':' in action.dest:
section, key = action.dest.split(':', 2)
key = key.replace('_', '-')
if section not in config_file_items:
config_file_items[section] = OrderedDict()
config_file_items[section][key] = (value, action.help)
@@ -159,13 +147,13 @@ def get_items_for_config_file_output(self, source_to_settings, parsed_namespace)

def create_parser():
return ArgsParser(
description="Gixy - a Nginx configuration [sec]analyzer\n\n",
description='Gixy - a Nginx configuration [sec]analyzer\n\n',
formatter_class=GixyHelpFormatter,
config_file_parser_class=GixyConfigParser,
auto_env_var_prefix="GIXY_",
auto_env_var_prefix='GIXY_',
add_env_var_help=False,
default_config_files=["/etc/gixy/gixy.cfg", "~/.config/gixy/gixy.conf"],
args_for_setting_config_path=["-c", "--config"],
args_for_writing_out_config_file=["--write-config"],
add_config_file_help=False,
default_config_files=['/etc/gixy/gixy.cfg', '~/.config/gixy/gixy.conf'],
args_for_setting_config_path=['-c', '--config'],
args_for_writing_out_config_file=['--write-config'],
add_config_file_help=False
)
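
For context, a minimal sketch of the kind of config file the GixyConfigParser above accepts (the option names are illustrative, not taken from this diff): '#' and ';' start comments, a bare key is stored as "true", a bracketed value is split into a list, and a [section] header prefixes the keys that follow it, so "domains" under [origins] is read as "origins-domains".

    ; hypothetical gixy.cfg
    debug                       ; bare key, parsed as "true"
    skips = [ssrf, origins]     ; bracketed value, parsed as a list
    [origins]
    domains = example.com       ; stored under the key "origins-domains"
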
6 changes: 2 additions & 4 deletions gixy/core/manager.py
@@ -19,9 +19,8 @@ def __init__(self, config=None):
def audit(self, file_path, file_data, is_stdin=False):
LOG.debug("Audit config file: {fname}".format(fname=file_path))
parser = NginxParser(
cwd=os.path.dirname(file_path) if not is_stdin else "",
allow_includes=self.config.allow_includes,
)
cwd=os.path.dirname(file_path) if not is_stdin else '',
allow_includes=self.config.allow_includes)
self.root = parser.parse(content=file_data.read(), path_info=file_path)

push_context(self.root)
@@ -56,7 +55,6 @@ def _audit_recursive(self, tree):
pop_context()

def _update_variables(self, directive):
"""Update context with variables from directive"""
# TODO(buglloc): finish him!
if not directive.provide_variables:
return
203 changes: 86 additions & 117 deletions gixy/core/regexp.py
@@ -3,7 +3,6 @@
import re
import random
import itertools

try:
from cached_property import cached_property
except ImportError:
@@ -17,7 +16,7 @@
def _build_reverse_list(original):
result = []
for c in range(1, 126):
c = chr(c)
c = six.unichr(c)
if c not in original:
result.append(c)
return frozenset(result)
@@ -31,26 +30,24 @@ def _build_reverse_list(original):
sre_parse.CATEGORY_NOT_SPACE: _build_reverse_list(sre_parse.WHITESPACE),
sre_parse.CATEGORY_DIGIT: sre_parse.DIGITS,
sre_parse.CATEGORY_NOT_DIGIT: _build_reverse_list(sre_parse.DIGITS),
sre_parse.CATEGORY_WORD: frozenset(
"abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "0123456789_"
),
sre_parse.CATEGORY_NOT_WORD: _build_reverse_list(
frozenset(
"abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "0123456789_"
)
),
sre_parse.CATEGORY_LINEBREAK: frozenset("\n"),
sre_parse.CATEGORY_NOT_LINEBREAK: _build_reverse_list(frozenset("\n")),
"ANY": [chr(x) for x in range(1, 127) if x != 10],
sre_parse.CATEGORY_WORD: frozenset('abcdefghijklmnopqrstuvwxyz'
'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
'0123456789_'),
sre_parse.CATEGORY_NOT_WORD: _build_reverse_list(frozenset('abcdefghijklmnopqrstuvwxyz'
'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
'0123456789_')),
sre_parse.CATEGORY_LINEBREAK: frozenset('\n'),
sre_parse.CATEGORY_NOT_LINEBREAK: _build_reverse_list(frozenset('\n')),
'ANY': [six.unichr(x) for x in range(1, 127) if x != 10]
}

CATEGORIES_NAMES = {
sre_parse.CATEGORY_DIGIT: r"\d",
sre_parse.CATEGORY_NOT_DIGIT: r"\D",
sre_parse.CATEGORY_SPACE: r"\s",
sre_parse.CATEGORY_NOT_SPACE: r"\S",
sre_parse.CATEGORY_WORD: r"\w",
sre_parse.CATEGORY_NOT_WORD: r"\W",
sre_parse.CATEGORY_DIGIT: r'\d',
sre_parse.CATEGORY_NOT_DIGIT: r'\D',
sre_parse.CATEGORY_SPACE: r'\s',
sre_parse.CATEGORY_NOT_SPACE: r'\S',
sre_parse.CATEGORY_WORD: r'\w',
sre_parse.CATEGORY_NOT_WORD: r'\W',
}


@@ -81,7 +78,7 @@ def extract_groups(parsed, top=True):


def _gen_combinator(variants, _merge=True):
if not hasattr(variants, "__iter__"):
if not hasattr(variants, '__iter__'):
return [variants] if variants is not None else []

res = []
@@ -112,7 +109,7 @@ def _merge_variants(variants):
result.append(_merge_variants(var))
else:
result.append(var)
return "".join(result)
return ''.join(result)


class Token(object):
@@ -141,48 +138,48 @@ def _reg_group(self, gid):
self.regexp.reg_group(gid, self)

def can_contain(self, char, skip_literal=True):
raise NotImplementedError("can_contain must be implemented")
raise NotImplementedError('can_contain must be implemented')

def can_startswith(self, char, strict=False):
return self.can_contain(char, skip_literal=False)

def must_contain(self, char):
raise NotImplementedError("must_contain must be implemented")
raise NotImplementedError('must_contain must be implemented')

def must_startswith(self, char, strict=False):
return self.must_contain(char)

def generate(self, context):
raise NotImplementedError("generate must be implemented")
raise NotImplementedError('generate must be implemented')

def __str__(self):
raise NotImplementedError("__str__ must be implemented")
raise NotImplementedError('__str__ must be implemented')


class AnyToken(Token):
type = sre_parse.ANY

def can_contain(self, char, skip_literal=True):
return char in CATEGORIES["ANY"]
return char in CATEGORIES['ANY']

def must_contain(self, char, skip_literal=True):
# Char may not be present in ANY token
return False

def generate(self, context):
if context.char in CATEGORIES["ANY"]:
if context.char in CATEGORIES['ANY']:
return context.char
return "a"
return 'a'

def __str__(self):
return "."
return '.'


class LiteralToken(Token):
type = sre_parse.LITERAL

def _parse(self):
self.char = chr(self.token[1])
self.char = six.unichr(self.token[1])

def can_contain(self, char, skip_literal=True):
if skip_literal:
@@ -203,7 +200,7 @@ class NotLiteralToken(Token):
type = sre_parse.NOT_LITERAL

def _parse(self):
self.char = chr(self.token[1])
self.char = six.unichr(self.token[1])
self.gen_char_list = list(_build_reverse_list(frozenset(self.char)))

def can_contain(self, char, skip_literal=True):
@@ -220,7 +217,7 @@ def generate(self, context):
return random.choice(self.gen_char_list)

def __str__(self):
return "[^{char}]".format(char=self.char)
return '[^{char}]'.format(char=self.char)


class RangeToken(Token):
@@ -229,8 +226,8 @@ class RangeToken(Token):
def _parse(self):
self.left_code = self.token[1][0]
self.right_code = self.token[1][1]
self.left = chr(self.left_code)
self.right = chr(self.right_code)
self.left = six.unichr(self.left_code)
self.right = six.unichr(self.right_code)

def can_contain(self, char, skip_literal=True):
return self.left <= char <= self.right
@@ -242,17 +239,17 @@ def generate(self, context):
if self.can_contain(context.char):
return context.char

return chr(random.randint(self.token[1][0], self.token[1][1]))
return six.unichr(random.randint(self.token[1][0], self.token[1][1]))

def __str__(self):
return "{left}-{right}".format(left=self.left, right=self.right)
return '{left}-{right}'.format(left=self.left, right=self.right)


class CategoryToken(Token):
type = sre_parse.CATEGORY

def _parse(self):
self.char_list = CATEGORIES.get(self.token[1], [""])
self.char_list = CATEGORIES.get(self.token[1], [''])

def can_contain(self, char, skip_literal=True):
return char in self.char_list
@@ -268,7 +265,7 @@ def generate(self, context):
return c

def __str__(self):
return CATEGORIES_NAMES.get(self.token[1], "\\C")
return CATEGORIES_NAMES.get(self.token[1], '\\C')


class MinRepeatToken(Token):
@@ -321,7 +318,7 @@ def generate(self, context):
res = []
if self.min == 0:
# [a-z]*
res.append("")
res.append('')
if self.max == 0:
# [a-z]{0}
return res
@@ -336,18 +333,16 @@ def generate(self, context):
return result

def __str__(self):
childs = "".join(str(x) for x in self.childs)
childs = ''.join(str(x) for x in self.childs)
if self.min == self.max:
return "{childs}{{{count}}}?".format(childs=childs, count=self.min)
return '{childs}{{{count}}}?'.format(childs=childs, count=self.min)
if self.min == 0 and self.max == 1:
return "{childs}?".format(childs=childs)
return '{childs}?'.format(childs=childs)
if self.min == 0 and self.max == sre_parse.MAXREPEAT:
return "{childs}*?".format(childs=childs)
return '{childs}*?'.format(childs=childs)
if self.min == 1 and self.max == sre_parse.MAXREPEAT:
return "{childs}+?".format(childs=childs)
return "{childs}{{{min},{max}}}?".format(
childs=childs, min=self.min, max=self.max
)
return '{childs}+?'.format(childs=childs)
return '{childs}{{{min},{max}}}?'.format(childs=childs, min=self.min, max=self.max)


class MaxRepeatToken(Token):
@@ -400,7 +395,7 @@ def generate(self, context):
res = []
if self.min == 0:
# [a-z]*
res.append("")
res.append('')
if self.max == 0:
# [a-z]{0}
return res
@@ -415,18 +410,16 @@ def generate(self, context):
return result

def __str__(self):
childs = "".join(str(x) for x in self.childs)
childs = ''.join(str(x) for x in self.childs)
if self.min == self.max:
return "{childs}{{{count}}}".format(childs=childs, count=self.min)
return '{childs}{{{count}}}'.format(childs=childs, count=self.min)
if self.min == 0 and self.max == 1:
return "{childs}?".format(childs=childs)
return '{childs}?'.format(childs=childs)
if self.min == 0 and self.max == sre_parse.MAXREPEAT:
return "{childs}*".format(childs=childs)
return '{childs}*'.format(childs=childs)
if self.min == 1 and self.max == sre_parse.MAXREPEAT:
return "{childs}+".format(childs=childs)
return "{childs}{{{min},{max}}}".format(
childs=childs, min=self.min, max=self.max
)
return '{childs}+'.format(childs=childs)
return '{childs}{{{min},{max}}}'.format(childs=childs, min=self.min, max=self.max)


class BranchToken(Token):
@@ -436,17 +429,11 @@ def _parse(self):
self.childs = []
for token in self.token[1][1]:
if not token:
self.childs.append(
EmptyToken(token=token, parent=self.parent, regexp=self.regexp)
)
self.childs.append(EmptyToken(token=token, parent=self.parent, regexp=self.regexp))
elif isinstance(token, sre_parse.SubPattern):
self.childs.append(
InternalSubpatternToken(
token=token, parent=self.parent, regexp=self.regexp
)
)
self.childs.append(InternalSubpatternToken(token=token, parent=self.parent, regexp=self.regexp))
else:
raise RuntimeError("Unexpected token {0} in branch".format(token))
raise RuntimeError('Unexpected token {0} in branch'.format(token))

def can_contain(self, char, skip_literal=True):
for child in self.childs:
@@ -475,7 +462,7 @@ def generate(self, context):
return res

def __str__(self):
return "(?:{0})".format("|".join(str(x) for x in self.childs))
return '(?:{0})'.format('|'.join(str(x) for x in self.childs))


class SubpatternToken(Token):
@@ -509,11 +496,9 @@ def can_startswith(self, char, strict=False):
continue
return can
return False
elif not strict and not isinstance(
self.childs[0], (SubpatternToken, InternalSubpatternToken)
):
elif not strict and not isinstance(self.childs[0], (SubpatternToken, InternalSubpatternToken)):
# Not strict regexp w/o ^ can starts with any character
return char in CATEGORIES["ANY"]
return char in CATEGORIES['ANY']

for child in self.childs:
can = child.can_startswith(char, strict)
@@ -531,9 +516,7 @@ def must_startswith(self, char, strict=False):
continue
return must
return False
elif not strict and not isinstance(
self.childs[0], (SubpatternToken, InternalSubpatternToken)
):
elif not strict and not isinstance(self.childs[0], (SubpatternToken, InternalSubpatternToken)):
# Not strict regexp w/o ^ MAY NOT starts with any character
return False

@@ -552,10 +535,10 @@ def generate(self, context):
return _gen_combinator(res)

def __str__(self):
childs = "".join(str(x) for x in self.childs)
childs = ''.join(str(x) for x in self.childs)
if self.group is None:
return "(?:{childs})".format(childs=childs)
return "({childs})".format(childs=childs)
return '(?:{childs})'.format(childs=childs)
return '({childs})'.format(childs=childs)


class InternalSubpatternToken(Token):
@@ -586,11 +569,9 @@ def can_startswith(self, char, strict=False):
continue
return can
return False
elif not strict and not isinstance(
self.childs[0], (SubpatternToken, InternalSubpatternToken)
):
elif not strict and not isinstance(self.childs[0], (SubpatternToken, InternalSubpatternToken)):
# Not strict regexp w/o ^ can starts with any character
return char in CATEGORIES["ANY"]
return char in CATEGORIES['ANY']

for child in self.childs:
can = child.can_startswith(char, strict)
@@ -608,9 +589,7 @@ def must_startswith(self, char, strict=False):
continue
return must
return False
elif not strict and not isinstance(
self.childs[0], (SubpatternToken, InternalSubpatternToken)
):
elif not strict and not isinstance(self.childs[0], (SubpatternToken, InternalSubpatternToken)):
# Not strict regexp w/o ^ MAY NOT starts with any character
return False

@@ -629,7 +608,7 @@ def generate(self, context):
return _gen_combinator(res)

def __str__(self):
return "".join(str(x) for x in self.childs)
return ''.join(str(x) for x in self.childs)


class InToken(Token):
@@ -679,10 +658,7 @@ def _generate_negative(self, context):
elif isinstance(child, LiteralToken):
blacklisted.add(child.char)
elif isinstance(child, RangeToken):
blacklisted.update(
chr(c)
for c in six.moves.range(child.left_code, child.right_code + 1)
)
blacklisted.update(six.unichr(c) for c in six.moves.range(child.left_code, child.right_code + 1))
elif isinstance(child, CategoryToken):
blacklisted.update(child.char_list)
else:
@@ -704,7 +680,7 @@ def generate(self, context):
return self._generate_positive(context)

def __str__(self):
return "[{childs}]".format(childs="".join(str(x) for x in self.childs))
return '[{childs}]'.format(childs=''.join(str(x) for x in self.childs))


class AtToken(Token):
@@ -723,17 +699,17 @@ def must_contain(self, char):
def generate(self, context):
if context.anchored:
if self.begin:
return "^"
return '^'
if self.end:
return "$"
return '$'
return None

def __str__(self):
if self.begin:
return "^"
return '^'
if self.end:
return "$"
LOG.warning("unexpected AT token: %s", self.token)
return '$'
LOG.warning('unexpected AT token: %s', self.token)


class NegateToken(Token):
@@ -755,7 +731,7 @@ def generate(self, context):
return None

def __str__(self):
return "^"
return '^'


class GroupRefToken(Token):
@@ -781,7 +757,7 @@ def generate(self, context):
return self.group.generate(context)

def __str__(self):
return "\\\\{0}".format(self.id)
return '\\\\{0}'.format(self.id)


class AssertToken(Token):
@@ -837,10 +813,10 @@ def must_startswith(self, char, strict=False):
return None

def generate(self, context):
return ""
return ''

def __str__(self):
return ""
return ''


def parse(sre_obj, parent=None, regexp=None):
@@ -893,9 +869,7 @@ def __init__(self, char, max_repeat=5, strict=False, anchored=True):


class Regexp(object):
def __init__(
self, source, strict=False, case_sensitive=True, _root=None, _parsed=None
):
def __init__(self, source, strict=False, case_sensitive=True, _root=None, _parsed=None):
"""
Gixy Regexp class, parse and provide helpers to work with it.
@@ -925,7 +899,8 @@ def can_startswith(self, char):
"""

return self.root.can_startswith(
char=char if self.case_sensitive else char.lower(), strict=self.strict
char=char if self.case_sensitive else char.lower(),
strict=self.strict
)

def can_contain(self, char, skip_literal=True):
@@ -945,7 +920,7 @@ def can_contain(self, char, skip_literal=True):

return self.root.can_contain(
char=char if self.case_sensitive else char.lower(),
skip_literal=skip_literal,
skip_literal=skip_literal
)

def must_startswith(self, char):
@@ -962,7 +937,8 @@ def must_startswith(self, char):
"""

return self.root.must_startswith(
char=char if self.case_sensitive else char.lower(), strict=self.strict
char=char if self.case_sensitive else char.lower(),
strict=self.strict
)

def must_contain(self, char):
@@ -997,8 +973,8 @@ def generate(self, char, anchored=False, max_repeat=5):

context = GenerationContext(char, anchored=anchored, max_repeat=max_repeat)
for val in self.root.generate(context=context):
if anchored and self.strict and not val.startswith("^"):
yield "^" + val
if anchored and self.strict and not val.startswith('^'):
yield '^' + val
else:
yield val

@@ -1012,7 +988,7 @@ def group(self, name):

if name in self.groups:
return self.groups[name]
return Regexp("")
return Regexp('')

def reg_group(self, gid, token):
self._groups[gid] = token
@@ -1027,12 +1003,7 @@ def groups(self):
# for name, token in self._groups.items():
# result[name] = Regexp(str(self), root=token, strict=True, case_sensitive=self.case_sensitive)
for name, parsed in extract_groups(self.parsed).items():
result[name] = Regexp(
"compiled",
_parsed=parsed,
strict=True,
case_sensitive=self.case_sensitive,
)
result[name] = Regexp('compiled', _parsed=parsed, strict=True, case_sensitive=self.case_sensitive)
for name, group in self.parsed.pattern.groupdict.items():
result[name] = result[group]
return result
@@ -1053,11 +1024,9 @@ def parsed(self):
return self._parsed

try:
self._parsed = sre_parse.parse(
FIX_NAMED_GROUPS_RE.sub("(?P<\\1>", self.source)
)
self._parsed = sre_parse.parse(FIX_NAMED_GROUPS_RE.sub('(?P<\\1>', self.source))
except sre_parse.error as e:
LOG.fatal("Failed to parse regex: %s (%s)", self.source, str(e))
LOG.fatal('Failed to parse regex: %s (%s)', self.source, str(e))
raise e

return self._parsed
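
A minimal, hypothetical usage sketch of the Regexp class restored above; the method names and signatures mirror this diff, but the example pattern and the interpretation comments are assumptions, not part of the commit:

    from gixy.core.regexp import Regexp

    # Hypothetical nginx-style location regex
    regexp = Regexp(r'^/static/(?P<path>.+)\.(?:jpg|png)$', strict=True)

    print(regexp.can_startswith('/'))   # can a matching string begin with '/'?
    print(regexp.must_contain('.'))     # must every matching string contain a '.'?
    path_group = regexp.group('path')   # Regexp for the named "path" group (empty Regexp if absent)

    # generate() yields sample strings the pattern could match, trying to
    # include the given character (here a newline)
    for sample in regexp.generate('\n', anchored=True):
        print(sample)
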
79 changes: 34 additions & 45 deletions gixy/core/sre_parse/sre_parse.py
@@ -34,7 +34,7 @@
r"\r": (LITERAL, ord("\r")),
r"\t": (LITERAL, ord("\t")),
r"\v": (LITERAL, ord("\v")),
r"\\": (LITERAL, ord("\\")),
r"\\": (LITERAL, ord("\\"))
}

CATEGORIES = {
@@ -78,12 +78,8 @@ def opengroup(self, name=None):
if name is not None:
ogid = self.groupdict.get(name, None)
if ogid is not None:
raise error(
(
"redefinition of group name %s as group %d; "
"was group %d" % (repr(name), gid, ogid)
)
)
raise error(("redefinition of group name %s as group %d; "
"was group %d" % (repr(name), gid, ogid)))
self.groupdict[name] = gid
self.open.append(gid)
return gid
@@ -169,7 +165,6 @@ class Tokenizer:
def __init__(self, string):
self.string = string
self.index = 0
self.next = None
self.__next()

def __next(self):
@@ -240,13 +235,13 @@ def _class_escape(source, escape):
escape = escape[2:]
if len(escape) != 2:
raise error("bogus escape: %s" % repr("\\" + escape))
return LITERAL, int(escape, 16) & 0xFF
return LITERAL, int(escape, 16) & 0xff
elif c in OCTDIGITS:
# octal escape (up to three digits)
while source.next in OCTDIGITS and len(escape) < 4:
escape = escape + source.get()
escape = escape[1:]
return LITERAL, int(escape, 8) & 0xFF
return LITERAL, int(escape, 8) & 0xff
elif c in DIGITS:
raise error("bogus escape: %s" % repr(escape))
if len(escape) == 2:
@@ -272,37 +267,31 @@ def _escape(source, escape, state):
escape = escape + source.get()
if len(escape) != 4:
raise ValueError
return LITERAL, int(escape[2:], 16) & 0xFF
return LITERAL, int(escape[2:], 16) & 0xff
elif c == "0":
# octal escape
while source.next in OCTDIGITS and len(escape) < 4:
escape = escape + source.get()
return LITERAL, int(escape[1:], 8) & 0xFF
return LITERAL, int(escape[1:], 8) & 0xff
elif c in DIGITS:
# octal escape *or* decimal group reference (sigh)
if source.next in DIGITS:
escape = escape + source.get()
if (
escape[1] in OCTDIGITS
and escape[2] in OCTDIGITS
and source.next in OCTDIGITS
):
if (escape[1] in OCTDIGITS and escape[2] in OCTDIGITS and
source.next in OCTDIGITS):
# got three octal digits; this is an octal escape
escape = escape + source.get()
return LITERAL, int(escape[1:], 8) & 0xFF
return LITERAL, int(escape[1:], 8) & 0xff
# not an octal escape, so this is a group reference
group = int(escape[1:])
if group < state.groups:
if not state.checkgroup(group):
raise error("cannot refer to open group")
if state.lookbehind:
import warnings

warnings.warn(
"group references in lookbehind "
"assertions are not supported",
RuntimeWarning,
)
warnings.warn('group references in lookbehind '
'assertions are not supported',
RuntimeWarning)
return GROUPREF, group
raise ValueError
if len(escape) == 2:
@@ -390,7 +379,7 @@ def _parse_sub_cond(source, state, condgroup):
_PATTERNENDERS = set("|)")
_ASSERTCHARS = set("=!<")
_LOOKBEHINDASSERTCHARS = set("=!")
_REPEATCODES = {MIN_REPEAT, MAX_REPEAT}
_REPEATCODES = set([MIN_REPEAT, MAX_REPEAT])


def _parse(source, state):
@@ -565,7 +554,8 @@ def _parse(source, state):
if not name:
raise error("missing group name")
if not isname(name):
raise error("bad character in group name %r" % name)
raise error("bad character in group name %r" %
name)
elif sourcematch("="):
# named backreference
name = ""
@@ -579,21 +569,17 @@ def _parse(source, state):
if not name:
raise error("missing group name")
if not isname(name):
raise error(
"bad character in backref group name " "%r" % name
)
raise error("bad character in backref group name "
"%r" % name)
gid = state.groupdict.get(name)
if gid is None:
msg = "unknown group name: {0!r}".format(name)
raise error(msg)
if state.lookbehind:
import warnings

warnings.warn(
"group references in lookbehind "
"assertions are not supported",
RuntimeWarning,
)
warnings.warn('group references in lookbehind '
'assertions are not supported',
RuntimeWarning)
subpatternappend((GROUPREF, gid))
continue
else:
@@ -658,12 +644,9 @@ def _parse(source, state):
raise error("bad character in group name")
if state.lookbehind:
import warnings

warnings.warn(
"group references in lookbehind "
"assertions are not supported",
RuntimeWarning,
)
warnings.warn('group references in lookbehind '
'assertions are not supported',
RuntimeWarning)
else:
# flags
if not source.next in FLAGS:
@@ -754,6 +737,11 @@ def literal(literal, p=p, pappend=a):
else:
pappend((LITERAL, literal))

sep = source[:0]
if type(sep) is type(""):
makechar = chr
else:
makechar = unichr
while 1:
this = sget()
if this is None:
@@ -791,20 +779,21 @@ def literal(literal, p=p, pappend=a):
this = this + sget()
if s.next in OCTDIGITS:
this = this + sget()
literal(chr(int(this[1:], 8) & 0xFF))
literal(makechar(int(this[1:], 8) & 0xff))
elif c in DIGITS:
isoctal = False
if s.next in DIGITS:
this = this + sget()
if c in OCTDIGITS and this[2] in OCTDIGITS and s.next in OCTDIGITS:
if (c in OCTDIGITS and this[2] in OCTDIGITS and
s.next in OCTDIGITS):
this = this + sget()
isoctal = True
literal(chr(int(this[1:], 8) & 0xFF))
literal(makechar(int(this[1:], 8) & 0xff))
if not isoctal:
a((MARK, int(this[1:])))
else:
try:
this = chr(ESCAPES[this][1])
this = makechar(ESCAPES[this][1])
except KeyError:
pass
literal(this)
24 changes: 12 additions & 12 deletions gixy/plugins/add_header_content_type.py
@@ -7,34 +7,34 @@ class add_header_content_type(Plugin):
Bad example: add_header Content-Type text/plain;
Good example: default_type text/plain;
"""

summary = "Found add_header usage for setting Content-Type."
summary = 'Found add_header usage for setting Content-Type.'
severity = gixy.severity.LOW
description = 'Target Content-Type in NGINX should not be set via "add_header"'
help_url = "https://github.com/dvershinin/gixy/blob/master/docs/en/plugins/add_header_content_type.md"
directives = ["add_header"]
help_url = 'https://github.com/dvershinin/gixy/blob/master/docs/en/plugins/add_header_content_type.md'
directives = ['add_header']

def audit(self, directive):
header_values = get_header_values(directive)
if directive.header == "content-type":
reason = 'You probably want "default_type {default_type};" instead of "add_header" or "more_set_headers"'.format(
default_type=header_values[0]
if directive.header == 'content-type':
reason = 'You probably want "default_type {default_type};" instead of "add_header" or "more_set_headers"'.format(default_type=header_values[0])
self.add_issue(
directive=directive,
reason=reason
)
self.add_issue(directive=directive, reason=reason)


def get_header_values(directive):
if directive.name == "add_header":
if directive.name == 'add_header':
return [directive.args[1]]

# See headers more documentation: https://github.com/openresty/headers-more-nginx-module#description
result = []
skip_next = False
for arg in directive.args:
if arg in ["-s", "-t"]:
# Skip the next value because it's not a header
if arg in ['-s', '-t']:
# Skip next value, because it's not a header
skip_next = True
elif arg.startswith("-"):
elif arg.startswith('-'):
# Skip any options
pass
elif skip_next:
