Skip to content

Commit

Permalink
test: setup test environment for source code analyzer
Browse files Browse the repository at this point in the history
  • Loading branch information
art1f1c3R committed Feb 3, 2025
1 parent c87c685 commit 1d8039b
Show file tree
Hide file tree
Showing 12 changed files with 704 additions and 33 deletions.
4 changes: 3 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

# See https://pre-commit.com for more information
Expand Down Expand Up @@ -64,6 +64,7 @@ repos:
name: Check flake8 issues
files: ^src/macaron/|^tests/
types: [text, python]
exclude: ^tests/malware_analyzer/pypi/resources/sourcecode_samples.*
additional_dependencies: [flake8-bugbear==22.10.27, flake8-builtins==2.0.1, flake8-comprehensions==3.10.1, flake8-docstrings==1.6.0, flake8-mutable==1.2.0, flake8-noqa==1.3.0, flake8-pytest-style==1.6.0, flake8-rst-docstrings==0.3.0, pep8-naming==0.13.2]
args: [--config, .flake8]

Expand Down Expand Up @@ -94,6 +95,7 @@ repos:
language: python
files: ^src/macaron/|^tests/
types: [text, python]
exclude: ^tests/malware_analyzer/pypi/resources/sourcecode_samples.*
args: [--show-traceback, --config-file, pyproject.toml]

# Check for potential security issues.
Expand Down
1 change: 1 addition & 0 deletions .semgrepignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Items added to this file will be ignored by Semgrep.
11 changes: 6 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -121,12 +121,14 @@ Issues = "https://github.com/oracle/macaron/issues"
[tool.bandit]
tests = []
skips = ["B101"]

exclude_dirs = ['tests/malware_analyzer/pypi/resources/sourcecode_samples']

# https://github.com/psf/black#configuration
[tool.black]
line-length = 120

force-exclude = '''
tests/malware_analyzer/pypi/resources/sourcecode_samples/
'''

# https://github.com/commitizen-tools/commitizen
# https://commitizen-tools.github.io/commitizen/bump/
Expand Down Expand Up @@ -172,7 +174,6 @@ exclude = [
"SECURITY.md",
]


# https://pycqa.github.io/isort/
[tool.isort]
profile = "black"
Expand All @@ -183,7 +184,6 @@ skip_gitignore = true

# https://mypy.readthedocs.io/en/stable/config_file.html#using-a-pyproject-toml
[tool.mypy]
# exclude=
show_error_codes = true
show_column_numbers = true
check_untyped_defs = true
Expand All @@ -210,7 +210,6 @@ module = [
]
ignore_missing_imports = true


# https://pylint.pycqa.org/en/latest/user_guide/configuration/index.html
[tool.pylint.MASTER]
fail-under = 10.0
Expand Down Expand Up @@ -241,6 +240,7 @@ disable = [
"too-many-statements",
"duplicate-code",
]
ignore-paths = "tests/malware_analyzer/pypi/resources/sourcecode_samples"

[tool.pylint.MISCELLANEOUS]
notes = [
Expand All @@ -262,6 +262,7 @@ addopts = """-vv -ra --tb native \
--doctest-modules --doctest-continue-on-failure --doctest-glob '*.rst' \
--cov macaron \
--ignore tests/integration \
--ignore tests/malware_analyzer/pypi/resources/sourcecode_samples \
""" # Consider adding --pdb
# https://docs.python.org/3/library/doctest.html#option-flags
doctest_optionflags = "IGNORE_EXCEPTION_DETAIL"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,11 @@ class PyPISourcecodeAnalyzer:

EXPECTED_PATTERN_CATEGORIES = [IMPORTS, CONSTANTS, CALLS]

def __init__(self) -> None:
def __init__(self, resources_path: str = global_config.resources_path) -> None:
"""Collect required data for analysing the source code."""
self.default_rule_path, self.custom_rule_path = self._load_defaults()
self.default_rule_path, self.custom_rule_path = self._load_defaults(resources_path)

def _load_defaults(self) -> tuple[str, str | None]:
def _load_defaults(self, resources_path: str) -> tuple[str, str | None]:
"""
Load the default semgrep rules and, if present, the custom semgrep rules provided by the user.
Expand All @@ -72,9 +72,15 @@ def _load_defaults(self) -> tuple[str, str | None]:
Raises
------
ConfigurationError
If the heuristic.pypi entry is not present, or if the semgrep validation of the custom rule path failed.
If the default rule path does not exist, if the heuristic.pypi section is missing, or if the semgrep
validation of the custom rule path fails.
"""
default_rule_path = os.path.join(global_config.resources_path, "pypi_malware_rules")
default_rule_path = os.path.join(resources_path, "pypi_malware_rules")
if not os.path.exists(default_rule_path):
error_msg = f"Error with locating default rule path {default_rule_path}"
logger.debug(error_msg)
raise ConfigurationError(error_msg)

section_name = "heuristic.pypi"

if defaults.has_section(section_name):
Expand Down Expand Up @@ -112,7 +118,7 @@ def _load_defaults(self) -> tuple[str, str | None]:
logger.debug("Including custom ruleset from %s.", custom_rule_path)
return default_rule_path, custom_rule_path

def analyze_patterns(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicResult, dict[str, JsonType]]:
def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicResult, dict[str, JsonType]]:
"""Analyze the source code of the package for malicious patterns.
This is the first phase of the source code analyzer.
Expand Down
52 changes: 51 additions & 1 deletion src/macaron/resources/pypi_malware_rules/exfiltration.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
rules:
- id: remote-exfiltration
metadata:
description: Detected the exfiltration of data to a remote endpoint
description: Identifies the flow of sensitive information to a remote endpoint.
message: Detected exfiltration of sensitive data to a remote endpoint.
languages:
- python
Expand All @@ -23,6 +23,43 @@ rules:
- pattern: __import__('builtins').exec(...)
- pattern: __import__('builtins').eval(...)

# process spawning
# using subprocess module
- pattern: subprocess.check_output(...)
- pattern: subprocess.check_call(...)
- pattern: subprocess.run(...)
- pattern: subprocess.call(...)
- pattern: subprocess.Popen(...)
- pattern: subprocess.getoutput(...)
- pattern: subprocess.getstatusoutput(...)
# using os module
- pattern: os.execl(...)
- pattern: os.execle(...)
- pattern: os.execlp(...)
- pattern: os.execlpe(...)
- pattern: os.execv(...)
- pattern: os.execve(...)
- pattern: os.execvp(...)
- pattern: os.execvpe(...)
- pattern: os.popen(...)
- pattern: os.posix_spawn(...)
- pattern: os.posix_spawnp(...)
- pattern: os.spawnl(...)
- pattern: os.spawnle(...)
- pattern: os.spawnlp(...)
- pattern: os.spawnlpe(...)
- pattern: os.spawnv(...)
- pattern: os.spawnve(...)
- pattern: os.spawnvp(...)
- pattern: os.spawnvpe(...)
- pattern: os.system(...)
# using commands module
- pattern: commands.getstatusoutput(...)
- pattern: commands.getoutput(...)
# using runpy module
- pattern: runpy.run_module(...)
- pattern: runpy.run_path(...)

# environment variables
- pattern: os.environ
- pattern: os.environ[...]
Expand Down Expand Up @@ -84,6 +121,19 @@ rules:
- pattern: winreg.QueryInfoKey(...)
- pattern: winreg.QueryValue(...)
- pattern: winreg.QueryValueEx(...)
- pattern: sqlite3.connect(...)

# file exfiltration
- patterns:
    # Taint source: a builtin open() call whose mode allows reading the file back.
    - pattern: open($FILE, $MODE)
    - metavariable-regex:
        metavariable: $MODE
        # "+" is escaped so that "r+", "w+" and "a+" are the literal read/write modes
        # rather than "one or more r/w/a" (which also matched write-only "w" and "a").
        # The anchors force the whole mode to match instead of just a prefix; the
        # optional quotes tolerate the metavariable text including the string delimiters.
        regex: ^['"]?(r|rt|r\+|w\+|rb|r\+b|w\+b|a\+|a\+b)['"]?$
- patterns:
- pattern: os.open($FILE, $MODE)
- metavariable-regex:
metavariable: $MODE
regex: os\.O_RDONLY|os\.O_RDWR

pattern-sinks:
- pattern-either:
Expand Down
34 changes: 15 additions & 19 deletions src/macaron/resources/pypi_malware_rules/obfuscation.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,31 +67,26 @@ rules:
- pattern: __import__('__pyarmor__')
# pyarmor RTF mode: pyarmor.readthedocs.io/en/latest/tutorial/advanced.html
- pattern: __assert_armored__($PAYLOAD)
- patterns:
- pattern: |
def $FUNC_NAME(...):
...
- metavariable-regex:
metavariable: $FUNC_NAME
regex: ^pyarmor__\d+$
# inline pyarmor marker: pyarmor.readthedocs.io/en/latest/tutorial/advanced.html
- pattern-regex: ^# pyarmor:.?
- pattern-regex: ^\s*#\s*pyarmor:.*
# obfuscated names using pyob.oxyry.com with O, o, 0 or github.com/QQuick/Opy and pyobfuscate using l, I, 1
- patterns:
- pattern: |
def $OBF(...):
...
- pattern: |
class $OBF(...):
...
- pattern: $OBF = ...
- pattern-either:
- pattern: |
def $OBF(...):
...
- pattern: |
class $OBF(...):
...
- pattern: $OBF = ...
- metavariable-regex:
metavariable: $OBF
regex: (^_?[Oo0]|[1Il]+$)
regex: (^_*([lI1_]{5,}|[Oo0_]{5,})_*$)|(^pyarmor_*\d+$)
# obfuscated using pyobfuscate.com
- pattern: pyobfuscate=...
# obfuscated using liftoff.github.io/pyminifier
- pattern: import mystificate
- pattern: import demiurgic

- id: inline-imports
metadata:
Expand Down Expand Up @@ -134,9 +129,10 @@ rules:
- pattern: bytes.fromhex(...)
# unicode construction
- patterns:
- pattern: $STRING.join(map($FOO, [...]))
- pattern: $STRING.join($FOO($VAL) for $VAL in [...])
- pattern: $STRING.join($FOO($VAL) for $VAL in $GEN(...))
- pattern-either:
- pattern: $STRING.join(map($FOO, [...]))
- pattern: $STRING.join($FOO($VAL) for $VAL in [...])
- pattern: $STRING.join($FOO($VAL) for $VAL in $GEN(...))
- metavariable-regex:
metavariable: $FOO
regex: unicode|unichr|chr|ord
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ def analyze_source(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[Heuri
logger.debug("Instantiating %s", PyPISourcecodeAnalyzer.__name__)
try:
sourcecode_analyzer = PyPISourcecodeAnalyzer()
return sourcecode_analyzer.analyze_patterns(pypi_package_json)
return sourcecode_analyzer.analyze(pypi_package_json)
except (ConfigurationError, HeuristicAnalyzerValueError) as source_code_error:
logger.debug("Unable to perform source code analysis: %s", source_code_error)
return HeuristicResult.SKIP, {}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""
Running this code will not produce any malicious behavior, but code isolation measures are
in place for safety.
"""

import sys

# ensure no symbols are exported so this code cannot accidentally be used
__all__ = []
sys.exit()

def test_function():
"""
All code to be tested will be defined inside this function, so it is all local to it. This is
to isolate the code to be tested, as it exists to replicate the patterns present in malware
samples.
"""
sys.exit()  # guard: terminates before any of the sample statements below can execute
# marshal encryption from pyobfuscate.com/marshal-encrypt, script is just print("Hello world!")

from marshal import loads
bytecode = loads(b'\xe3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00@\x00\x00\x00s\x0c\x00\x00\x00e\x00d\x00\x83\x01\x01\x00d\x01S\x00)\x02z\x0cHello world!N)\x01\xda\x05print\xa9\x00r\x02\x00\x00\x00r\x02\x00\x00\x00\xfa\x08<string>\xda\x08<module>\x01\x00\x00\x00\xf3\x00\x00\x00\x00')  # deserialize the embedded code object
exec(bytecode)  # load-then-exec is the pattern being replicated; unreachable after the sys.exit() guard
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""
Running this code will not produce any malicious behavior, but code isolation measures are
in place for safety.
"""

import sys

# ensure no symbols are exported so this code cannot accidentally be used
__all__ = []
sys.exit()

def test_function():
    """
    All code to be tested will be defined inside this function, so it is all local to it. This is
    to isolate the code to be tested, as it exists to replicate the patterns present in malware
    samples.
    """
    sys.exit()  # guard: terminates before any of the sample statements below can execute
    import builtins
    _ = __import__  # group 1: bare references to the builtin names
    _ = getattr
    _ = bytes
    _ = bytearray
    _ = exec
    _ = eval
    _ = setattr
    _ = compile
    _ = map
    _ = open
    _ = zip
    _ = vars
    _ = dir
    _ = builtins.__import__  # group 2: the same names reached through the builtins module
    _ = builtins.getattr
    _ = builtins.bytes
    _ = builtins.bytearray
    _ = builtins.exec
    _ = builtins.eval
    _ = builtins.setattr
    _ = builtins.compile
    _ = builtins.map
    _ = builtins.open
    _ = builtins.zip
    _ = builtins.vars
    _ = builtins.dir
    _ = __import__('builtins').__import__  # group 3: the same names via an inline __import__
    _ = __import__('builtins').getattr
    _ = __import__('builtins').bytes
    _ = __import__('builtins').bytearray
    _ = __import__('builtins').exec
    _ = __import__('builtins').eval
    _ = __import__('builtins').setattr
    _ = __import__('builtins').compile
    _ = __import__('builtins').map  # was ".builtins.map"; the builtins module has no "builtins" attribute
    _ = __import__('builtins').open
    _ = __import__('builtins').zip
    _ = __import__('builtins').vars
    _ = __import__('builtins').dir
Loading

0 comments on commit 1d8039b

Please sign in to comment.