diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 24cf391105e..1df8fd5c0a3 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -37,7 +37,7 @@ jobs: poetry run pip install git+https://github.com/CAPESandbox/pyattck maco - name: Run Ruff - run: poetry run ruff check . --line-length 132 --ignore E501,E402 + run: poetry run ruff check . --output-format=github . - name: Run unit tests run: poetry run python -m pytest --import-mode=append @@ -63,13 +63,6 @@ jobs: with: python-version: ${{ matrix.python-version }} - - name: Format with black - run: poetry run black . - - # to be replaced with ruff - - name: Format imports with isort - run: poetry run isort . - - name: Commit changes if any # Skip this step if being run by nektos/act if: ${{ !env.ACT }} @@ -77,6 +70,8 @@ jobs: git config user.name "GitHub Actions" git config user.email "action@github.com" if output=$(git status --porcelain) && [ ! -z "$output" ]; then + git pull + git add . git commit -m "style: Automatic code formatting" -a git push fi diff --git a/agent/agent.py b/agent/agent.py index 987b597940c..eeeb9cdfa10 100644 --- a/agent/agent.py +++ b/agent/agent.py @@ -227,7 +227,6 @@ def handle(self, obj): self.close_connection = True def shutdown(self): - # BaseServer also features a .shutdown() method, but you can't use # that from the same thread as that will deadlock the whole thing. if hasattr(self, "s"): diff --git a/analyzer/linux/analyzer.py b/analyzer/linux/analyzer.py index 6add3119edc..28c78bf335c 100644 --- a/analyzer/linux/analyzer.py +++ b/analyzer/linux/analyzer.py @@ -71,7 +71,7 @@ def monitor_new_processes(parent_pid, interval=0.25): new_processes = current_processes - known_processes for pid in new_processes: - log.info(f"New child process detected: {pid}") + log.info("New child process detected: %s", str(pid)) dump_memory(pid) add_pids(pid) # Add the new process to PROCESS_LIST @@ -118,20 +118,20 @@ def dump_memory(pid): chunk = mem_file.read(end - start) output_file.write(chunk) except (OSError, ValueError) as e: - log.error(f"Could not read memory range {start:x}-{end:x}: {e}") + log.error("Could not read memory range %s: {e}", f"{start:x}-{end:x}", str(e)) maps_file.close() mem_file.close() output_file.close() except FileNotFoundError: - log.error(f"Process with PID {pid} not found.") + log.error("Process with PID %s not found.", str(pid)) except PermissionError: - log.error(f"Permission denied to access process with PID {pid}.") + log.error("Permission denied to access process with PID %s.", str(pid)) if os.path.exists(f"{MEM_PATH}/{pid}.dmp"): upload_to_host(f"{MEM_PATH}/{pid}.dmp", f"memory/{pid}.dmp") DUMPED_LIST.add(pid) else: - log.error(f"Memdump file not found in guest machine for PID {pid}") + log.error("Memdump file not found in guest machine for PID %s", str(pid)) class Analyzer: diff --git a/analyzer/linux/lib/api/screenshot.py b/analyzer/linux/lib/api/screenshot.py index 2273e6b2ade..eb93756781c 100644 --- a/analyzer/linux/lib/api/screenshot.py +++ b/analyzer/linux/lib/api/screenshot.py @@ -139,7 +139,7 @@ async def is_gnome(self): log.info("Detected non-Gnome desktop environment.") else: self._is_gnome = True - log.info(f"Detected Gnome version {version}") + log.info("Detected Gnome version %s", str(version)) name = "org.gnome.Screenshot" resp = await self.bus.request_name(name) if resp not in ( @@ -205,8 +205,8 @@ async def take_screenshot_gnome(self): 
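Most hunks in this patch swap eager f-string interpolation for logging's lazy %-style arguments; a minimal sketch of the pattern, with illustrative function names (the memory-range case shows how several values map onto separate placeholders):

import logging

log = logging.getLogger(__name__)

def dump_range_error(start: int, end: int, exc: Exception) -> None:
    # One placeholder per argument; formatting is deferred until the record is emitted.
    log.error("Could not read memory range %x-%x: %s", start, end, exc)

def report_new_pid(pid: int) -> None:
    log.info("New child process detected: %s", pid)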
"http://www.freedesktop.org/standards/dbus/1.0/introspect.dtd"> - - + + @@ -260,7 +260,7 @@ async def handler(response, results): if response == 0: await queue.put(urllib.parse.urlparse(results["uri"].value).path) else: - log.warning(f"Received non-zero response when taking screenshot: {response}") + log.warning("Received non-zero response when taking screenshot: %s", str(response)) await queue.put(None) # Set up the signal handler diff --git a/analyzer/linux/lib/common/results.py b/analyzer/linux/lib/common/results.py index 365b089fa7d..b43ca019d05 100644 --- a/analyzer/linux/lib/common/results.py +++ b/analyzer/linux/lib/common/results.py @@ -38,7 +38,7 @@ def upload_to_host(file_path, dump_path, pids="", ppids="", metadata="", categor nc.send(buf, retry=True) buf = infd.read(BUFSIZE) except Exception as e: - log.error("Exception uploading file %s to host: %s", file_path, e, exc_info=True) + log.exception("Exception uploading file %s to host: %s", file_path, e) finally: if nc: nc.close() diff --git a/analyzer/linux/modules/auxiliary/filecollector.py b/analyzer/linux/modules/auxiliary/filecollector.py index c68da449ce5..83fe88f4987 100755 --- a/analyzer/linux/modules/auxiliary/filecollector.py +++ b/analyzer/linux/modules/auxiliary/filecollector.py @@ -51,7 +51,6 @@ def __init__(self, options, config): self.thread.join(0.5) def run(self): - if not HAVE_PYINOTIFY: log.info("Missed dependency: pip3 install pyinotify") return False diff --git a/analyzer/linux/modules/auxiliary/screenshots.py b/analyzer/linux/modules/auxiliary/screenshots.py index 36f25818e7b..b1ef4c83100 100644 --- a/analyzer/linux/modules/auxiliary/screenshots.py +++ b/analyzer/linux/modules/auxiliary/screenshots.py @@ -11,6 +11,7 @@ if HAVE_PIL and HAVE_DBUS_NEXT: from PIL import Image + from lib.api.screenshot import Screenshot, ScreenshotGrabber, ScreenshotsUnsupported from lib.common.abstracts import Auxiliary diff --git a/analyzer/linux/modules/packages/zip.py b/analyzer/linux/modules/packages/zip.py index 0cc17c6b775..20e475b7071 100644 --- a/analyzer/linux/modules/packages/zip.py +++ b/analyzer/linux/modules/packages/zip.py @@ -17,7 +17,6 @@ class Zip(Package): - real_package = None def prepare(self): diff --git a/analyzer/windows/analyzer.py b/analyzer/windows/analyzer.py index deb7a583b87..362b46be381 100644 --- a/analyzer/windows/analyzer.py +++ b/analyzer/windows/analyzer.py @@ -691,7 +691,7 @@ def analysis_loop(self, aux_modules): try: Process(pid=pid).upload_memdump() except Exception as e: - log.error(e, exc_info=True) + log.exception(e) log.info("Process with pid %s appears to have terminated", pid) if pid in self.process_list.pids: self.process_list.remove_pid(pid) @@ -915,7 +915,7 @@ def dump_file(self, filepath, metadata="", pids="", ppids="", category="files"): except (IOError, socket.error) as e: log.error('Unable to upload dropped file at path "%s": %s', filepath, e) except Exception as e: - log.error(e, exc_info=True) + log.exception(e) def delete_file(self, filepath, pid=None): """A file is about to removed and thus should be dumped right away.""" @@ -1508,8 +1508,7 @@ def dispatch(self, data): try: response = fn(arguments) except Exception as e: - log.error(e, exc_info=True) - log.exception("Pipe command handler exception occurred (command %s args %s)", command, arguments) + log.exception("Pipe command handler exception occurred (command %s args %s). 
%s", command, arguments, str(e)) return response @@ -1536,7 +1535,7 @@ def dispatch(self, data): # When user set wrong package, Example: Emotet package when submit doc, package only is for EXE! except CuckooError: - log.info("You probably submitted the job with wrong package", exc_info=True) + log.exception("You probably submitted the job with wrong package") data["status"] = "exception" data["description"] = "You probably submitted the job with wrong package" try: diff --git a/analyzer/windows/lib/api/process.py b/analyzer/windows/lib/api/process.py index e4b2b6f7592..c9ad9fa898f 100644 --- a/analyzer/windows/lib/api/process.py +++ b/analyzer/windows/lib/api/process.py @@ -43,13 +43,13 @@ CAPEMON64_NAME, LOADER32_NAME, LOADER64_NAME, - TTD32_NAME, - TTD64_NAME, LOGSERVER_PREFIX, PATHS, PIPE, SHUTDOWN_MUTEX, TERMINATE_EVENT, + TTD32_NAME, + TTD64_NAME, ) from lib.common.defines import ( KERNEL32, @@ -601,7 +601,6 @@ def is_64bit(self): return False def write_monitor_config(self, interest=None, nosleepskip=False): - config_path = os.path.join(Path.cwd(), "dll", f"{self.pid}.ini") log.info("Monitor config for %s: %s", self, config_path) @@ -759,7 +758,7 @@ def upload_memdump(self): try: upload_to_host(file_path, os.path.join("memory", f"{self.pid}.dmp"), category="memory") except Exception as e: - log.error(e, exc_info=True) + log.exception(e) log.error(os.path.join("memory", f"{self.pid}.dmp")) log.error(file_path) log.info("Memory dump of %s uploaded", self) diff --git a/analyzer/windows/lib/common/results.py b/analyzer/windows/lib/common/results.py index b6983a52f7d..b552bbe1c79 100644 --- a/analyzer/windows/lib/common/results.py +++ b/analyzer/windows/lib/common/results.py @@ -61,7 +61,7 @@ def upload_to_host(file_path, dump_path, pids="", ppids="", metadata="", categor size -= read_size buf = infd.read(BUFSIZE) except Exception as e: - log.error("Exception uploading file %s to host: %s", file_path, e, exc_info=True) + log.exception("Exception uploading file %s to host: %s", file_path, e) def upload_buffer_to_host(buffer, dump_path, filepath=False, pids="", ppids="", metadata="", category="", duplicated=False): diff --git a/analyzer/windows/lib/common/zip_utils.py b/analyzer/windows/lib/common/zip_utils.py index 0bccb6628f9..7ea21702a6a 100644 --- a/analyzer/windows/lib/common/zip_utils.py +++ b/analyzer/windows/lib/common/zip_utils.py @@ -1,16 +1,12 @@ import hashlib import logging import os +import re import shutil import subprocess from pathlib import Path from zipfile import BadZipfile, ZipFile -try: - import re2 as re -except ImportError: - import re - from lib.common.constants import OPT_MULTI_PASSWORD from lib.common.exceptions import CuckooPackageError from lib.common.hashing import hash_file @@ -61,7 +57,7 @@ def extract_archive(seven_zip_path, archive_path, extract_path, password="infect stdout=subprocess.PIPE, ) stdoutput, stderr = p.stdout, p.stderr - log.debug(f"{p.stdout} {p.stderr}") + log.debug("%s %s", p.stdout, p.stderr) if try_multiple_passwords: passwords = password.split(":") @@ -85,9 +81,9 @@ def extract_archive(seven_zip_path, archive_path, extract_path, password="infect stdout=subprocess.PIPE, ) stdoutput, stderr = p.stdout, p.stderr - log.debug(f"{p.stdout} {p.stderr}") + log.debug("%s - %s", p.stdout, p.stderr) if b"Wrong password" in stderr: - log.debug(f"The provided password '{pword}' was incorrect") + log.debug("The provided password '%s' was incorrect", str(pword)) continue else: # We did it! 
@@ -196,7 +192,7 @@ def extract_zip(zip_path, extract_path, password=b"infected", recursion_depth=1, raise CuckooPackageError("Invalid Zip file") from e except RuntimeError as e: if "Bad password for file" in repr(e): - log.debug(f"Password '{pword}' was unsuccessful in extracting the archive.") + log.debug("Password '%s' was unsuccessful in extracting the archive.", str(pword)) password_fail = True continue else: @@ -204,7 +200,7 @@ def extract_zip(zip_path, extract_path, password=b"infected", recursion_depth=1, try: archive.extractall(path=extract_path, pwd=pword) except RuntimeError as e: - raise CuckooPackageError(f"Unable to extract Zip file: {e}") from e + raise CuckooPackageError("Unable to extract Zip file: %s", str(e)) from e finally: if recursion_depth < 4: # Extract nested archives. @@ -228,7 +224,7 @@ def extract_zip(zip_path, extract_path, password=b"infected", recursion_depth=1, log.error("Error extracting nested Zip file %s with details: %s", name, run_err) if password_fail: - raise CuckooPackageError(f"Unable to extract password-protected Zip file with the password(s): {passwords}") + raise CuckooPackageError("Unable to extract password-protected Zip file with the password(s): %s", str(passwords)) def is_overwritten(zip_path): @@ -265,7 +261,7 @@ def winrar_extractor(winrar_binary, extract_path, archive_path): stdout=subprocess.PIPE, ) # stdoutput, stderr = p.stdout, p.stderr - log.debug(p.stdout + p.stderr) + log.debug("%s - %s", p.stdout, p.stderr) return os.listdir(extract_path) @@ -290,11 +286,11 @@ def upload_extracted_files(root, files_at_root): for entry in files_at_root: try: file_path = os.path.join(root, entry) - log.info("Uploading {0} to host".format(file_path)) + log.info("Uploading %s to host", str(file_path)) filename = f"files/{hash_file(hashlib.sha256, file_path)}" upload_to_host(file_path, filename, metadata=Path(entry).name, duplicated=False) except Exception as e: - log.warning(f"Couldn't upload file {Path(entry).name} to host {e}") + log.warning("Couldn't upload file %s to host %s", str(Path(entry).name), str(e)) def attempt_multiple_passwords(options: dict, password: str) -> bool: diff --git a/analyzer/windows/lib/core/pipe.py b/analyzer/windows/lib/core/pipe.py index c8fecc6aba5..c5f399ae3e2 100644 --- a/analyzer/windows/lib/core/pipe.py +++ b/analyzer/windows/lib/core/pipe.py @@ -224,7 +224,7 @@ def stop(self): if h.is_alive(): h.stop() except Exception as e: - log.error(e, exc_info=True) + log.exception(e) def disconnect_pipes(): diff --git a/analyzer/windows/modules/auxiliary/amsi.py b/analyzer/windows/modules/auxiliary/amsi.py index 2d361ea95e4..05750811be8 100644 --- a/analyzer/windows/modules/auxiliary/amsi.py +++ b/analyzer/windows/modules/auxiliary/amsi.py @@ -30,7 +30,6 @@ import logging import sys import threading -import traceback import uuid logger = logging.getLogger(__name__) @@ -945,7 +944,7 @@ def _unpackSimpleType(self, record, info, event_property): # if there is no data remaining then return if user_data_remaining <= 0: - logger.warning("No more user data left, returning none for field {:s}".format(name_field)) + logger.warning("No more user data left, returning none for field %s", str(name_field)) return {name_field: None} in_type = event_property.epi_u1.nonStructType.InType @@ -986,7 +985,7 @@ def _unpackSimpleType(self, record, info, event_property): if status != ERROR_SUCCESS: # We can handle this error and still capture the data. 
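Unlike the logging calls converted throughout this patch, exception constructors do not interpolate %s arguments, so a raised CuckooPackageError message is still built eagerly; a minimal sketch with a local stub for the exception class:

class CuckooPackageError(Exception):
    """Local stand-in for lib.common.exceptions.CuckooPackageError."""

def reraise_extract_failure(e: RuntimeError) -> None:
    # Exception arguments are stored verbatim, so the message is formatted here.
    raise CuckooPackageError(f"Unable to extract Zip file: {e}") from e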
- logger.warning("Failed to get data field data for {:s}, incrementing by reported size".format(name_field)) + logger.warning("Failed to get data field data for %s, incrementing by reported size", str(name_field)) self.index += property_length return {name_field: None} @@ -1135,7 +1134,7 @@ def _processEvent(self, record): if record.contents.EventHeader.Flags & EVENT_HEADER_FLAG_EXTENDED_INFO: parsed_data["EventExtendedData"] = self._parseExtendedData(record) except Exception as e: - logger.warning("Unable to parse event: {}".format(e)) + logger.warning("Unable to parse event: %s", str(e)) try: out.update(parsed_data) @@ -1143,8 +1142,7 @@ def _processEvent(self, record): if self.event_callback: self.event_callback(out) except Exception as e: - logger.error("Exception during callback: {}".format(e)) - logger.error(traceback.format_exc()) + logger.exception("Exception during callback: %s", str(e)) class TraceProperties: @@ -1170,7 +1168,7 @@ def __init__(self, event_callback=None): raise OSError("AMSI not supported on this platform") from err self.provider = None self.properties = TraceProperties() - self.session_name = "{:s}".format(str(uuid.uuid4())) + self.session_name = str(uuid.uuid4()) self.running = False self.event_callback = event_callback self.trace_logfile = None diff --git a/analyzer/windows/modules/auxiliary/browsermonitor.py b/analyzer/windows/modules/auxiliary/browsermonitor.py index 6989f190b20..4e0ce43a16d 100644 --- a/analyzer/windows/modules/auxiliary/browsermonitor.py +++ b/analyzer/windows/modules/auxiliary/browsermonitor.py @@ -35,7 +35,7 @@ def _find_browser_extension(self): for directory in temp_dir_list: # TOR Browser saves directly to %temp% if directory.startswith("bext_") and directory.endswith(".json"): - log.debug(f"Found extension logs: {self.browser_logfile}") + log.debug("Found extension logs: %s", self.browser_logfile) self.browser_logfile = os.path.join(temp_dir, directory) break tmp_directory_path = os.path.join(temp_dir, directory) @@ -47,7 +47,7 @@ def _find_browser_extension(self): for file in tmp_dir_files: if file.startswith("bext_") and file.endswith(".json"): self.browser_logfile = os.path.join(temp_dir, directory, file) - log.debug(f"Found extension logs: {self.browser_logfile}") + log.debug("Found extension logs: %s", self.browser_logfile) break time.sleep(1) diff --git a/analyzer/windows/modules/auxiliary/disguise.py b/analyzer/windows/modules/auxiliary/disguise.py index 25ce5f5bbbd..4d8c2d0db7f 100644 --- a/analyzer/windows/modules/auxiliary/disguise.py +++ b/analyzer/windows/modules/auxiliary/disguise.py @@ -248,7 +248,7 @@ def add_persistent_route(self, gateway: str): def start(self): if self.config.windows_static_route: - log.info(f"Config for route is: {str(self.config.windows_static_route)}") + log.info("Config for route is: %s", str(self.config.windows_static_route)) self.add_persistent_route(self.config.windows_static_route_gateway) self.change_productid() self.set_office_mrus() diff --git a/analyzer/windows/modules/auxiliary/dns_etw.py b/analyzer/windows/modules/auxiliary/dns_etw.py index 4c52da10b60..093d50f7288 100644 --- a/analyzer/windows/modules/auxiliary/dns_etw.py +++ b/analyzer/windows/modules/auxiliary/dns_etw.py @@ -22,8 +22,8 @@ HAVE_ETW = True except ImportError as e: log.debug( - f"Could not load auxiliary module DNS_ETW due to '{e}'\nIn order to use DNS_ETW functionality, it " - "is required to have pywintrace setup in python" + "Could not load auxiliary module DNS_ETW due to '%s'\nIn order to use DNS_ETW functionality, it 
" + "is required to have pywintrace setup in python", str(e) ) __author__ = "[Canadian Centre for Cyber Security] @CybercentreCanada" @@ -43,7 +43,6 @@ def encode(data, encoding="utf-8"): if HAVE_ETW: class ETW_provider(ETW): - def __init__( self, ring_buf_size=1024, @@ -120,7 +119,7 @@ def on_event(self, event_tufo): if event_id not in self.event_id_filters: return if self.no_conout is False: - log.info("{:d} ({:s})\n{:s}\n".format(event_id, event["Task Name"], pprint.pformat(encode(event)))) + log.info("%d (%s)\n%s\n", event_id, event["Task Name"], pprint.pformat(encode(event))) if event["QueryName"] in SAFELIST: return # Event 3010 query @@ -227,5 +226,5 @@ def stop(self): # file_name = file_path_list[-1] # process = file_path_list[-2] dumppath = os.path.join("DNS_ETW", "etw_dns.json") - log.debug("DNS_ETW Aux Module is uploading %s" % f) + log.debug("DNS_ETW Aux Module is uploading %s", f) upload_to_host(f, dumppath) diff --git a/analyzer/windows/modules/auxiliary/evtx.py b/analyzer/windows/modules/auxiliary/evtx.py index 41f47c3d16f..1899812f736 100644 --- a/analyzer/windows/modules/auxiliary/evtx.py +++ b/analyzer/windows/modules/auxiliary/evtx.py @@ -12,7 +12,6 @@ class Evtx(Thread, Auxiliary): - evtx_dump = "evtx.zip" windows_logs = [ diff --git a/analyzer/windows/modules/auxiliary/filepickup.py b/analyzer/windows/modules/auxiliary/filepickup.py index 0adb7305e9d..fbfa3e6059f 100644 --- a/analyzer/windows/modules/auxiliary/filepickup.py +++ b/analyzer/windows/modules/auxiliary/filepickup.py @@ -29,7 +29,7 @@ def start(self): def stop(self): if hasattr(self, "file_to_get"): if self.file_to_get: - log.info(f"Uploading {self.file_to_get}") + log.info("Uploading %s", self.file_to_get) upload_to_host(self.file_to_get, os.path.join("files", os.path.basename(self.file_to_get))) self.do_run = False diff --git a/analyzer/windows/modules/auxiliary/human.py b/analyzer/windows/modules/auxiliary/human.py index c7fb4c8c519..6851589d9cd 100644 --- a/analyzer/windows/modules/auxiliary/human.py +++ b/analyzer/windows/modules/auxiliary/human.py @@ -517,7 +517,7 @@ def run(self): pass else: for instruction in GIVEN_INSTRUCTIONS: - log.info("Instruction: %s" % instruction) + log.info("Instruction: %s", instruction) try: if instruction.lower() == CLICK_CMD: click_mouse() @@ -536,7 +536,7 @@ def run(self): if match and len(match.regs) == 2: interval = int(match.group(1)) except Exception as e: - log.error("One of the instruction given is invalid: %s with error %s" % (instruction, e)) + log.error("One of the instruction given is invalid: %s with error %s", instruction, str(e)) continue while self.do_run: diff --git a/analyzer/windows/modules/auxiliary/permissions.py b/analyzer/windows/modules/auxiliary/permissions.py index 8b8bb711f1a..17b655119b9 100644 --- a/analyzer/windows/modules/auxiliary/permissions.py +++ b/analyzer/windows/modules/auxiliary/permissions.py @@ -34,7 +34,6 @@ def start(self): log.debug("Adjusting permissions for %s", locations) for location in locations: - # First add a non-inherited permission for Admin Read+Execute # icacls /grant:r "BUILTIN\Administrators:(OI)(CI)(RX)" "BUILTIN\\Administrators:(RX)" /t /c /q modify_admin_params = [ diff --git a/analyzer/windows/modules/auxiliary/recentfiles.py b/analyzer/windows/modules/auxiliary/recentfiles.py index e6ec3cf4d23..04a8cc427c9 100644 --- a/analyzer/windows/modules/auxiliary/recentfiles.py +++ b/analyzer/windows/modules/auxiliary/recentfiles.py @@ -70,7 +70,7 @@ def start(self): ext = random.choice(self.extensions) filepath = 
os.path.join(dirpath, "%s.%s" % (filename, ext)) open(filepath, "wb").write(os.urandom(random.randint(30, 999999))) - log.debug("Wrote 'recentfile' %s to disk." % filepath) + log.debug("Wrote 'recentfile' %s to disk.", filepath) SHELL32.SHAddToRecentDocs(SHARD_PATHA, filepath) diff --git a/analyzer/windows/modules/auxiliary/watchdownloads.py b/analyzer/windows/modules/auxiliary/watchdownloads.py index fdef5d0bc07..c2aefa5ffda 100644 --- a/analyzer/windows/modules/auxiliary/watchdownloads.py +++ b/analyzer/windows/modules/auxiliary/watchdownloads.py @@ -36,7 +36,7 @@ def on_any_event(self, event: FileSystemEvent) -> None: HAVE_WATCHDOG = True except ImportError as e: - log.debug(f"Could not load auxiliary module WatchDownloads due to '{e}'") + log.debug("Could not load auxiliary module WatchDownloads due to '%s'", str(e)) class WatchDownloads(Auxiliary, Thread): diff --git a/analyzer/windows/modules/auxiliary/wmi_etw.py b/analyzer/windows/modules/auxiliary/wmi_etw.py index e506335ef82..cbd2f5e379e 100644 --- a/analyzer/windows/modules/auxiliary/wmi_etw.py +++ b/analyzer/windows/modules/auxiliary/wmi_etw.py @@ -22,8 +22,8 @@ HAVE_ETW = True except ImportError as e: log.debug( - f"Could not load auxiliary module WMI_ETW due to '{e}'\nIn order to use WMI_ETW functionality, it " - "is required to have pywintrace setup in python" + "Could not load auxiliary module WMI_ETW due to '%s'\nIn order to use WMI_ETW functionality, it " + "is required to have pywintrace setup in python", str(e) ) __author__ = "[Andrea Oliveri starting from code of Canadian Centre for Cyber Security]" @@ -118,7 +118,7 @@ def on_event(self, event_tufo): event_id, event = event_tufo if self.no_conout is False: - log.info("{:d} ({:s})\n{:s}\n".format(event_id, event["Task Name"], pprint.pformat(encode(event)))) + log.info("%d (%s)\n%s\n", event_id, event["Task Name"], pprint.pformat(encode(event))) if self.logfile is not None: with open(self.logfile, "a") as file: @@ -188,5 +188,5 @@ def stop(self): log.debug(files_to_upload) for f in files_to_upload: dumppath = os.path.join("aux", "wmi_etw.json") - log.debug("WMI_ETW Aux Module is uploading %s" % f) + log.debug("WMI_ETW Aux Module is uploading %s", f) upload_to_host(f, dumppath) diff --git a/analyzer/windows/modules/packages/archive.py b/analyzer/windows/modules/packages/archive.py index 394085c6b98..95264d281ca 100644 --- a/analyzer/windows/modules/packages/archive.py +++ b/analyzer/windows/modules/packages/archive.py @@ -108,7 +108,7 @@ def start(self, path): files_at_root = [os.path.join(r, f).replace(f"{root}\\", "") for r, _, files in os.walk(root) for f in files] log.debug(files_at_root) if set(file_names) != set(files_at_root): - log.debug(f"Replacing {file_names} with {files_at_root}") + log.debug("Replacing %s with %s", str(file_names), str(files_at_root)) file_names = files_at_root upload_extracted_files(root, files_at_root) @@ -123,12 +123,12 @@ def start(self, path): try: shutil.copytree(d, os.path.join("C:\\", item)) except Exception as e: - log.warning(f"Couldn't copy {d} to root of C: {e}") + log.warning("Couldn't copy %s to root of C: %s", d, str(e)) else: try: shutil.copy(d, "C:\\") except Exception as e: - log.warning(f"Couldn't copy {d} to root of C: {e}") + log.warning("Couldn't copy %s to root of C: %s", d, str(e)) file_name = self.options.get(OPT_FILE) # If no file name is provided via option, discover files to execute. 
diff --git a/analyzer/windows/modules/packages/dll.py b/analyzer/windows/modules/packages/dll.py index 1c047bf2154..068c951a0a0 100644 --- a/analyzer/windows/modules/packages/dll.py +++ b/analyzer/windows/modules/packages/dll.py @@ -97,7 +97,7 @@ def start(self, path): # If the user has not enabled multi, but requested multiple functions, log it and default to #1 elif not enable_multi and (":" in function or "-" in function or ".." in function): - log.warning(f"You need to enable the `{_OPT_ENABLE_MULTI}` option if you want to run multiple functions.") + log.warning("You need to enable the `%s` option if you want to run multiple functions.", str(_OPT_ENABLE_MULTI)) # Setting function to the first ordinal number since the user does not want use to run multiple functions. function = "#1" diff --git a/analyzer/windows/modules/packages/msix.py b/analyzer/windows/modules/packages/msix.py index 13bf6c58400..8b3683ceee9 100644 --- a/analyzer/windows/modules/packages/msix.py +++ b/analyzer/windows/modules/packages/msix.py @@ -38,7 +38,7 @@ def start(self, path): if len(file_names) and "config.json" in file_names: extract_zip(path, orig_path.parent) - log.debug(f"Extracted {len(file_names)} files from {path} to {orig_path.parent}") + log.debug("Extracted %d files from %s to %s", len(file_names), str(path), str(orig_path.parent)) with suppress(Exception): config_path = str(orig_path.with_name("config.json")) @@ -51,7 +51,7 @@ def start(self, path): if script_paths: path = str(orig_path.with_name(script_paths[0])) args = f'-NoProfile -ExecutionPolicy bypass -File "{path}"' - log.debug(f"msix file contains script {path}") + log.debug("msix file contains script %s", str(path)) if not args: args = f"-NoProfile -ExecutionPolicy bypass {os.getcwd()}\data\msix.ps1 {path}" diff --git a/analyzer/windows/modules/packages/pub.py b/analyzer/windows/modules/packages/pub.py index 9da03cc3437..1602615d8b7 100644 --- a/analyzer/windows/modules/packages/pub.py +++ b/analyzer/windows/modules/packages/pub.py @@ -30,7 +30,6 @@ def __init__(self, options=None, config=None): The .pub filename extension will be added automatically.""" def set_keys(self): - baseOfficeKeyPath = r"Software\Microsoft\Office" installedVersions = [] try: diff --git a/analyzer/windows/modules/packages/pub2016.py b/analyzer/windows/modules/packages/pub2016.py index 950c1fa6048..196a395aefb 100644 --- a/analyzer/windows/modules/packages/pub2016.py +++ b/analyzer/windows/modules/packages/pub2016.py @@ -26,7 +26,6 @@ def __init__(self, options=None, config=None): The .pub filename extension will be added automatically.""" def set_keys(self): - baseOfficeKeyPath = r"Software\Microsoft\Office" installedVersions = [] try: diff --git a/analyzer/windows/prescripts/prescript_detection.py b/analyzer/windows/prescripts/prescript_detection.py index 46ff0f4510d..09aa2a165a8 100644 --- a/analyzer/windows/prescripts/prescript_detection.py +++ b/analyzer/windows/prescripts/prescript_detection.py @@ -249,17 +249,17 @@ def add_file_to_path(src_path, dst_path, overwrite=False): if os.path.exists(dst_path) and overwrite: # in case of the src and dst are the same file if os.path.samefile(src_path, dst_path): - log.info(f"Same file {dst_path} already in the victim vm") + log.info("Same file %s already in the victim vm", str(dst_path)) return os.remove(dst_path) shutil.copyfile(src=src_path, dst=dst_path) - log.info(f"File {dst_path} modified in the victim vm") + log.info("File %s modified in the victim vm", str(dst_path)) elif os.path.exists(dst_path): - 
log.info(f"File {dst_path} already in the victim vm") + log.info("File %s already in the victim vm", str(dst_path)) return else: shutil.copyfile(src=src_path, dst=dst_path) - log.info(f"File {dst_path} added to victim vm") + log.info("File %s added to victim vm", str(dst_path)) def run_script(script_path, args, timeout): @@ -268,12 +268,12 @@ def run_script(script_path, args, timeout): subprocess.check_output("python " + exec, timeout=timeout, stderr=subprocess.STDOUT) else: subprocess.check_output(exec, timeout=timeout, stderr=subprocess.STDOUT) - log.info(f"Running script {script_path} with parameters {args} on the victim vm") + log.info("Running script %s with parameters %s on the victim vm", str(script_path), str(args)) def add_directory(path): os.makedirs(path, exist_ok=True) - log.info(f"Folder {path} added to victim vm") + log.info("Folder %s added to victim vm", str(path)) def registry_path_to_winreg(path): @@ -304,7 +304,7 @@ def create_registry(path, key, value, value_type): RegistryKey = CreateKey(path, key) SetValueEx(RegistryKey, key, 0, value_type, value) CloseKey(RegistryKey) - log.info(f"Created registry {path}, with key {key} and value {value} on the victim vm") + log.info("Created registry %s, with key %s and value %s on the victim vm", str(path), str(key), str(value)) def modify_registry(path, key, value, value_type): @@ -312,9 +312,9 @@ def modify_registry(path, key, value, value_type): try: RegistryKey = OpenKey(path, key, 0, KEY_ALL_ACCESS) except Exception as _: - log.info(f"The target registry doesn't exist on the victim vm at path {path} with key {key}") + log.info("The target registry doesn't exist on the victim vm at path %s with key %s", str(path), str(key)) SetValueEx(RegistryKey, key, 0, value_type, value) - log.info(f"Modified registry {path}, with key {key} to value {value} on the victim vm") + log.info("Modified registry %s, with key %s to value %s on the victim vm", str(path), str(key), str(value)) def create_scheduled_task( @@ -346,7 +346,7 @@ def create_scheduled_task( tr.SetTrigger(trigger) pf = new_task.QueryInterface(pythoncom.IID_IPersistFile) pf.Save(None, 1) - log.info(f"Scheduled task {task_name} created on the victim vm") + log.info("Scheduled task %s created on the victim vm", str(task_name)) def create_scheduled_task2( @@ -567,7 +567,7 @@ def modify_scheduled_task( folder.DeleteTask(task_name, 0) else: folder.RegisterTaskDefinition(task_name, modified_task, TASK_CREATION.TASK_UPDATE.value, "", "", 0) - log.info(f"Scheduled task {task_name} modified on the victim vm") + log.info("Scheduled task %s modified on the victim vm", str(task_name)) def create_trigger( @@ -598,8 +598,8 @@ def create_trigger( def change_execution_dir(dir): - log.info(f"Changing execution directory to {dir}") - log.warn("Changing directory not available in prescript testing") + log.info("Changing execution directory to %s", dir) + log.warning("Changing directory not available in prescript testing") def main(args): @@ -681,8 +681,8 @@ def main(args): args=params_dict[ACTIONS_PARAMETERS[parsed_action][1]], timeout=int(params_dict[ACTIONS_PARAMETERS[parsed_action][2]]), ) - log.info(f"Runned script with {params_dict}") - print(f"Runned script with {params_dict}") + log.info("Runned script with %s", str(params_dict)) + # print(f"Runned script with {params_dict}") elif parsed_action == LIST_OF_VALID_ACTIONS[1]: add_file_to_path( src_path=params_dict[ACTIONS_PARAMETERS[parsed_action][0]], @@ -690,15 +690,15 @@ def main(args): 
overwrite=bool(params_dict[ACTIONS_PARAMETERS[parsed_action][2]]), ) log.info( - f"Adding file from {params_dict[ACTIONS_PARAMETERS[parsed_action][0]]} to {params_dict[ACTIONS_PARAMETERS[parsed_action][1]]}" - ) - print( - f"Adding file from {params_dict[ACTIONS_PARAMETERS[parsed_action][0]]} to {params_dict[ACTIONS_PARAMETERS[parsed_action][1]]}" + "Adding file from %s to %s", params_dict[ACTIONS_PARAMETERS[parsed_action][0]], params_dict[ACTIONS_PARAMETERS[parsed_action][1]] ) + # print( + # f"Adding file from {params_dict[ACTIONS_PARAMETERS[parsed_action][0]]} to {params_dict[ACTIONS_PARAMETERS[parsed_action][1]]}" + # ) elif parsed_action == LIST_OF_VALID_ACTIONS[2]: add_directory(path=params_dict[ACTIONS_PARAMETERS[parsed_action][0]]) - log.info(f"Created directory with {params_dict}") - print(f"Created directory with {params_dict}") + log.info("Created directory with %s", str(params_dict)) + # print(f"Created directory with {params_dict}") elif parsed_action == LIST_OF_VALID_ACTIONS[3]: value_type = identify_registry_value_type(params_dict[ACTIONS_PARAMETERS[parsed_action][2]]) create_registry( @@ -707,8 +707,8 @@ def main(args): value=params_dict[ACTIONS_PARAMETERS[parsed_action][2]], value_type=value_type, ) - log.info(f"Created registry with {params_dict}") - print(f"Created registry with {params_dict}") + log.info("Created registry with %s", str(params_dict)) + # print(f"Created registry with {params_dict}") elif parsed_action == LIST_OF_VALID_ACTIONS[4]: value_type = identify_registry_value_type(params_dict[ACTIONS_PARAMETERS[parsed_action][2]]) modify_registry( @@ -717,8 +717,8 @@ def main(args): value=params_dict[ACTIONS_PARAMETERS[parsed_action][2]], value_type=value_type, ) - log.info(f"Modified registry with {params_dict}") - print(f"Modified registry with {params_dict}") + log.info("Modified registry with %s", str(params_dict)) + # print(f"Modified registry with {params_dict}") elif parsed_action == LIST_OF_VALID_ACTIONS[5]: parsed_params_dict = {} for param in ACTIONS_PARAMETERS[parsed_action]: @@ -747,15 +747,15 @@ def main(args): else: parsed_params_dict[param] = params_dict[param] create_scheduled_task2(**parsed_params_dict) - log.info(f"Created scheduled task with {params_dict}") - print(f"Created scheduled task with {params_dict}") + log.info("Created scheduled task with %s", str(params_dict)) + # print(f"Created scheduled task with {params_dict}") elif parsed_action == LIST_OF_VALID_ACTIONS[6]: create_scheduled_task_from_xml( task_name=params_dict[ACTIONS_PARAMETERS[parsed_action][0]], xml_path=params_dict[ACTIONS_PARAMETERS[parsed_action][1]], ) - log.info(f"Created scheduled task from xml with {params_dict}") - print(f"Created scheduled task from xml with {params_dict}") + log.info("Created scheduled task from xml with %s", str(params_dict)) + # print(f"Created scheduled task from xml with {params_dict}") elif parsed_action == LIST_OF_VALID_ACTIONS[7]: parsed_params_dict = {} for param in ACTIONS_PARAMETERS[parsed_action]: @@ -770,16 +770,16 @@ def main(args): else: parsed_params_dict[param] = params_dict[param] modify_scheduled_task(**parsed_params_dict) - log.info(f"Modified scheduled task with {params_dict}") - print(f"Modified scheduled task with {params_dict}") + log.info("Modified scheduled task with %s", str(params_dict)) + # print(f"Modified scheduled task with {params_dict}") elif parsed_action == LIST_OF_VALID_ACTIONS[8]: change_execution_dir(path=params_dict[ACTIONS_PARAMETERS[parsed_action][0]]) - log.info(f"Changed execution dir to 
{params_dict[ACTIONS_PARAMETERS[parsed_action][0]]}") - print(f"Changed execution dir to {params_dict[ACTIONS_PARAMETERS[parsed_action][0]]}") + log.info("Changed execution dir to %s", params_dict[ACTIONS_PARAMETERS[parsed_action][0]]) + # print(f"Changed execution dir to {params_dict[ACTIONS_PARAMETERS[parsed_action][0]]}") except Exception as e: - log.debug(f"Invalid action {action} with parameters {params_dict} --> {e}") - print(f"Invalid action {action} with parameters {params_dict} --> {e}") + log.debug("Invalid action %s with parameters %s --> %s", str(action), str(params_dict), str(e)) + # print(f"Invalid action {action} with parameters {params_dict} --> {e}") if __name__ == "__main__": diff --git a/analyzer/windows/tests/lib/common/test_abstracts.py b/analyzer/windows/tests/lib/common/test_abstracts.py index 1607df26f29..2df0f991625 100644 --- a/analyzer/windows/tests/lib/common/test_abstracts.py +++ b/analyzer/windows/tests/lib/common/test_abstracts.py @@ -6,7 +6,6 @@ class TestPackageConfiguration(unittest.TestCase): - def test_private_package_configuration(self): # test analysis package package_module = self.__class__.__module__ diff --git a/analyzer/windows/tests/test_analyzer.py b/analyzer/windows/tests/test_analyzer.py index a8db4ec4241..353b6401a68 100644 --- a/analyzer/windows/tests/test_analyzer.py +++ b/analyzer/windows/tests/test_analyzer.py @@ -81,7 +81,6 @@ def test_prepare(self, set_lock, init_logging, config, pipeserver): class TestAnalyzerChoosePackage(unittest.TestCase): - def test_choose_package_shellcode(self): test = analyzer.Analyzer() test.config = MagicMock() diff --git a/dev_utils/elasticsearchdb.py b/dev_utils/elasticsearchdb.py index 6b92867ec01..defcfca51b6 100644 --- a/dev_utils/elasticsearchdb.py +++ b/dev_utils/elasticsearchdb.py @@ -92,7 +92,7 @@ def get_calls_index(): def delete_analysis_and_related_calls(task_id: str): analyses = elastic_handler.search(index=get_analysis_index(), query=get_query_by_info_id(task_id))["hits"]["hits"] if analyses: - log.debug("Deleting analysis data for Task %s" % task_id) + log.debug("Deleting analysis data for Task %s", task_id) for analysis in analyses: analysis = analysis["_source"] for process in analysis["behavior"].get("processes", []): @@ -100,7 +100,7 @@ def delete_analysis_and_related_calls(task_id: str): elastic_handler.delete_by_query(index=get_calls_index(), body={"query": {"match": {"_id": call}}}) elastic_handler.delete_by_query(index=get_analysis_index(), body={"query": get_query_by_info_id(task_id)}) - log.debug("Deleted previous ElasticsearchDB data for Task %s" % task_id) + log.debug("Deleted previous ElasticsearchDB data for Task %s", task_id) def scroll(scroll_id: str) -> dict: diff --git a/dev_utils/mongodb.py b/dev_utils/mongodb.py index 3debefdf1ba..45beb829567 100644 --- a/dev_utils/mongodb.py +++ b/dev_utils/mongodb.py @@ -210,7 +210,7 @@ def mongo_delete_data(task_ids: Union[int, Sequence[int]]): for hook in hooks[mongo_delete_data]["analysis"]: hook(found_task_ids) except Exception as e: - log.error(e, exc_info=True) + log.exception(e) def mongo_is_cluster(): diff --git a/lib/cuckoo/common/abstracts.py b/lib/cuckoo/common/abstracts.py index ea90282f8c7..156bfc75d4a 100644 --- a/lib/cuckoo/common/abstracts.py +++ b/lib/cuckoo/common/abstracts.py @@ -838,7 +838,6 @@ def set_path(self, analysis_path): CuckooReportError(e) def yara_detected(self, name): - target = self.results.get("target", {}) if target.get("category") in ("file", "static") and target.get("file"): for keyword in ("cape_yara", 
"yara"): @@ -900,16 +899,22 @@ def yara_detected(self, name): for yara_block in self.results["static"]["office"]["Macro"]["info"].get("macroname", []) or []: for sub_block in self.results["static"]["office"]["Macro"]["info"]["macroname"].get(yara_block, []) or []: if re.findall(name, sub_block["name"], re.I): - yield "macro", os.path.join(macro_path, macroname), sub_block, self.results["static"]["office"]["Macro"][ - "info" - ] + yield ( + "macro", + os.path.join(macro_path, macroname), + sub_block, + self.results["static"]["office"]["Macro"]["info"], + ) if self.results.get("static", {}).get("office", {}).get("XLMMacroDeobfuscator", False): for yara_block in self.results["static"]["office"]["XLMMacroDeobfuscator"].get("info", []).get("yara_macro", []) or []: if re.findall(name, yara_block["name"], re.I): - yield "macro", os.path.join(macro_path, "xlm_macro"), yara_block, self.results["static"]["office"][ - "XLMMacroDeobfuscator" - ]["info"] + yield ( + "macro", + os.path.join(macro_path, "xlm_macro"), + yara_block, + self.results["static"]["office"]["XLMMacroDeobfuscator"]["info"], + ) def signature_matched(self, signame: str) -> bool: # Check if signature has matched (useful for ordered signatures) @@ -975,7 +980,6 @@ def _get_ip_by_host(self, hostname): ) def _get_ip_by_host_dns(self, hostname): - ips = [] try: @@ -1096,7 +1100,7 @@ def check_file(self, pattern, regex=False, all=False): @return: depending on the value of param 'all', either a set of matched items or the first matched item """ - subject = self.results["behavior"]["summary"]["files"] + subject = self.results.get("behavior", {}).get("summary", {}).get("files", []) return self._check_value(pattern=pattern, subject=subject, regex=regex, all=all) def check_read_file(self, pattern, regex=False, all=False): @@ -1109,7 +1113,7 @@ def check_read_file(self, pattern, regex=False, all=False): @return: depending on the value of param 'all', either a set of matched items or the first matched item """ - subject = self.results["behavior"]["summary"]["read_files"] + subject = self.results.get("behavior", {}).get("summary", {}).get("read_files", []) return self._check_value(pattern=pattern, subject=subject, regex=regex, all=all) def check_write_file(self, pattern, regex=False, all=False): @@ -1122,7 +1126,7 @@ def check_write_file(self, pattern, regex=False, all=False): @return: depending on the value of param 'all', either a set of matched items or the first matched item """ - subject = self.results["behavior"]["summary"]["write_files"] + subject = self.results.get("behavior", {}).get("summary", {}).get("write_files", []) return self._check_value(pattern=pattern, subject=subject, regex=regex, all=all) def check_delete_file(self, pattern, regex=False, all=False): @@ -1135,7 +1139,7 @@ def check_delete_file(self, pattern, regex=False, all=False): @return: depending on the value of param 'all', either a set of matched items or the first matched item """ - subject = self.results["behavior"]["summary"]["delete_files"] + subject = self.results.get("behavior", {}).get("summary", {}).get("delete_files", []) return self._check_value(pattern=pattern, subject=subject, regex=regex, all=all) def check_key(self, pattern, regex=False, all=False): @@ -1148,7 +1152,7 @@ def check_key(self, pattern, regex=False, all=False): @return: depending on the value of param 'all', either a set of matched items or the first matched item """ - subject = self.results["behavior"]["summary"]["keys"] + subject = self.results.get("behavior", {}).get("summary", 
{}).get("keys", []) return self._check_value(pattern=pattern, subject=subject, regex=regex, all=all) def check_read_key(self, pattern, regex=False, all=False): @@ -1161,7 +1165,7 @@ def check_read_key(self, pattern, regex=False, all=False): @return: depending on the value of param 'all', either a set of matched items or the first matched item """ - subject = self.results["behavior"]["summary"]["read_keys"] + subject = self.results.get("behavior", {}).get("summary", {}).get("read_keys", []) return self._check_value(pattern=pattern, subject=subject, regex=regex, all=all) def check_write_key(self, pattern, regex=False, all=False): @@ -1174,7 +1178,7 @@ def check_write_key(self, pattern, regex=False, all=False): @return: depending on the value of param 'all', either a set of matched items or the first matched item """ - subject = self.results["behavior"]["summary"]["write_keys"] + subject = self.results.get("behavior", {}).get("summary", {}).get("write_keys", []) return self._check_value(pattern=pattern, subject=subject, regex=regex, all=all) def check_delete_key(self, pattern, regex=False, all=False): @@ -1187,7 +1191,7 @@ def check_delete_key(self, pattern, regex=False, all=False): @return: depending on the value of param 'all', either a set of matched items or the first matched item """ - subject = self.results["behavior"]["summary"]["delete_keys"] + subject = self.results.get("behavior", {}).get("summary", {}).get("delete_keys", []) return self._check_value(pattern=pattern, subject=subject, regex=regex, all=all) def check_mutex(self, pattern, regex=False, all=False): @@ -1200,7 +1204,7 @@ def check_mutex(self, pattern, regex=False, all=False): @return: depending on the value of param 'all', either a set of matched items or the first matched item """ - subject = self.results["behavior"]["summary"]["mutexes"] + subject = self.results.get("behavior", {}).get("summary", {}).get("mutexes", []) return self._check_value(pattern=pattern, subject=subject, regex=regex, all=all, ignorecase=False) def check_started_service(self, pattern, regex=False, all=False): @@ -1213,7 +1217,7 @@ def check_started_service(self, pattern, regex=False, all=False): @return: depending on the value of param 'all', either a set of matched items or the first matched item """ - subject = self.results["behavior"]["summary"]["started_services"] + subject = self.results.get("behavior", {}).get("summary", {}).get("started_services", []) return self._check_value(pattern=pattern, subject=subject, regex=regex, all=all) def check_created_service(self, pattern, regex=False, all=False): @@ -1226,7 +1230,7 @@ def check_created_service(self, pattern, regex=False, all=False): @return: depending on the value of param 'all', either a set of matched items or the first matched item """ - subject = self.results["behavior"]["summary"]["created_services"] + subject = self.results.get("behavior", {}).get("summary", {}).get("created_services", []) return self._check_value(pattern=pattern, subject=subject, regex=regex, all=all) def check_executed_command(self, pattern, regex=False, all=False, ignorecase=True): @@ -1241,7 +1245,7 @@ def check_executed_command(self, pattern, regex=False, all=False, ignorecase=Tru @return: depending on the value of param 'all', either a set of matched items or the first matched item """ - subject = self.results["behavior"]["summary"]["executed_commands"] + subject = self.results.get("behavior", {}).get("summary", {}).get("executed_commands", []) return self._check_value(pattern=pattern, subject=subject, 
regex=regex, all=all, ignorecase=ignorecase) def check_api(self, pattern, process=None, regex=False, all=False): @@ -1762,7 +1766,7 @@ def update(self) -> bool: try: req = requests.get(self.downloadurl, headers=headers, verify=True) except requests.exceptions.RequestException as e: - log.warn("Error downloading feed for %s: %s", self.feedname, e) + log.warning("Error downloading feed for %s: %s", self.feedname, e) return False if req.status_code == 200: self.downloaddata = req.content diff --git a/lib/cuckoo/common/admin_utils.py b/lib/cuckoo/common/admin_utils.py index a25b6c9bd96..fa512ae3079 100644 --- a/lib/cuckoo/common/admin_utils.py +++ b/lib/cuckoo/common/admin_utils.py @@ -372,7 +372,7 @@ def execute_command_on_all(remote_command, servers: list, ssh_proxy: SSHClient): _, ssh_stdout, _ = ssh.exec_command(remote_command) ssh_out = ssh_stdout.read().decode("utf-8").strip() if "Active: active (running)" in ssh_out and "systemctl status" not in remote_command: - log.info("[+] Service " + green("restarted successfully and is UP")) + log.info("[+] Service %s", green("restarted successfully and is UP")) else: if ssh_out: log.info(green(f"[+] {server} - {ssh_out}")) @@ -382,7 +382,7 @@ def execute_command_on_all(remote_command, servers: list, ssh_proxy: SSHClient): except TimeoutError as e: sys.exit(f"Did you forget to use jump box? {str(e)}") except Exception as e: - log.error(e, exc_info=True) + log.exception(e) def bulk_deploy(files, yara_category, dry_run=False, servers: list = [], ssh_proxy: SSHClient = False): diff --git a/lib/cuckoo/common/cape_utils.py b/lib/cuckoo/common/cape_utils.py index 642af3e4994..6299e3c4d22 100644 --- a/lib/cuckoo/common/cape_utils.py +++ b/lib/cuckoo/common/cape_utils.py @@ -221,7 +221,7 @@ def static_config_parsers(cape_name: str, file_path: str, file_data: bytes) -> d cape_config.setdefault(cape_name, {}).update({key: [value]}) parser_loaded = True except Exception as e: - log.error("CAPE: parsing error on %s with %s: %s", file_path, cape_name, e, exc_info=True) + log.exception("CAPE: parsing error on %s with %s: %s", file_path, cape_name, e) # DC3-MWCP if HAS_MWCP and not parser_loaded and cape_name and cape_name in mwcp_decoders: @@ -287,7 +287,7 @@ def static_config_parsers(cape_name: str, file_path: str, file_data: bytes) -> d if "rules" in str(e): log.warning("You probably need to compile yara-python with dotnet support") else: - log.error(e, exc_info=True) + log.exception(e) log.warning( "malwareconfig parsing error for %s with %s: %s, you should submit issue/fix to https://github.com/kevthehermit/RATDecoders/", file_path, diff --git a/lib/cuckoo/common/cleaners_utils.py b/lib/cuckoo/common/cleaners_utils.py index 203fef16cd1..1614aae18ea 100644 --- a/lib/cuckoo/common/cleaners_utils.py +++ b/lib/cuckoo/common/cleaners_utils.py @@ -167,7 +167,7 @@ def is_reporting_db_connected(): connect_to_es() return True except Exception as e: - log.error(f"Can't connect to reporting db {e}") + log.error("Can't connect to reporting db %s", str(e)) return False @@ -185,7 +185,7 @@ def delete_bulk_tasks_n_folders(tids: list, delete_mongo: bool): for id in ids_tmp: if db.delete_task(id): try: - path = os.path.join(CUCKOO_ROOT, "storage", "analyses", "%s" % str(id)) + path = os.path.join(CUCKOO_ROOT, "storage", "analyses", "%s", str(id)) if path_is_dir(path): delete_folder(path) except Exception as e: @@ -194,7 +194,7 @@ def delete_bulk_tasks_n_folders(tids: list, delete_mongo: bool): # If we don't remove from mongo we should keep in db to be able to show task in 
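The check_* helpers above now reach the behavior summary through chained dict.get calls with empty defaults, so a missing section no longer raises KeyError; a minimal sketch of the pattern on a results-like dict:

def summary_list(results: dict, key: str) -> list:
    # Each .get() supplies an empty default, so absent sections fall through to [].
    return results.get("behavior", {}).get("summary", {}).get(key, [])

results = {"behavior": {"summary": {"files": ["C:\\Users\\user\\a.txt"]}}}
assert summary_list(results, "files") == ["C:\\Users\\user\\a.txt"]
assert summary_list({}, "mutexes") == []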
webgui for id in ids_tmp: try: - path = os.path.join(CUCKOO_ROOT, "storage", "analyses", "%s" % str(id)) + path = os.path.join(CUCKOO_ROOT, "storage", "analyses", "%s", str(id)) if path_is_dir(path): delete_folder(path) except Exception as e: @@ -209,7 +209,7 @@ def fail_job(tid): tid = tid["info"]["id"] elif "id" in tid: tid = tid["id"] - log.info("set %s job to failed" % (tid)) + log.info("set %s job to failed", tid) db.set_status(tid, TASK_FAILED_ANALYSIS) @@ -223,18 +223,18 @@ def delete_data(tid): elif "id" in tid: tid = tid["id"] try: - log.info("removing %s from analysis db" % (tid)) + log.info("removing %s from analysis db", tid) if repconf.mongodb.enabled: mongo_delete_data(tid) elif repconf.elasticsearchdb.enabled: delete_analysis_and_related_calls(tid) except Exception as e: - log.error("failed to remove analysis info (may not exist) %s due to %s" % (tid, e), exc_info=True) + log.exception("failed to remove analysis info (may not exist) %s due to %s", tid, e) with db.session.begin(): if db.delete_task(tid): - delete_folder(os.path.join(CUCKOO_ROOT, "storage", "analyses", "%s" % tid)) + delete_folder(os.path.join(CUCKOO_ROOT, "storage", "analyses", str(tid))) else: - log.info("failed to remove faile task %s from DB" % (tid)) + log.info("failed to remove faile task %s from DB", tid) def dist_delete_data(data, dist_db): @@ -345,18 +345,18 @@ def cuckoo_clean_bson_suri_logs(): for el2 in e: new = el2.to_dict() id = new["id"] - path = os.path.join(CUCKOO_ROOT, "storage", "analyses", "%s" % id) + path = os.path.join(CUCKOO_ROOT, "storage", "analyses", str(id)) if path_exists(path): - jsonlogs = glob("%s/logs/*json*" % (path)) - bsondata = glob("%s/logs/*.bson" % (path)) - filesmeta = glob("%s/logs/files/*.meta" % (path)) + jsonlogs = glob(f"{path}/logs/*json*") + bsondata = glob(f"{path}/logs/*.bson") + filesmeta = glob(f"{path}/logs/files/*.meta") for f in jsonlogs, bsondata, filesmeta: for fe in f: try: - log.info(("removing %s" % (fe))) + log.info("removing %s", fe) path_delete(fe) except Exception as Err: - log.info(("failed to remove sorted_pcap from disk %s" % (Err))) + log.info("failed to remove sorted_pcap from disk %s", Err) def cuckoo_clean_failed_url_tasks(): @@ -414,7 +414,7 @@ def cuckoo_clean_lower_score(malscore: int): index=get_analysis_index(), query={"query": {"range": {"malscore": {"lte": malscore}}}}, _source=["info.id"] ) ] - log.info(("number of matching records %s" % len(id_arr))) + log.info("number of matching records %s", len(id_arr)) resolver_pool.map(lambda tid: delete_data(tid), id_arr) @@ -478,7 +478,7 @@ def cuckoo_clean_before_day(args: dict): for e in old_tasks: id_arr.append({"info.id": (int(e.to_dict()["id"]))}) - log.info(("number of matching records %s before suri/custom filter " % len(id_arr))) + log.info("number of matching records %s before suri/custom filter", len(id_arr)) if id_arr and args.get("suricata_zero_alert_filter"): result = list( mongo_find("analysis", {"suricata.alerts.alert": {"$exists": False}, "$or": id_arr}, {"info.id": 1, "_id": 0}) @@ -491,7 +491,7 @@ def cuckoo_clean_before_day(args: dict): ) ) id_arr = [entry["info"]["id"] for entry in result] - log.info("number of matching records %s" % len(id_arr)) + log.info("number of matching records %s", len(id_arr)) delete_bulk_tasks_n_folders(id_arr, args.get("delete_mongo")) # resolver_pool.map(lambda tid: delete_data(tid), id_arr) @@ -542,12 +542,12 @@ def cuckoo_clean_sorted_pcap_dump(): elif repconf.elasticsearchdb.enabled: es.update(index=e["index"], id=e["info"]["id"], 
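os.path.join treats every argument as its own path component, so the analysis directory is assembled directly from str(task_id) rather than through a "%s" placeholder; a small sketch (the root value is illustrative, not the configured one):

import os

CUCKOO_ROOT = "/opt/CAPEv2"  # illustrative root

def analysis_path(task_id: int) -> str:
    return os.path.join(CUCKOO_ROOT, "storage", "analyses", str(task_id))

assert analysis_path(42) == "/opt/CAPEv2/storage/analyses/42"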
body={"network.sorted_pcap_id": ""}) except Exception: - log.info(("failed to remove sorted pcap from db for id %s" % (e["info"]["id"]))) + log.info(("failed to remove sorted pcap from db for id %s", e["info"]["id"])) try: - path = os.path.join(CUCKOO_ROOT, "storage", "analyses", "%s" % (e["info"]["id"]), "dump_sorted.pcap") + path = os.path.join(CUCKOO_ROOT, "storage", "analyses", str(e["info"]["id"]), "dump_sorted.pcap") path_delete(path) except Exception as e: - log.info(("failed to remove sorted_pcap from disk %s" % (e))) + log.info(("failed to remove sorted_pcap from disk %s", e)) else: done = True else: @@ -654,7 +654,6 @@ def binaries_clean_before_day(days: int): def execute_cleanup(args: dict, init_log=True): - if init_log: init_console_logging() diff --git a/lib/cuckoo/common/demux.py b/lib/cuckoo/common/demux.py index 7d111688aaa..712be489869 100644 --- a/lib/cuckoo/common/demux.py +++ b/lib/cuckoo/common/demux.py @@ -184,7 +184,7 @@ def _sf_children(child: sfFile): # -> bytes: path_to_extract = os.path.join(tmp_dir, sanitize_filename((child.filename).decode())) _ = path_write_file(path_to_extract, child.contents) except Exception as e: - log.error(e, exc_info=True) + log.exception(e) return (path_to_extract.encode(), child.platform, child.magic, child.filesize) @@ -229,7 +229,7 @@ def demux_sflock(filename: bytes, options: str, check_shellcode: bool = True): if tmp_child and tmp_child[0]: retlist.append(tmp_child) except Exception as e: - log.error(e, exc_info=True) + log.exception(e) return retlist, "" diff --git a/lib/cuckoo/common/email_utils.py b/lib/cuckoo/common/email_utils.py index 46f0066ff9a..ca212ba19c3 100644 --- a/lib/cuckoo/common/email_utils.py +++ b/lib/cuckoo/common/email_utils.py @@ -28,7 +28,6 @@ def find_attachments_in_email(s, expand_attachment): def _find_attachments_in_email(mesg, expand_attachment, atts): - # MHTML detection if mesg.get_content_maintype() == "multipart" and mesg.get_content_subtype() == "related": for part in mesg.walk(): diff --git a/lib/cuckoo/common/gcp.py b/lib/cuckoo/common/gcp.py index 4accf6643a3..204f1c6c625 100644 --- a/lib/cuckoo/common/gcp.py +++ b/lib/cuckoo/common/gcp.py @@ -63,7 +63,7 @@ def list_instances(self) -> dict: ] servers.setdefault(instance["name"], ips) except Exception as e: - log.error(e, exc_info=True) + log.exception(e) elif HAVE_GCP: try: instance_client = compute_v1.InstancesClient() @@ -88,7 +88,6 @@ def list_instances(self) -> dict: return servers def autodiscovery(self): - while True: servers = self.list_instances() if not servers: @@ -109,9 +108,9 @@ def autodiscovery(self): if not r.ok: log.error("Can't registger worker with IP: %s. status_code: %d ", ip, r.status_code) except Exception as e: - log.error(e, exc_info=True) + log.exception(e) break except Exception as e: - log.error(e, exc_info=True) + log.exception(e) time.sleep(int(self.dist_cfg.GCP.autodiscovery)) diff --git a/lib/cuckoo/common/icon.py b/lib/cuckoo/common/icon.py index 754b6628bca..cfc88ff5017 100644 --- a/lib/cuckoo/common/icon.py +++ b/lib/cuckoo/common/icon.py @@ -2,11 +2,10 @@ # This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org # See the file 'docs/LICENSE' for copying permission. 
-from ctypes import POINTER, Structure, byref +from ctypes import POINTER, Structure, byref, cast, create_string_buffer, pointer, sizeof, string_at from ctypes import c_ubyte as BYTE from ctypes import c_uint as DWORD from ctypes import c_ushort as WORD -from ctypes import cast, create_string_buffer, pointer, sizeof, string_at class GRPICONDIR(Structure): diff --git a/lib/cuckoo/common/integrations/XLMMacroDeobfuscator.py b/lib/cuckoo/common/integrations/XLMMacroDeobfuscator.py index 0da287a8401..a9c32e39723 100644 --- a/lib/cuckoo/common/integrations/XLMMacroDeobfuscator.py +++ b/lib/cuckoo/common/integrations/XLMMacroDeobfuscator.py @@ -45,7 +45,6 @@ def xlmdeobfuscate(filepath: str, task_id: str, password: str = "", on_demand: bool = False): - if not HAVE_XLM_DEOBF or processing_conf.xlsdeobf.on_demand and not on_demand: return xlm_kwargs["file"] = filepath @@ -68,4 +67,4 @@ def xlmdeobfuscate(filepath: str, task_id: str, password: str = "", on_demand: b if "no attribute 'workbook'" in str(e) or "Can't find workbook" in str(e): log.info("Workbook not found. Probably not an Excel file") else: - log.error(e, exc_info=True) + log.exception(e) diff --git a/lib/cuckoo/common/integrations/capa.py b/lib/cuckoo/common/integrations/capa.py index 551dcc00110..fac87287061 100644 --- a/lib/cuckoo/common/integrations/capa.py +++ b/lib/cuckoo/common/integrations/capa.py @@ -300,6 +300,6 @@ def flare_capa_details( except EmptyReportError: log.info("FLARE CAPA -> No process data available") except Exception as e: - log.error(e, exc_info=True) + log.exception(e) return capa_output diff --git a/lib/cuckoo/common/integrations/file_extra_info.py b/lib/cuckoo/common/integrations/file_extra_info.py index 2daeb1c24fa..f36587f35dd 100644 --- a/lib/cuckoo/common/integrations/file_extra_info.py +++ b/lib/cuckoo/common/integrations/file_extra_info.py @@ -585,7 +585,7 @@ def vbe_extract(file: str, **_) -> ExtractorReturnType: try: decoded = vbe_decode_file(file, data) except Exception as e: - log.error(e, exc_info=True) + log.exception(e) if not decoded: log.debug("VBE content wasn't decoded") diff --git a/lib/cuckoo/common/integrations/file_extra_info_modules/overlay.py b/lib/cuckoo/common/integrations/file_extra_info_modules/overlay.py index 540a97d2e22..31bcf4136fc 100644 --- a/lib/cuckoo/common/integrations/file_extra_info_modules/overlay.py +++ b/lib/cuckoo/common/integrations/file_extra_info_modules/overlay.py @@ -17,7 +17,6 @@ @time_tracker def extract_details(file, *, data_dictionary, **_) -> ExtractorReturnType: - if not data_dictionary.get("pe", {}).get("overlay"): return {} diff --git a/lib/cuckoo/common/integrations/floss.py b/lib/cuckoo/common/integrations/floss.py index 44a370c896c..07c552b1e2e 100644 --- a/lib/cuckoo/common/integrations/floss.py +++ b/lib/cuckoo/common/integrations/floss.py @@ -125,7 +125,7 @@ def run(self): results[stype].append(sval.string) except Exception as e: - log.error(e, exc_info=True) + log.exception(e) fm.set_log_config(fm.DebugLevel.DEFAULT, False) diff --git a/lib/cuckoo/common/integrations/office_one.py b/lib/cuckoo/common/integrations/office_one.py index 4594a7bd0da..bd7d20604da 100644 --- a/lib/cuckoo/common/integrations/office_one.py +++ b/lib/cuckoo/common/integrations/office_one.py @@ -115,10 +115,10 @@ def extract_files(self) -> Iterator[bytes]: size = self.data[size_offset : size_offset + 4] i_size = struct.unpack(" Iterator[OneNoteMetadataObject]: creation_date=h_createDate, last_modification_date=h_LastDate, ) - except Exception as e: - logger.error(f"Error 
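The xlmdeobfuscate gate above leans on Python operator precedence (not binds tighter than and, which binds tighter than or); a small sketch of how that condition evaluates:

def should_skip(missing_dep: bool, on_demand_only: bool, on_demand: bool) -> bool:
    # Reads as: missing_dep or (on_demand_only and (not on_demand))
    return missing_dep or on_demand_only and not on_demand

assert should_skip(False, True, False) is True   # on-demand module, not requested
assert should_skip(False, True, True) is False   # on-demand module, explicitly requested
assert should_skip(True, False, True) is True    # dependency missing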
while parsing object {cpt}") - logger.error(f"Error: {e}.") + logger.error("Error while parsing object %s - Error: %s", cpt, str(e)) return ret diff --git a/lib/cuckoo/common/integrations/parse_dotnet.py b/lib/cuckoo/common/integrations/parse_dotnet.py index c88c1b936f6..6a86fb30eb4 100644 --- a/lib/cuckoo/common/integrations/parse_dotnet.py +++ b/lib/cuckoo/common/integrations/parse_dotnet.py @@ -56,7 +56,7 @@ def _get_custom_attrs(self) -> List[Dict[str, str]]: except subprocess.CalledProcessError as e: log.error("Monodis: %s", str(e)) except Exception as e: - log.error(e, exc_info=True) + log.exception(e) return None def _get_assembly_refs(self) -> List[Dict[str, str]]: @@ -84,7 +84,7 @@ def _get_assembly_refs(self) -> List[Dict[str, str]]: except subprocess.CalledProcessError as e: log.error("Monodis: %s", str(e)) except Exception as e: - log.error(e, exc_info=True) + log.exception(e) return None def _get_assembly_info(self) -> Dict[str, str]: @@ -103,7 +103,7 @@ def _get_assembly_info(self) -> Dict[str, str]: except subprocess.CalledProcessError as e: log.error("Monodis: %s", str(e)) except Exception as e: - log.error(e, exc_info=True) + log.exception(e) return None def _get_type_refs(self) -> List[Dict[str, str]]: @@ -128,7 +128,7 @@ def _get_type_refs(self) -> List[Dict[str, str]]: except subprocess.CalledProcessError as e: log.error("Monodis: %s", str(e)) except Exception as e: - log.error(e, exc_info=True) + log.exception(e) return None def run(self) -> Dict[str, Any]: @@ -151,5 +151,5 @@ def run(self) -> Dict[str, Any]: else: return except Exception as e: - log.error(e, exc_info=True) + log.exception(e) return None diff --git a/lib/cuckoo/common/integrations/parse_hwp.py b/lib/cuckoo/common/integrations/parse_hwp.py index 2b037bdf227..5d64bd24ba5 100644 --- a/lib/cuckoo/common/integrations/parse_hwp.py +++ b/lib/cuckoo/common/integrations/parse_hwp.py @@ -38,7 +38,7 @@ def unpack_hwp(self): stream_content = zlib.decompress(contents, -15) self.files[stream_name] = stream_content except Exception as e: - log.error(e, exc_info=True) + log.exception(e) def extract_eps(self) -> List[bytes]: """Extract some information from Encapsulated Post Script files.""" diff --git a/lib/cuckoo/common/integrations/parse_java.py b/lib/cuckoo/common/integrations/parse_java.py index 733ed296342..4d45863c3f9 100644 --- a/lib/cuckoo/common/integrations/parse_java.py +++ b/lib/cuckoo/common/integrations/parse_java.py @@ -41,7 +41,7 @@ def run(self) -> Dict[str, Any]: p = Popen([self.decomp_jar, jar_file], stdout=PIPE) results["decompiled"] = convert_to_printable(p.stdout.read()) except Exception as e: - log.error(e, exc_info=True) + log.exception(e) with contextlib.suppress(Exception): Path(jar_file.decode()).unlink() diff --git a/lib/cuckoo/common/integrations/parse_office.py b/lib/cuckoo/common/integrations/parse_office.py index 1c7de942cca..b047d6bc604 100644 --- a/lib/cuckoo/common/integrations/parse_office.py +++ b/lib/cuckoo/common/integrations/parse_office.py @@ -129,7 +129,7 @@ def _get_xml_meta(self, filepath) -> Dict[str, Dict[str, str]]: continue metares["SummaryInformation"][n.split(":")[1]] = convert_to_printable(data[0].data) except (IndexError, AttributeError) as e: - log.error(e, exc_info=True) + log.exception(e) for elem in app._get_documentElement().childNodes: try: @@ -146,7 +146,7 @@ def _get_xml_meta(self, filepath) -> Dict[str, Dict[str, str]]: continue metares["DocumentSummaryInformation"][n] = convert_to_printable(data[0].data) except (IndexError, AttributeError) as e: - 
log.error(e, exc_info=True) + log.exception(e) return metares @@ -236,7 +236,7 @@ def _parse(self, filepath: str) -> Dict[str, Any]: if temp_results: results["office_rtf"] = temp_results except Exception as e: - log.error(e, exc_info=True) + log.exception(e) else: try: vba = VBA_Parser(filepath) @@ -254,7 +254,7 @@ def _parse(self, filepath: str) -> Dict[str, Any]: except AttributeError: log.warning("OleFile library bug: AttributeError! fix: poetry run pip install olefile") except Exception as e: - log.error(e, exc_info=True) + log.exception(e) officeresults = {"Metadata": {}} macro_folder = os.path.join(CUCKOO_ROOT, "storage", "analyses", self.task_id, "macros") @@ -301,7 +301,7 @@ def _parse(self, filepath: str) -> Dict[str, Any]: except ValueError as e: log.error("Can't parse macros for %s - %s ", filepath, str(e)) except Exception as e: - log.error(e, exc_info=True) + log.exception(e) for keyword, description in detect_autoexec(vba_code): officeresults["Macro"]["Analysis"].setdefault("AutoExec", []).append( (keyword.replace(".", "_"), description) @@ -328,7 +328,7 @@ def _parse(self, filepath: str) -> Dict[str, Any]: if indicator.value and indicator.name in {"Word Document", "Excel Workbook", "PowerPoint Presentation"}: officeresults["Metadata"]["DocumentType"] = indicator.name except Exception as e: - log.error(e, exc_info=True) + log.exception(e) if HAVE_XLM_DEOBF: tmp_xlmmacro = xlmdeobfuscate(filepath, self.task_id, self.options.get("password", "")) diff --git a/lib/cuckoo/common/integrations/parse_pe.py b/lib/cuckoo/common/integrations/parse_pe.py index f24c0397fa7..1765e468f90 100644 --- a/lib/cuckoo/common/integrations/parse_pe.py +++ b/lib/cuckoo/common/integrations/parse_pe.py @@ -219,7 +219,7 @@ def get_peid_signatures(self, pe: pefile.PE) -> list: if result: return list(result) except Exception as e: - log.error(e, exc_info=True) + log.exception(e) return None @@ -344,7 +344,7 @@ def get_resources(self, pe: pefile.PE) -> List[Dict[str, str]]: except pefile.PEFormatError as e: log.debug("get_resources error: %s", str(e)) except Exception as e: - log.error(e, exc_info=True) + log.exception(e) continue return resources @@ -368,7 +368,7 @@ def get_pdb_path(self, pe: pefile.PE) -> str: length = struct.unpack_from("IIB", dbgdata)[1] return dbgdata[12:length].decode("latin-1").rstrip("\0") except Exception as e: - log.error(e, exc_info=True) + log.exception(e) return None @@ -401,7 +401,7 @@ def get_imported_symbols(self, pe: pefile.PE) -> Dict[str, dict]: "imports": symbols, } except Exception as e: - log.error(e, exc_info=True) + log.exception(e) continue return imports @@ -538,7 +538,7 @@ def get_sections(self, pe: pefile.PE) -> List[Dict[str, str]]: } ) except Exception as e: - log.error(e, exc_info=True) + log.exception(e) continue return sections @@ -650,7 +650,7 @@ def get_icon_info(self, pe: pefile.PE) -> Tuple[str, str, str, str]: return None, None, None, None return icon, fullhash, simphash, dhash except Exception as e: - log.error(e, exc_info=True) + log.exception(e) return None, None, None, None @@ -693,7 +693,7 @@ def get_versioninfo(self, pe: pefile.PE) -> List[dict]: entry["value"] = f"0x0{entry['value'][2:5]} 0x0{entry['value'][7:10]}" peresults.append(entry) except Exception as e: - log.error(e, exc_info=True) + log.exception(e) continue return peresults @@ -853,12 +853,12 @@ def get_dll_exports(self) -> str: else: exports.append(re.sub("[^A-Za-z0-9_?@-]", "", exported_symbol.name)) except Exception as e: - log.error(e, exc_info=True) + log.exception(e) return 
",".join(exports) except Exception as e: log.error("PE type not recognised") - log.error(e, exc_info=True) + log.exception(e) return "" @@ -873,7 +873,7 @@ def choose_dll_export(self) -> str: if exp.name.decode() in ("DllInstall", "DllRegisterServer", "xlAutoOpen"): return exp.name.decode() except Exception as e: - log.error(e, exc_info=True) + log.exception(e) return None def get_entrypoint(self, pe: pefile.PE) -> str: diff --git a/lib/cuckoo/common/integrations/pdfminer.py b/lib/cuckoo/common/integrations/pdfminer.py index d3b98bd3e52..f5861aa11bf 100644 --- a/lib/cuckoo/common/integrations/pdfminer.py +++ b/lib/cuckoo/common/integrations/pdfminer.py @@ -44,10 +44,10 @@ def _mine_for_urls(file_path: str) -> Set[str]: try: obj = doc.getobj(object_id) urls.update(_search_for_url(obj)) - except Exception as ex: - log.error(ex, exc_info=True) - except Exception as ex: - log.error(ex, exc_info=True) + except Exception as e: + log.exception(e) + except Exception as e: + log.exception(e) return urls diff --git a/lib/cuckoo/common/integrations/peepdf.py b/lib/cuckoo/common/integrations/peepdf.py index 27357ecb646..68edc4bcf0d 100644 --- a/lib/cuckoo/common/integrations/peepdf.py +++ b/lib/cuckoo/common/integrations/peepdf.py @@ -59,7 +59,7 @@ def _set_base_uri(pdf): if elem: return elem.getValue() except Exception as e: - log.error(e, exc_info=True) + log.exception(e) return "" @@ -74,7 +74,7 @@ def peepdf_parse(filepath: str, pdfresult: Dict[str, Any]) -> Dict[str, Any]: try: _, pdf = parser.parse(filepath, forceMode=True, looseMode=True, manualAnalysis=False) except Exception as e: - log.debug("Error parsing pdf: {}".format(e)) + log.debug("Error parsing pdf: %s", str(e)) return pdfresult urlset = set() annoturiset = set() @@ -109,7 +109,7 @@ def peepdf_parse(filepath: str, pdfresult: Dict[str, Any]) -> Dict[str, Any]: jslist, unescapedbytes, urlsfound, errors, ctxdummy = analyseJS(decoded_stream.strip()) jsdata = jslist[0] except Exception as e: - log.error(e, exc_info=True) + log.exception(e) continue if errors or jsdata is None: continue @@ -137,7 +137,7 @@ def peepdf_parse(filepath: str, pdfresult: Dict[str, Any]) -> Dict[str, Any]: jslist, unescapedbytes, urlsfound, errors, ctxdummy = analyseJS(js_elem.value) jsdata = jslist[0] except Exception as e: - log.error(e, exc_info=True) + log.exception(e) continue if errors or not jsdata: continue diff --git a/lib/cuckoo/common/integrations/vba2graph.py b/lib/cuckoo/common/integrations/vba2graph.py index fcaaf64708a..7b73b2d0b74 100644 --- a/lib/cuckoo/common/integrations/vba2graph.py +++ b/lib/cuckoo/common/integrations/vba2graph.py @@ -599,7 +599,6 @@ def vba_clean_whitespace(vba_content_lines): # process lines one by one for vba_line in vba_content_lines: - # remove leading and trailing whitespace # & reduce multiple whitespaces into one space vba_line = " ".join(vba_line.split()) @@ -801,7 +800,6 @@ def vba_extract_properties(vba_content_lines): # process lines one by one for vba_line in vba_content_lines: - # look for property start keywords prop_start_pos = max(vba_line.find("Property Let "), vba_line.find("Property Get ")) @@ -856,7 +854,6 @@ def create_call_graph(vba_func_dict): DG.add_node(func_name, keywords="") # analyze function calls for func_name in vba_func_dict: - func_code = vba_func_dict[func_name] # split function code into tokens func_code_tokens = list(filter(None, re.split(r'["(, \\-!?:\\r\\n)&=.><]+', func_code))) @@ -898,7 +895,6 @@ def find_keywords_in_graph(vba_func_dict, DG): """ # analyze function calls for func_name 
in vba_func_dict: - func_code = vba_func_dict[func_name] # split function code into lines func_code_lines = filter(None, re.split("\n", func_code)) diff --git a/lib/cuckoo/common/integrations/virustotal.py b/lib/cuckoo/common/integrations/virustotal.py index c56f8fb4e66..cb7068f5e7c 100644 --- a/lib/cuckoo/common/integrations/virustotal.py +++ b/lib/cuckoo/common/integrations/virustotal.py @@ -156,7 +156,6 @@ def get_vt_consensus(namelist: list): - finaltoks = defaultdict(int) for name in namelist: toks = re.findall(r"[A-Za-z0-9]+", name) @@ -210,7 +209,7 @@ def vt_lookup(category: str, target: str, results: dict = {}, on_demand: bool = try: urlscrub_compiled_re = re.compile(urlscrub) except Exception as e: - log.error(f"Failed to compile urlscrub regex: {e}") + log.error("Failed to compile urlscrub regex: %s", str(e)) return {} try: target = re.sub(urlscrub_compiled_re, "", target) diff --git a/lib/cuckoo/common/logtbl.py b/lib/cuckoo/common/logtbl.py index 8bba49c920b..0c1dbd8e8ec 100644 --- a/lib/cuckoo/common/logtbl.py +++ b/lib/cuckoo/common/logtbl.py @@ -8,6 +8,7 @@ by hand. """ + table = ( ("__process__", "__init__", ("",)), ("__thread__", "__init__", ("",)), diff --git a/lib/cuckoo/common/objects.py b/lib/cuckoo/common/objects.py index e84028416eb..e31c3273556 100644 --- a/lib/cuckoo/common/objects.py +++ b/lib/cuckoo/common/objects.py @@ -348,7 +348,7 @@ def get_content_type(self): except magic.MagicException as e: log.error("Magic error: %s", str(e)) except Exception as e: - log.error(e, exc_info=True) + log.exception(e) if not file_type and hasattr(magic, "open"): try: ms = magic.open(magic.MAGIC_MIME | magic.MAGIC_SYMLINK) @@ -356,7 +356,7 @@ def get_content_type(self): file_type = ms.file(self.file_path) ms.close() except Exception as e: - log.error(e, exc_info=True) + log.exception(e) if file_type is None: try: @@ -365,7 +365,7 @@ def get_content_type(self): ) file_type = p.stdout.read().strip() except Exception as e: - log.error(e, exc_info=True) + log.exception(e) return file_type @@ -414,7 +414,7 @@ def get_type(self): File.notified_pefile = True log.warning("Unable to import pefile (install with `pip3 install pefile`)") except Exception as e: - log.error(e, exc_info=True) + log.exception(e) if not self.file_type: self.file_type = self.get_content_type() diff --git a/lib/cuckoo/common/url_validate.py b/lib/cuckoo/common/url_validate.py index 9df8de2a461..8ce2b626509 100644 --- a/lib/cuckoo/common/url_validate.py +++ b/lib/cuckoo/common/url_validate.py @@ -17,7 +17,10 @@ # protocol identifier r"(?:(?:https?|ftp|tcp|udp)://)" # user:pass authentication - r"(?:[-a-z\u00a1-\uffff0-9._~%!$&'()*+,;=:]+" r"(?::[-a-z0-9._~%!$&'()*+,;=:]*)?@)?" r"(?:" r"(?P" + r"(?:[-a-z\u00a1-\uffff0-9._~%!$&'()*+,;=:]+" + r"(?::[-a-z0-9._~%!$&'()*+,;=:]*)?@)?"
+ r"(?:" + r"(?P" # IP address exclusion # private & local networks r"(?:(?:10|127)" + ip_middle_octet + r"{2}" + ip_last_octet + r")|" @@ -25,13 +28,19 @@ r"(?:172\.(?:1[6-9]|2\d|3[0-1])" + ip_middle_octet + ip_last_octet + r"))" r"|" # private & local hosts - r"(?P" r"(?:localhost))" r"|" + r"(?P" + r"(?:localhost))" + r"|" # IP address dotted notation octets # excludes loopback network 0.0.0.0 # excludes reserved space >= 224.0.0.0 # excludes network & broadcast addresses # (first & last IP address of each class) - r"(?P" r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])" r"" + ip_middle_octet + r"{2}" r"" + ip_last_octet + r")" r"|" + r"(?P" + r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])" + r"" + ip_middle_octet + r"{2}" + r"" + ip_last_octet + r")" + r"|" # IPv6 RegEx from https://stackoverflow.com/a/17871737 r"\[(" # 1:2:3:4:5:6:7:8 @@ -59,16 +68,23 @@ r"((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}" # ::255.255.255.255 ::ffff:255.255.255.255 ::ffff:0:255.255.255.255 # (IPv4-mapped IPv6 addresses and IPv4-translated addresses) - r"(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|" r"([0-9a-fA-F]{1,4}:){1,4}:" r"((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}" + r"(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|" + r"([0-9a-fA-F]{1,4}:){1,4}:" + r"((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}" # 2001:db8:3:4::192.0.2.33 64:ff9b::192.0.2.33 # (IPv4-Embedded IPv6 Address) - r"(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])" r")\]|" + r"(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])" + r")\]|" # host name - r"(?:(?:(?:xn--[-]{0,2})|[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]-?)*" r"[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]+)" + r"(?:(?:(?:xn--[-]{0,2})|[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]-?)*" + r"[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]+)" # domain name - r"(?:\.(?:(?:xn--[-]{0,2})|[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]-?)*" r"[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]+)*" + r"(?:\.(?:(?:xn--[-]{0,2})|[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]-?)*" + r"[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]+)*" # TLD identifier - r"(?:\.(?:(?:xn--[-]{0,2}[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]{2,})|" r"[a-z\u00a1-\uffff\U00010000-\U0010ffff]{2,}))" r")" + r"(?:\.(?:(?:xn--[-]{0,2}[a-z\u00a1-\uffff\U00010000-\U0010ffff0-9]{2,})|" + r"[a-z\u00a1-\uffff\U00010000-\U0010ffff]{2,}))" + r")" # port number r"(?::\d{2,5})?" # resource path @@ -76,7 +92,8 @@ # query string r"(?:\?\S*)?" # fragment - r"(?:#\S*)?" r"$", + r"(?:#\S*)?" 
+ r"$", re.UNICODE | re.IGNORECASE, ) diff --git a/lib/cuckoo/common/web_utils.py b/lib/cuckoo/common/web_utils.py index 212ed44e75d..8dd260ef922 100644 --- a/lib/cuckoo/common/web_utils.py +++ b/lib/cuckoo/common/web_utils.py @@ -76,9 +76,8 @@ if dist_conf.distributed.enabled: try: # Tags - from lib.cuckoo.common.dist_db import Machine, Node + from lib.cuckoo.common.dist_db import Machine, Node, create_session from lib.cuckoo.common.dist_db import Task as DTask - from lib.cuckoo.common.dist_db import create_session HAVE_DIST = True dist_session = create_session(dist_conf.distributed.db) @@ -1588,7 +1587,7 @@ def _malwarebazaar_dl(hash: str) -> bytes: except pyzipper.zipfile.BadZipFile: print("_malwarebazaar_dl", data.content[:100]) except Exception as e: - logging.error(e, exc_info=True) + log.exception(e) return sample diff --git a/lib/cuckoo/core/database.py b/lib/cuckoo/core/database.py index 2f2a663acf0..8885d3cf807 100644 --- a/lib/cuckoo/core/database.py +++ b/lib/cuckoo/core/database.py @@ -661,7 +661,7 @@ def delete_machine(self, name) -> bool: self.session.delete(machine) return True else: - log.warning(f"{name} does not exist in the database.") + log.warning("%s does not exist in the database.", name) return False def add_machine( @@ -1329,7 +1329,7 @@ def _identify_aux_func(self, file: bytes, package: str, check_shellcode: bool = try: tmp_package = sflock_identify(f, check_shellcode=check_shellcode) except Exception as e: - log.error(f"Failed to sflock_ident due to {e}") + log.error("Failed to sflock_ident due to %s", str(e)) tmp_package = "generic" if tmp_package and tmp_package in sandbox_packages: @@ -1369,7 +1369,6 @@ def recon( cape=False, category=None, ): - # Get file filetype to ensure self extracting archives run longer if not isinstance(filename, str): filename = bytes2str(filename) diff --git a/lib/cuckoo/core/guest.py b/lib/cuckoo/core/guest.py index 8c268b72ecd..4614d658681 100644 --- a/lib/cuckoo/core/guest.py +++ b/lib/cuckoo/core/guest.py @@ -388,7 +388,7 @@ def wait_for_completion(self): ) continue except Exception as e: - log.error("Task #%s: Virtual machine %s /status failed. %s", self.task_id, self.vmid, e, exc_info=True) + log.exception("Task #%s: Virtual machine %s /status failed. %s", self.task_id, self.vmid, e) continue if status["status"] in ("complete", "failed"): diff --git a/lib/cuckoo/core/plugins.py b/lib/cuckoo/core/plugins.py index 9ee8c702d51..73ff3ca2b16 100644 --- a/lib/cuckoo/core/plugins.py +++ b/lib/cuckoo/core/plugins.py @@ -520,8 +520,8 @@ def process(self, signature): log.debug('Analysis matched signature "%s"', signature.name) # Return information on the matched signature. 
return signature.as_result() - except KeyError as e: - log.error('Failed to run signature "%s": %s', signature.name, e) + except (KeyError, TypeError, AttributeError) as e: + log.debug('Failed to run signature "%s": %s', signature.name, e) except NotImplementedError: return None except Exception as e: @@ -614,6 +614,8 @@ def run(self, test_signature: str = False): stats[sig.name] += timediff except NotImplementedError: continue + except (KeyError, TypeError, AttributeError) as e: + log.debug('Failed to run signature "%s": %s', sig.name, e) except Exception as e: log.exception('Failed run on_complete() method for signature "%s": %s', sig.name, e) continue diff --git a/lib/cuckoo/core/resultserver.py b/lib/cuckoo/core/resultserver.py index 4d952001a9e..6407799c702 100644 --- a/lib/cuckoo/core/resultserver.py +++ b/lib/cuckoo/core/resultserver.py @@ -383,7 +383,6 @@ def parse_message(self, buffer): argdict = {argnames[i]: converters[i](arg) for i, arg in enumerate(args)} if apiname == "__process__": - # pid = argdict["ProcessIdentifier"] ppid = argdict["ParentProcessIdentifier"] modulepath = argdict["ModulePath"] @@ -464,7 +463,7 @@ def create_folders(self): try: create_folder(self.storagepath, folder=folder.decode()) except Exception as e: - log.error(e, exc_info=True) + log.exception(e) # ToDo # except CuckooOperationalError as e: # log.error("Unable to create folder %s", folder) @@ -511,7 +510,7 @@ def handle(self, sock, addr): with protocol: protocol.handle() except CuckooOperationalError as e: - log.error(e, exc_info=True) + log.exception(e) finally: with self.task_mgmt_lock: s.discard(ctx) diff --git a/lib/cuckoo/core/startup.py b/lib/cuckoo/core/startup.py index 8c202b0d426..468f751d5dc 100644 --- a/lib/cuckoo/core/startup.py +++ b/lib/cuckoo/core/startup.py @@ -199,9 +199,7 @@ def check_linux_dist(): with suppress(AttributeError): platform_details = platform.dist() if platform_details[0] != "Ubuntu" and platform_details[1] not in ubuntu_versions: - log.info( - f"[!] You are using NOT supported Linux distribution by devs! Any issue report is invalid! We only support Ubuntu LTS {ubuntu_versions}" - ) + log.info("[!] You are using NOT supported Linux distribution by devs! Any issue report is invalid! We only support Ubuntu LTS %s", ubuntu_versions) def init_logging(level: int): @@ -562,4 +560,4 @@ def check_vms_n_resultserver_networking(): vm_ip, vm_rs = network # is there are better way to check networkrange without range CIDR? if not resultserver_block.startswith(vm_ip) or (vm_rs and not vm_rs.startswith(vm_ip)): - log.error(f"Your resultserver and VM:{vm} are in different nework ranges. This might give you: CuckooDeadMachine") + log.error("Your resultserver and VM: %s are in different nework ranges. 
This might give you: CuckooDeadMachine", vm) diff --git a/modules/auxiliary/AzSniffer.py b/modules/auxiliary/AzSniffer.py index 9f14eafebd1..68538834813 100644 --- a/modules/auxiliary/AzSniffer.py +++ b/modules/auxiliary/AzSniffer.py @@ -95,13 +95,13 @@ def create_packet_capture(self, custom_filters): result = poller.result() self.blob_url = result.storage_location.storage_path - log.info(f"Started Azure Network Watcher packet capture: {self.capture_name}") - log.debug(f"Blob URL for packet capture: {self.blob_url}") + log.info("Started Azure Network Watcher packet capture: %s",self.capture_name) + log.debug("Blob URL for packet capture: %s", self.blob_url) except AzureError as e: - log.error(f"Azure error occurred while creating packet capture: {str(e)}") + log.error("Azure error occurred while creating packet capture: %s", str(e)) raise except Exception as e: - log.error(f"Unexpected error occurred while creating packet capture: {str(e)}") + log.error("Unexpected error occurred while creating packet capture: %s", str(e)) raise def stop(self): @@ -124,11 +124,11 @@ def stop_packet_capture(self): packet_capture_name=self.capture_name, ) poller.result() - log.info(f"Stopped Azure Network Watcher packet capture: {self.capture_name}") + log.info("Stopped Azure Network Watcher packet capture: %s", self.capture_name) except AzureError as e: - log.error(f"Azure error occurred while stopping packet capture: {str(e)}") + log.error("Azure error occurred while stopping packet capture: %s", str(e)) except Exception as e: - log.error(f"Unexpected error occurred while stopping packet capture: {str(e)}") + log.error("Unexpected error occurred while stopping packet capture: %s", str(e)) def download_packet_capture(self): if not self.blob_url: @@ -147,22 +147,22 @@ def download_packet_capture(self): blob_client = self.blob_service_client.get_blob_client(container=container_name, blob=blob_name) self._download_to_file(blob_client, primary_output_file) - log.info(f"Downloaded packet capture for task {self.task.id} to {primary_output_file}") + log.info("Downloaded packet capture for task %s to %s", str(self.task.id), primary_output_file) self.convert_cap_to_pcap(primary_output_file) except AzureError as e: - log.error(f"Azure error occurred while downloading packet capture: {str(e)}") + log.error("Azure error occurred while downloading packet capture: %s", str(e)) self._try_fallback_download(blob_client, fallback_output_file) except Exception as e: - log.error(f"Unexpected error occurred while downloading packet capture: {str(e)}") + log.error("Unexpected error occurred while downloading packet capture: %s", str(e)) self._try_fallback_download(blob_client, fallback_output_file) def _try_fallback_download(self, blob_client, fallback_output_file): try: self._download_to_file(blob_client, fallback_output_file) - log.info(f"Downloaded packet capture for task {self.task.id} to fallback location {fallback_output_file}") + log.info("Downloaded packet capture for task %s to fallback location %s", self.task.id, fallback_output_file) self.convert_cap_to_pcap(fallback_output_file) except Exception as e: - log.error(f"Failed to download packet capture to fallback location: {str(e)}") + log.error("Failed to download packet capture to fallback location: %s", str(e)) def _download_to_file(self, blob_client, output_file): os.makedirs(os.path.dirname(output_file), exist_ok=True) @@ -178,12 +178,12 @@ def convert_cap_to_pcap(self, cap_file_path): try: os.makedirs(output_dir, exist_ok=True) subprocess.run(convert_cmd, 
check=True, capture_output=True, text=True) - log.info(f"Converted .cap file to .pcap: {pcap_file_path}") + log.info("Converted .cap file to .pcap: %s", pcap_file_path) os.remove(cap_file_path) # Remove the original .cap file except subprocess.CalledProcessError as e: - log.error(f"Failed to convert .cap file to .pcap: {e.stderr}") + log.error("Failed to convert .cap file to .pcap: %s", str(e.stderr)) except OSError as e: - log.error(f"Failed to create directory or remove .cap file: {e}") + log.error("Failed to create directory or remove .cap file: %s", str(e)) def delete_packet_capture(self): try: @@ -193,11 +193,11 @@ def delete_packet_capture(self): packet_capture_name=self.capture_name, ) poller.result() - log.info(f"Deleted Azure Network Watcher packet capture: {self.capture_name}") + log.info("Deleted Azure Network Watcher packet capture: %s", self.capture_name) except AzureError as e: - log.error(f"Azure error occurred while deleting packet capture: {str(e)}") + log.error("Azure error occurred while deleting packet capture: %s", str(e)) except Exception as e: - log.error(f"Unexpected error occurred while deleting packet capture: {str(e)}") + log.error("Unexpected error occurred while deleting packet capture: %s", str(e)) def set_task(self, task): self.task = task diff --git a/modules/auxiliary/QemuScreenshots.py b/modules/auxiliary/QemuScreenshots.py index 0c54b5cb21c..30e1d81a141 100644 --- a/modules/auxiliary/QemuScreenshots.py +++ b/modules/auxiliary/QemuScreenshots.py @@ -102,7 +102,7 @@ def run(self): # log.info(f'Screenshot saved to {file_path}') img_counter += 1 except (IOError, libvirt.libvirtError) as e: - log.error(f"Cannot take screenshot: {e}") + log.error("Cannot take screenshot: %s", str(e)) continue def _take_screenshot(self): diff --git a/modules/machinery/aws.py b/modules/machinery/aws.py index 2bb3fe12cfc..c247a473bfc 100644 --- a/modules/machinery/aws.py +++ b/modules/machinery/aws.py @@ -52,7 +52,7 @@ def _initialize_check(self): self.ec2_machines = {} self.dynamic_machines_sequence = 0 self.dynamic_machines_count = 0 - log.info("connecting to AWS:{}".format(self.options.aws.region_name)) + log.info("connecting to AWS: %s", self.options.aws.region_name) # Performing a check to see if the access and secret keys were passed through the configuration file access_key = getattr(self.options.aws, "aws_access_key_id", None) @@ -81,7 +81,7 @@ def _initialize_check(self): ] ): if self._is_autoscaled(instance): - log.info("Terminating autoscaled instance %s" % instance.id) + log.info("Terminating autoscaled instance %s", instance.id) instance.terminate() instance_ids = self._list() @@ -165,7 +165,7 @@ def _allocate_new_machine(self): break except Exception as e: attempts += 1 - log.warning(f"Failed while creating new instance {e}. Trying again.") + log.warning("Failed while creating new instance %s.
Trying again.", str(e)) instance = None if instance is None: @@ -200,7 +200,7 @@ def _start_or_create_machines(self): # if no sufficient machines left -> launch a new machines while autoscale_options["autoscale"] and current_available_machines < running_machines_gap: if self.dynamic_machines_count >= dynamic_machines_limit: - log.debug("Reached dynamic machines limit - %d machines" % dynamic_machines_limit) + log.debug("Reached dynamic machines limit - %d machines", dynamic_machines_limit) break if not self._allocate_new_machine(): break @@ -245,10 +245,10 @@ def _status(self, label): status = AWS.ERROR else: status = AWS.ERROR - log.info("instance state: {}".format(status)) + log.info("instance state: %s", status) return status except Exception as e: - log.exception("can't retrieve the status: {}".format(e)) + log.exception("can't retrieve the status: %s", e) return AWS.ERROR """override Machinery method""" @@ -259,8 +259,7 @@ def start(self, label): @param label: virtual machine label. @raise CuckooMachineError: if unable to start. """ - log.debug("Starting vm {}".format(label)) - + log.debug("Starting vm %s", label) if not self._is_autoscaled(self.ec2_machines[label]): self.ec2_machines[label].start() self._wait_status(label, AWS.RUNNING) @@ -274,7 +273,7 @@ def stop(self, label): @param label: virtual machine label. @raise CuckooMachineError: if unable to stop. """ - log.debug("Stopping vm %s" % label) + log.debug("Stopping vm %s", label) status = self._status(label) @@ -364,7 +363,7 @@ def _restore(self, label): This method detaches and deletes the current volume, then creates a new one and attaches it. :param label: machine label """ - log.info("restoring machine: {}".format(label)) + log.info("restoring machine: %s", label) vm_info = self.db.view_machine_by_label(label) snap_id = vm_info.snapshot instance = self.ec2_machines[label] @@ -378,7 +377,7 @@ def _restore(self, label): log.debug("Detaching %s", old_volume.id) resp = instance.detach_volume(VolumeId=old_volume.id, Force=True) - log.debug("response: {}".format(resp)) + log.debug("response: %s", resp) while True: old_volume.reload() if old_volume.state != "in-use": @@ -412,7 +411,7 @@ def _restore(self, label): log.debug("Attaching new volume") resp = instance.attach_volume(VolumeId=new_volume.id, Device="/dev/sda1") - log.debug("response {}".format(resp)) + log.debug("response %s", resp) while True: new_volume.reload() if new_volume.state != "available": @@ -421,4 +420,4 @@ def _restore(self, label): log.debug("new volume %s in state %s", new_volume.id, new_volume.state) if new_volume.state != "in-use": new_volume.delete() - raise CuckooMachineError("New volume turned into state %s instead of 'in-use'" % old_volume.state) + raise CuckooMachineError("New volume turned into state %s instead of 'in-use'", old_volume.state) diff --git a/modules/machinery/az.py b/modules/machinery/az.py index fba0172b769..75c72a2b21a 100644 --- a/modules/machinery/az.py +++ b/modules/machinery/az.py @@ -24,7 +24,6 @@ HAVE_AZURE = True except ImportError: - print("Missing machinery-required libraries.") print("poetry run pip install azure-identity msrest msrestazure azure-mgmt-compute azure-mgmt-network") @@ -158,14 +157,14 @@ def _initialize(self): # scale set, which is bad for Cuckoo logic if scale_set_opts["initial_pool_size"] <= 0: raise CuckooCriticalError( - f"The initial pool size for VMSS '{scale_set_id}' is 0. Please set it to a positive integer." + "The initial pool size for VMSS '%s' is 0. 
Please set it to a positive integer." % scale_set_id ) # Insert the scale_set_opts into the module.scale_sets attribute mmanager_opts["scale_sets"][scale_set_id] = scale_set_opts except (AttributeError, CuckooCriticalError) as e: - log.warning(f"Configuration details about scale set {scale_set_id.strip()} are missing: {e}") + log.warning("Configuration details about scale set %s are missing: %s", str(scale_set_id.strip()), str(e)) continue def _initialize_check(self): @@ -243,7 +242,7 @@ def _thr_refresh_clients(self): and compute clients using an updated ClientSecretCredential object. """ - log.debug(f"Connecting to Azure for the region '{self.options.az.region_name}'.") + log.debug("Connecting to Azure for the region '%s'.", self.options.az.region_name) # Getting an updated ClientSecretCredential credentials = self._get_credentials() @@ -287,7 +286,7 @@ def _set_vmss_stage(self): operation=self.compute_client.gallery_images.get, ) except CuckooMachineError: - raise CuckooCriticalError(f"Gallery image '{scale_set_values.gallery_image_name}' does not exist") + raise CuckooCriticalError("Gallery image '%s' does not exist" % scale_set_values.gallery_image_name) # Map the Image Reference to the VMSS self.required_vmsss[scale_set_id]["platform"] = scale_set_values.platform.capitalize() @@ -298,13 +297,13 @@ def _set_vmss_stage(self): # All required VMSSs must have an image reference, tag and os for required_vmss_name, required_vmss_values in self.required_vmsss.items(): if required_vmss_values["image"] is None: - raise CuckooCriticalError(f"The VMSS '{required_vmss_name}' does not have an image reference.") + raise CuckooCriticalError("The VMSS '%s' does not have an image reference." % required_vmss_name) elif required_vmss_values["tag"] is None: - raise CuckooCriticalError(f"The VMSS '{required_vmss_name}' does not have an tag.") + raise CuckooCriticalError("The VMSS '%s' does not have a tag." % required_vmss_name) elif required_vmss_values["platform"] is None: - raise CuckooCriticalError(f"The VMSS '{required_vmss_name}' does not have an OS value.") + raise CuckooCriticalError("The VMSS '%s' does not have an OS value." % required_vmss_name) elif required_vmss_values["initial_pool_size"] is None: - raise CuckooCriticalError(f"The VMSS '{required_vmss_name}' does not have an initial pool size.") + raise CuckooCriticalError("The VMSS '%s' does not have an initial pool size." % required_vmss_name) self._process_pre_existing_vmsss() self._check_cpu_cores() @@ -329,11 +328,9 @@ def _process_pre_existing_vmsss(self): # Delete incorrectly named VMSSs or mark them as existing for vmss in existing_vmsss: - # If a VMSS does not have any tags or does not have the tag that we use to indicate that it is used for # Cuckoo (AUTO_SCALE_CAPE key-value pair), ignore if not vmss.tags or not vmss.tags.get(Azure.AUTO_SCALE_CAPE_KEY) == Azure.AUTO_SCALE_CAPE_VALUE: - # Ignoring... unless!
They have one of the required names of the VMSSs that we are going to create if vmss.name in self.required_vmsss.keys(): async_delete_vmss = Azure._azure_api_call( @@ -493,7 +490,7 @@ def start(self, label=None): # Something bad happened, we are starting a task on a machine that needs to be deleted with vms_currently_being_deleted_lock: if label in vms_currently_being_deleted: - raise CuckooMachineError(f"Attempting to start a task with machine {label} while it is scheduled for deletion.") + raise CuckooMachineError("Attempting to start a task with machine %s while it is scheduled for deletion." % label) def stop(self, label=None): """ @@ -501,7 +498,7 @@ def stop(self, label=None): @param label: virtual machine label @return: End method call """ - log.debug(f"Stopping machine '{label}'") + log.debug("Stopping machine '%s'", label) # Parse the tag and instance id out to confirm which VMSS to modify vmss_name, instance_id = label.split("_") # If we aren't scaling down, then reimage @@ -561,7 +558,7 @@ def _add_machines_to_db(self, vmss_name): @param vmss_name: the name of the VMSS to be queried """ try: - log.debug(f"Adding machines to database for {vmss_name}.") + log.debug("Adding machines to database for %s.", vmss_name) # We don't want to re-add machines! Therefore, let's see what we're working with machines_in_db = self.db.list_machines() db_machine_labels = [machine.label for machine in machines_in_db] @@ -595,13 +592,13 @@ def _add_machines_to_db(self, vmss_name): continue if vmss_vm.name in vms_to_avoid_adding: # Don't add it if it is currently being deleted! - log.debug(f"{vmss_vm.name} is currently being deleted!") + log.debug("%s is currently being deleted!", vmss_vm.name) continue # According to Microsoft, the OS type is... platform = vmss_vm.storage_profile.os_disk.os_type.lower() if not vmss_vm.network_profile: - log.error(f"{vmss_vm.name} does not have a network profile") + log.error("%s does not have a network profile", vmss_vm.name) continue vmss_vm_nic = next( @@ -614,7 +611,7 @@ def _add_machines_to_db(self, vmss_name): ) if not vmss_vm_nic: log.error( - f"{vmss_vm.network_profile.network_interfaces[0].id.lower()} does not match any NICs in {[vmss_vm_nic.id.lower() for vmss_vm_nic in vmss_vm_nics]}" + "%s does not match any NICs in %s", vmss_vm.network_profile.network_interfaces[0].id.lower(), str([vmss_vm_nic.id.lower() for vmss_vm_nic in vmss_vm_nics]) ) continue # Sets "new_machine" object in configuration object to @@ -623,7 +620,7 @@ def _add_machines_to_db(self, vmss_name): private_ip = vmss_vm_nic.ip_configurations[0].private_ip_address if private_ip in db_machine_ips: - log.error(f"The IP '{private_ip}' is already associated with a machine in the DB. Moving on...") + log.error("The IP '%s' is already associated with a machine in the DB. Moving on...", private_ip) continue # Add machine to DB. @@ -659,15 +656,15 @@ def _add_machines_to_db(self, vmss_name): try: thr.join() except CuckooGuestCriticalTimeout: - log.debug(f"Rough start for {vm}, deleting.") + log.debug("Rough start for %s, deleting.", vm) self.delete_machine(vm) raise except Exception as e: - log.error(repr(e), exc_info=True) + log.exception(repr(e)) # If no machines on any VMSSs are in the db when we leave this method, CAPE will crash. if not self.machines() and self.required_vmsss[vmss_name]["retries"] > 0: - log.warning(f"No available VMs after initializing {vmss_name}. Attempting to reinitialize VMSS.") + log.warning("No available VMs after initializing %s.
Attempting to reinitialize VMSS.", vmss_name) self.required_vmsss[vmss_name]["retries"] -= 1 start_time = timeit.default_timer() @@ -678,14 +675,14 @@ def _add_machines_to_db(self, vmss_name): continue self._update_or_create_vmsss(vmsss_dict={vmss_name: self.required_vmsss[vmss_name]}) return - log.debug(f"{vmss_name} initialize retry failed. Timed out waiting for VMs to be deleted.") + log.debug("%s initialize retry failed. Timed out waiting for VMs to be deleted.", vmss_name) def _delete_machines_from_db_if_missing(self, vmss_name): """ Delete machine from database if it does not exist in the VMSS. @param vmss_name: the name of the VMSS to be queried """ - log.debug(f"Deleting machines from database if they do not exist in the VMSS {vmss_name}.") + log.debug("Deleting machines from database if they do not exist in the VMSS %s.", vmss_name) # Get all VMs in the VMSS paged_vmss_vms = Azure._azure_api_call( self.options.az.sandbox_resource_group, @@ -735,16 +732,16 @@ def _thr_wait_for_ready_machine(machine_name, machine_ip): # We did it! break except socket.timeout: - log.debug(f"{machine_name}: Initializing...") + log.debug("%s: Initializing...", machine_name) except socket.error: - log.debug(f"{machine_name}: Initializing...") + log.debug("%s: Initializing...", machine_name) if (timeit.default_timer() - start) >= timeout: # We didn't do it :( raise CuckooGuestCriticalTimeout( - f"Machine {machine_name}: the guest initialization hit the critical timeout, analysis aborted." + "Machine %s: the guest initialization hit the critical timeout, analysis aborted.", machine_name ) time.sleep(10) - log.debug(f"Machine {machine_name} was created and available in {round(timeit.default_timer() - start)}s") + log.debug("Machine %s was created and available in %d s", machine_name, round(timeit.default_timer() - start)) @staticmethod def _azure_api_call(*args, **kwargs): @@ -764,13 +761,13 @@ def _azure_api_call(*args, **kwargs): api_call = f"{operation}({args},{kwargs})" try: - log.debug(f"Trying {api_call}") + log.debug("Trying %s", api_call) results = operation(*args, **kwargs) except Exception as exc: # For ClientRequestErrors, they do not have the attribute 'error' error = exc.error.error if getattr(exc, "error", False) else exc log.warning( - f"Failed to {api_call} due to the Azure error '{error}': '{exc.message if hasattr(exc, 'message') else repr(exc)}'." 
+ "Failed to 5s due to the Azure error '%s': '%s'.", str(api_call), str(error), f"{exc.message if hasattr(exc, 'message') else repr(exc)}" ) if "NotFound" in repr(exc) or (hasattr(exc, "status_code") and exc.status_code == 404): # Note that this exception is used to represent if an Azure resource @@ -782,7 +779,7 @@ def _azure_api_call(*args, **kwargs): # Log the subscription limits headers = results._response.headers log.debug( - f"API Charge: {headers['x-ms-request-charge']}; Remaining Calls: {headers['x-ms-ratelimit-remaining-resource']}" + "API Charge: %s; Remaining Calls: %s", headers['x-ms-request-charge'], headers['x-ms-ratelimit-remaining-resource'] ) return results @@ -804,7 +801,7 @@ def _thr_create_vmss(self, vmss_name, vmss_image_ref, vmss_image_os): ).id # note the id attribute here except CuckooMachineError: raise CuckooCriticalError( - f"Subnet '{self.options.az.subnet}' does not exist in Virtual Network '{self.options.az.vnet}'" + "Subnet '%s' does not exist in Virtual Network '%s'", self.options.az.subnet, self.options.az.vnet ) vmss_managed_disk = models.VirtualMachineScaleSetManagedDiskParameters( @@ -915,7 +912,7 @@ def _thr_reimage_vmss(self, vmss_name): ) _ = self._handle_poller_result(async_restart_vmss) else: - log.error(repr(e), exc_info=True) + log.exception(repr(e)) raise with self.db.session.begin(): self._add_machines_to_db(vmss_name) @@ -985,7 +982,7 @@ def _scale_machine_pool(self, tag, per_platform=False): if number_of_relevant_machines_required > self.subnet_limit: number_of_relevant_machines_required = self.subnet_limit - log.debug("Scaling limited by the size of the subnet: %s" % self.subnet_limit) + log.debug("Scaling limited by the size of the subnet: %s", self.subnet_limit) number_of_machines = len(self.db.list_machines()) projected_total_machines = number_of_machines - number_of_relevant_machines + number_of_relevant_machines_required @@ -1024,7 +1021,7 @@ def _scale_machine_pool(self, tag, per_platform=False): number_of_relevant_machines + number_of_new_cpus_available / self.instance_type_cpus ) log.debug( - f"Quota could be exceeded with projected number of machines ({old_number_of_relevant_machines_required}). Setting new limit to {number_of_relevant_machines_required}" + "Quota could be exceeded with projected number of machines (%s). Setting new limit to %s", str(old_number_of_relevant_machines_required), str(number_of_relevant_machines_required) ) if machine_pools[vmss_name]["size"] == number_of_relevant_machines_required: @@ -1034,7 +1031,7 @@ def _scale_machine_pool(self, tag, per_platform=False): self._delete_machines_from_db_if_missing(vmss_name) # Update the VMSS size accordingly machine_pools[vmss_name]["size"] = len(self._get_relevant_machines(tag)) - log.debug(f"The size of the machine pool {vmss_name} is already the size that we want") + log.debug("The size of the machine pool %s is already the size that we want", vmss_name) machine_pools[vmss_name]["is_scaling"] = False if platform: is_platform_scaling[platform] = False @@ -1058,7 +1055,7 @@ def _scale_machine_pool(self, tag, per_platform=False): if relevant_task_queue == initial_number_of_locked_relevant_machines == 0: # The VMSS will scale in via the ScaleInPolicy. 
machine_pools[vmss_name]["wait"] = True - log.debug(f"System is at rest, scale down {vmss_name} capacity and delete machines.") + log.debug("System is at rest, scale down %s capacity and delete machines.", vmss_name) # System is not at rest, but task queue is 0, therefore set machines in use to delete elif relevant_task_queue == 0: machine_pools[vmss_name]["is_scaling_down"] = True @@ -1079,7 +1076,7 @@ def _scale_machine_pool(self, tag, per_platform=False): # We don't want to be stuck in this for longer than the timeout specified if (timeit.default_timer() - start_time) > AZURE_TIMEOUT: - log.debug(f"Breaking out of the while loop within the scale down section for {vmss_name}.") + log.debug("Breaking out of the while loop within the scale down section for %s.", vmss_name) break # Get the updated number of relevant machines required relevant_task_queue = self._get_number_of_relevant_tasks(tag) @@ -1094,7 +1091,7 @@ def _scale_machine_pool(self, tag, per_platform=False): # Relaxxxx time.sleep(self.options.az.scale_down_polling_period) log.debug( - f"Scaling {vmss_name} down until new task is received. {number_of_relevant_machines} -> {number_of_relevant_machines_required}" + "Scaling %s down until new task is received. %s -> %s", vmss_name, str(number_of_relevant_machines), str(number_of_relevant_machines_required) ) # Get an updated count of relevant machines @@ -1112,7 +1109,7 @@ def _scale_machine_pool(self, tag, per_platform=False): return # Update the capacity of the VMSS - log.debug(f"Scaling {vmss_name} size from {initial_capacity} -> {number_of_relevant_machines_required}") + log.debug("Scaling %s size from %s -> %s", vmss_name, initial_capacity, str(number_of_relevant_machines_required)) vmss = Azure._azure_api_call( self.options.az.sandbox_resource_group, vmss_name, @@ -1144,11 +1141,11 @@ def _scale_machine_pool(self, tag, per_platform=False): return timediff = timeit.default_timer() - start_time - log.debug(f"The scaling of {vmss_name} took {round(timediff)}s") + log.debug("The scaling of %s took %d s", vmss_name, round(timediff)) machine_pools[vmss_name]["size"] = number_of_relevant_machines_required # Alter the database based on if we scaled up or down - log.debug(f"Updated {vmss_name} capacity: {number_of_relevant_machines_required}; Initial capacity: {initial_capacity}") + log.debug("Updated %s capacity: %s; Initial capacity: %s", vmss_name, str(number_of_relevant_machines_required), str(initial_capacity)) if number_of_relevant_machines_required > initial_capacity: self._add_machines_to_db(vmss_name) else: @@ -1159,14 +1156,13 @@ def _scale_machine_pool(self, tag, per_platform=False): machine_pools[vmss_name]["is_scaling"] = False if platform: is_platform_scaling[platform] = False - log.debug(f"Scaling {vmss_name} has completed.") + log.debug("Scaling %s has completed.", vmss_name) except Exception as exc: machine_pools[vmss_name]["wait"] = False machine_pools[vmss_name]["is_scaling"] = False if platform: is_platform_scaling[platform] = False - log.error(repr(exc), exc_info=True) - log.debug(f"Scaling {vmss_name} has completed with errors {exc!r}.") + log.exception("Scaling %s has completed with errors %s.", vmss_name, str(exc)) @staticmethod def _handle_poller_result(lro_poller_object): @@ -1182,7 +1178,7 @@ def _handle_poller_result(lro_poller_object): raise CuckooMachineError(repr(e)) time_taken = timeit.default_timer() - start_time if time_taken >= AZURE_TIMEOUT: - raise CuckooMachineError(f"The task took {round(time_taken)}s to complete! 
Bad Azure!") + raise CuckooMachineError("The task took %ds to complete! Bad Azure!", round(time_taken)) else: return lro_poller_result @@ -1306,7 +1302,7 @@ def _thr_reimage_list_reader(self): operation=self.compute_client.virtual_machine_scale_sets.begin_reimage_all, ) except Exception as exc: - log.error(repr(exc), exc_info=True) + log.exception(repr(exc)) # If InvalidParameter: 'The provided instanceId x is not an active Virtual Machine Scale Set VM instanceId. # This means that the machine has been deleted # If BadRequest: The VM x creation in Virtual Machine Scale Set > with ephemeral disk is not complete. Please trigger a restart if required' @@ -1322,10 +1318,10 @@ def _thr_reimage_list_reader(self): for instance_id in instance_ids_that_should_not_be_reimaged_again: if "InvalidParameter" in repr(exc): - log.warning(f"Machine {vmss_to_reimage}_{instance_id} does not exist anymore. Deleting from database.") + log.warning("Machine %s does not exist anymore. Deleting from database.", f"{vmss_to_reimage}_{instance_id}") elif "BadRequest" in repr(exc): log.warning( - f"Machine {vmss_to_reimage}_{instance_id} cannot start due to ephemeral disk issues with Azure. Deleting from database and Azure." + "Machine %s cannot start due to ephemeral disk issues with Azure. Deleting from database and Azure.", f"{vmss_to_reimage}_{instance_id}" ) with vms_currently_being_deleted_lock: vms_currently_being_deleted.append(f"{vmss_to_reimage}_{instance_id}") @@ -1348,7 +1344,7 @@ def _thr_reimage_list_reader(self): reimaged = False log.warning( - f"Reimaging machines {instance_ids} in {vmss_to_reimage} took too long, deleting them from the DB and the VMSS." + "Reimaging machines %s in %s took too long, deleting them from the DB and the VMSS.", str(instance_ids), str(vmss_to_reimage) ) # That sucks, now we have mark each one for deletion for instance_id in instance_ids: @@ -1365,10 +1361,10 @@ def _thr_reimage_list_reader(self): current_vmss_operations -= 1 timediff = timeit.default_timer() - start_time log.debug( - f"{'S' if reimaged else 'Uns'}uccessfully reimaging instances {instance_ids} in {vmss_to_reimage} took {round(timediff)}s" + "%successfully reimaging instances %s in %s took %ds", {'S' if reimaged else 'Uns'}, str(instance_ids), str(vmss_to_reimage), round(timediff) ) except Exception as e: - log.error(f"Exception occurred in the reimage thread: {e}. Trying again...") + log.error("Exception occurred in the reimage thread: %s. Trying again...", str(e)) def _thr_delete_list_reader(self): global current_vmss_operations @@ -1411,7 +1407,7 @@ def _thr_delete_list_reader(self): operation=self.compute_client.virtual_machine_scale_sets.begin_delete_instances, ) except Exception as exc: - log.error(repr(exc), exc_info=True) + log.exception(repr(exc)) with current_operations_lock: current_vmss_operations -= 1 with vms_currently_being_deleted_lock: @@ -1423,7 +1419,7 @@ def _thr_delete_list_reader(self): while not async_delete_some_machines.done(): deleted = True if (timeit.default_timer() - start_time) > AZURE_TIMEOUT: - log.warning(f"Deleting machines {instance_ids} in {vmss_to_delete_from} took too long.") + log.warning("Deleting machines %s in %s took too long.", str(instance_ids), str(vmss_to_delete_from)) deleted = False break time.sleep(2) @@ -1431,7 +1427,7 @@ def _thr_delete_list_reader(self): if self.initializing and deleted: # All machines should have been removed from the db and the VMSS at this point. # To force the VMSS to scale to initial_pool_size, set the size to zero here. 
- log.debug(f"Setting size to 0 for VMSS {vmss_to_delete_from} after successful deletion") + log.debug("Setting size to 0 for VMSS %s after successful deletion", vmss_to_delete_from) machine_pools[vmss_to_delete_from]["size"] = 0 with vms_currently_being_deleted_lock: @@ -1441,7 +1437,7 @@ def _thr_delete_list_reader(self): with current_operations_lock: current_vmss_operations -= 1 log.debug( - f"{'S' if deleted else 'Uns'}uccessfully deleting instances {instance_ids} in {vmss_to_delete_from} took {round(timeit.default_timer() - start_time)}s" + "%successfully deleting instances %s in {vmss_to_delete_from} took %ss", 'S' if deleted else 'Uns', str(instance_ids), str(round(timeit.default_timer() - start_time)) ) except Exception as e: - log.error(f"Exception occurred in the delete thread: {e}. Trying again...") + log.error("Exception occurred in the delete thread: %s. Trying again...", str(e)) diff --git a/modules/machinery/vsphere.py b/modules/machinery/vsphere.py index 600d19499f4..692d2566aa6 100644 --- a/modules/machinery/vsphere.py +++ b/modules/machinery/vsphere.py @@ -88,7 +88,7 @@ def _initialize_check(self): sslContext = ssl._create_unverified_context() self.connect_opts["sslContext"] = sslContext - log.warn("Turning off SSL certificate verification!") + log.warning("Turning off SSL certificate verification!") # Check that a snapshot is configured for each machine # and that it was taken in a powered-on state diff --git a/modules/processing/amsi.py b/modules/processing/amsi.py index 0accb640888..ee33c5eb30f 100644 --- a/modules/processing/amsi.py +++ b/modules/processing/amsi.py @@ -23,10 +23,10 @@ def run(self): try: decoded = self.decode_event(json.loads(line)) except Exception: - log.exception(f"Failed to process line {idx} of {jsonl_file}.") + log.exception("Failed to process line %d of %s.", idx, jsonl_file) break result.append(decoded) - log.info(f"Processed {idx} AMSI event{'s' if idx != 1 else ''}.") + log.info("Processed %d AMSI event{'s' if idx != 1 else ''}.", idx) return result diff --git a/modules/processing/analysisinfo.py b/modules/processing/analysisinfo.py index 23d2d20aa5b..0bb71ab0afd 100644 --- a/modules/processing/analysisinfo.py +++ b/modules/processing/analysisinfo.py @@ -29,7 +29,7 @@ def get_running_commit() -> str: head_name = Path(git_folder, "HEAD").read_text().split("\n")[0].split(" ")[-1] return Path(git_folder, head_name).read_text().replace("\n", "") except Exception as e: - log.error(f"Error getting running commit hash: {e}") + log.error("Error getting running commit hash: %s", str(e)) return "unknown" diff --git a/modules/processing/behavior.py b/modules/processing/behavior.py index 02d598dd55f..abbd81bd8ec 100644 --- a/modules/processing/behavior.py +++ b/modules/processing/behavior.py @@ -310,7 +310,7 @@ def _parse(self, row): try: argument["value"] = convert_to_printable(arg_value, self.conversion_cache) except Exception: - log.error(arg_value, exc_info=True) + log.exception(arg_value) continue if not self.reporting_mode: if isinstance(arg_value_raw, bytes): diff --git a/modules/processing/network.py b/modules/processing/network.py index 22bb964bafd..c87b4d287a4 100644 --- a/modules/processing/network.py +++ b/modules/processing/network.py @@ -1108,7 +1108,6 @@ def _import_ja3_fprints(self): return ja3_fprints def run(self): - if not path_exists(self.pcap_path): log.debug('The PCAP file does not exist at path "%s"', self.pcap_path) return {} diff --git a/modules/processing/parsers/CAPE/Snake.py b/modules/processing/parsers/CAPE/Snake.py deleted 
file mode 100644 index 50356034693..00000000000 --- a/modules/processing/parsers/CAPE/Snake.py +++ /dev/null @@ -1,174 +0,0 @@ -import base64 -import hashlib -import logging -import re -import traceback - -import dnfile - -try: - from Cryptodome.Cipher import DES - from Cryptodome.Util.Padding import unpad -except ModuleNotFoundError: - raise ModuleNotFoundError("Please run: pip3 install pycryptodomex") - -log = logging.getLogger(__name__) -log.setLevel(logging.INFO) - - -def is_base64(s): - pattern = re.compile("^([A-Za-z0-9+/]{4})*([A-Za-z0-9+/]{4}|[A-Za-z0-9+/]{3}=|[A-Za-z0-9+/]{2}==)$") - if not s or len(s) < 1: - return False - else: - return pattern.match(s) - - -def pad(text): - n = len(text) % 8 - return text + (b" " * n) - - -def md5(string: bytes) -> bytes: - return bytes.fromhex(hashlib.md5(string).hexdigest()) - - -def handle_plain(dotnet_file, c2_type, user_strings): - user_strings_list = list(user_strings.values()) - if c2_type == "Telegram": - token = dotnet_file.net.user_strings.get(user_strings_list[15]).value.__str__() - chat_id = dotnet_file.net.user_strings.get(user_strings_list[16]).value.__str__() - return {"Type": "Telegram", "C2": f"https://api.telegram.org/bot{token}/sendMessage?chat_id={chat_id}"} - elif c2_type == "SMTP": - smtp_from = dotnet_file.net.user_strings.get(user_strings_list[7]).value.__str__() - smtp_password = dotnet_file.net.user_strings.get(user_strings_list[8]).value.__str__() - smtp_host = dotnet_file.net.user_strings.get(user_strings_list[9]).value.__str__() - smtp_to = dotnet_file.net.user_strings.get(user_strings_list[10]).value.__str__() - smtp_port = dotnet_file.net.user_strings.get(user_strings_list[11]).value.__str__() - return { - "Type": "SMTP", - "Host": smtp_host, - "Port": smtp_port, - "From Address": smtp_from, - "To Address": smtp_to, - "Password": smtp_password, - } - elif c2_type == "FTP": - ftp_username = dotnet_file.net.user_strings.get(user_strings_list[12]).value.__str__() - ftp_password = dotnet_file.net.user_strings.get(user_strings_list[13]).value.__str__() - ftp_host = dotnet_file.net.user_strings.get(user_strings_list[14]).value.__str__() - return {"Type": "FTP", "Host": ftp_host, "Username": ftp_username, "Password": ftp_password} - - -def handle_encrypted(dotnet_file, data, c2_type, user_strings): - # Match decrypt string pattern - decrypt_string_pattern = re.compile( - Rb"""(?x) - \x72(...)\x70 - \x7E(...)\x04 - \x28...\x06 - \x80...\x04 - """ - ) - - config_dict = None - decrypted_strings = [] - - matches2 = decrypt_string_pattern.findall(data) - for match in matches2: - string_index = int.from_bytes(match[0], "little") - user_string = dotnet_file.net.user_strings.get(string_index).value - # Skip user strings that are empty/not base64 - if user_string == "Yx74dJ0TP3M=" or not is_base64(user_string): - continue - field_row_index = int.from_bytes(match[1], "little") - field_name = dotnet_file.net.mdtables.Field.get_with_row_index(field_row_index).Name.__str__() - key_index = user_strings[field_name] - key_str = dotnet_file.net.user_strings.get(key_index).value.__str__() - key = md5(key_str.encode())[:8] - des = DES.new(key, DES.MODE_ECB) - - decoded_str = base64.b64decode(user_string) - padded_str = pad(decoded_str) - decrypted_text = des.decrypt(padded_str) - plaintext_bytes = unpad(decrypted_text, DES.block_size) - plaintext = plaintext_bytes.decode() - decrypted_strings.append(plaintext) - - if decrypted_strings: - if c2_type == "Telegram": - token, chat_id = decrypted_strings - config_dict = {"Type": "Telegram", 
"C2": f"https://api.telegram.org/bot{token}/sendMessage?chat_id={chat_id}"} - elif c2_type == "SMTP": - smtp_from, smtp_password, smtp_host, smtp_to, smtp_port = decrypted_strings - config_dict = { - "Type": "SMTP", - "Host": smtp_host, - "Port": smtp_port, - "From Address": smtp_from, - "To Address": smtp_to, - "Password": smtp_password, - } - elif c2_type == "FTP": - ftp_username, ftp_password, ftp_host = decrypted_strings - config_dict = {"Type": "FTP", "Host": ftp_host, "Username": ftp_username, "Password": ftp_password} - return config_dict - - -def extract_config(data): - - try: - dotnet_file = dnfile.dnPE(data=data) - except Exception as e: - log.debug(f"Exception when attempting to parse .NET file: {e}") - log.debug(traceback.format_exc()) - - # ldstr, stsfld - static_strings = re.compile( - Rb"""(?x) - \x72(...)\x70 - \x80(...)\x04 - """ - ) - - # Get user strings and C2 type - user_strings = {} - c2_type = None - matches = static_strings.findall(data) - for match in matches: - try: - string_index = int.from_bytes(match[0], "little") - string_value = dotnet_file.net.user_strings.get(string_index).value.__str__() - field_index = int.from_bytes(match[1], "little") - field_name = dotnet_file.net.mdtables.Field.get_with_row_index(field_index).Name.__str__() - if string_value == "$%TelegramDv$": - c2_type = "Telegram" - - elif string_value == "$%SMTPDV$": - c2_type = "SMTP" - - elif string_value == "%FTPDV$": - c2_type = "FTP" - else: - user_strings[field_name] = string_index - except Exception as e: - log.debug(f"There was an exception parsing user strings: {e}") - log.debug(traceback.format_exc()) - - if c2_type is None: - raise ValueError("Could not identify C2 type.") - - # Handle encrypted strings - config_dict = handle_encrypted(dotnet_file, data, c2_type, user_strings) - if config_dict is None: - # Handle plain strings - config_dict = handle_plain(dotnet_file, c2_type, user_strings) - - return config_dict - - -if __name__ == "__main__": - import sys - - with open(sys.argv[1], "rb") as f: - print(extract_config(f.read())) diff --git a/modules/processing/reversinglabs.py b/modules/processing/reversinglabs.py index 49e663d4924..a55a838f523 100644 --- a/modules/processing/reversinglabs.py +++ b/modules/processing/reversinglabs.py @@ -124,7 +124,7 @@ def run(self): return {} target = self.task["target"] - log.debug(f"Looking up: {target}") + log.debug("Looking up: %s", target) reversing_labs_response = reversing_labs_lookup(target) if "error" in reversing_labs_response: raise CuckooProcessingError(reversing_labs_response["msg"]) diff --git a/modules/reporting/browserext.py b/modules/reporting/browserext.py index 20872821dd8..3486ac3205f 100644 --- a/modules/reporting/browserext.py +++ b/modules/reporting/browserext.py @@ -23,5 +23,5 @@ def run(self, results): with open(browser_log_path, "r") as blp_fd: try: results["browser"]["requests"] = json.load(blp_fd) - except Exception as ex: - log.debug(f"error parsing browser requests json: {ex}") + except Exception as e: + log.debug("error parsing browser requests json: %s", str(e)) diff --git a/modules/reporting/maec5.py b/modules/reporting/maec5.py index bb237e251c7..59958b076aa 100644 --- a/modules/reporting/maec5.py +++ b/modules/reporting/maec5.py @@ -284,7 +284,6 @@ def add_dropped_files(self): # Grab list of all dropped files- remember # package['observable_objects'] is a dict where the key is object-ID for f in self.results["dropped"]: - # Create a new Malware Instance for each dropped file malwareInstance = 
self.create_malware_instance(f) @@ -432,7 +431,7 @@ def create_network_obj(self, value, obj): elif re.match("^([0-9a-fA-F][0-9a-fA-F]:){5}([0-9a-fA-F][0-9a-fA-F])$", value): network_obj["type"] = "mac-addr" # Test for an IPv4 address - elif re.match("^(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[0-9]{1,2})){3}$", value): + elif re.match(r"^(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[0-9]{1,2})){3}$", value): network_obj["type"] = "ipv4-addr" obj["protocols"] = ["ipv4", "tcp"] else: diff --git a/modules/reporting/mongodb.py b/modules/reporting/mongodb.py index b0d55f2285a..f56a92e2737 100644 --- a/modules/reporting/mongodb.py +++ b/modules/reporting/mongodb.py @@ -170,12 +170,12 @@ def run(self, results): for j, parent_dict in enumerate(report[parent_key]): child_key, csize = self.debug_dict_size(parent_dict)[0] if csize > size_filter: - log.warn("results['%s']['%s'] deleted due to size: %s", parent_key, child_key, csize) + log.warning("results['%s']['%s'] deleted due to size: %s", parent_key, child_key, csize) del report[parent_key][j][child_key] else: child_key, csize = self.debug_dict_size(report[parent_key])[0] if csize > size_filter: - log.warn("results['%s']['%s'] deleted due to size: %s", parent_key, child_key, csize) + log.warning("results['%s']['%s'] deleted due to size: %s", parent_key, child_key, csize) del report[parent_key][child_key] try: mongo_insert_one("analysis", report) diff --git a/modules/reporting/tmpfsclean.py b/modules/reporting/tmpfsclean.py index 92e5b25e5f0..55b5e3e06f8 100644 --- a/modules/reporting/tmpfsclean.py +++ b/modules/reporting/tmpfsclean.py @@ -10,6 +10,7 @@ class TMPFSCLEAN(Report): "Remove/save memdump" + order = 9998 def run(self, results): diff --git a/modules/signatures/CAPE.py b/modules/signatures/CAPE.py index e03e40b0f0c..45b644d2a9c 100644 --- a/modules/signatures/CAPE.py +++ b/modules/signatures/CAPE.py @@ -140,7 +140,6 @@ def __init__(self, *args, **kwargs): filter_apinames = set(["NtAllocateVirtualMemory", "NtProtectVirtualMemory", "VirtualProtectEx"]) def on_call(self, call, process): - if process["process_name"] in ("WINWORD.EXE", "EXCEL.EXE", "POWERPNT.EXE"): return False if call["api"] == "NtAllocateVirtualMemory": @@ -571,7 +570,6 @@ def __init__(self, *args, **kwargs): self.transacted_hollowing = False def on_call(self, call, process): - if call["api"] == "RtlSetCurrentTransaction": self.transaction_set = True diff --git a/pyproject.toml b/pyproject.toml index 1dcb7194cd0..db93009380b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -114,13 +114,6 @@ httpretty = "^1.1.4" func-timeout = "^4.3.5" pre-commit = "^2.19.0" -[tool.ruff] -select = ["E", "F"] -ignore = ["E402","E501"] -exclude = [ - "./analyzer/linux/dbus_next", -] - [tool.black] line-length = 132 include = "\\.py(_disabled)?$" @@ -146,9 +139,48 @@ norecursedirs = "tests/zip_compound" requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" -[lint] -select = ["E", "F"] -ignore = ["E402","E501"] +[tool.ruff] +line-length = 132 +exclude = [ + "./analyzer/linux/dbus_next", +] + +[tool.ruff.lint] +select = [ + "F", # pyflakes + "E", # pycodestyle errors + "W", # pycodestyle warnings + "I", # isort + # "N", # pep8-naming + "G", # flake8-logging-format +] + +ignore = [ + "E501", # ignore due to conflict with formatter + "N818", # exceptions don't need the Error suffix + "E741", # allow ambiguous variable names + "E402", + "W605", # ToDo to fix - Invalid escape sequence +] + +fixable = 
["ALL"] + +[tool.ruff.lint.per-file-ignores] +"stubs/*" = [ + "N", # naming conventions don't matter in stubs + "F403", # star imports are okay in stubs + "F405", # star imports are okay in stubs +] + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" +skip-magic-trailing-comma = false +line-ending = "auto" + +[tool.ruff.lint.isort] +known-first-party = ["libqtile", "test"] +default-section = "third-party" [tool.mypy] warn_unused_configs = true diff --git a/tests/grab_samples.py b/tests/grab_samples.py index d74bfd5747b..87187835f2b 100644 --- a/tests/grab_samples.py +++ b/tests/grab_samples.py @@ -43,7 +43,7 @@ def get_filepaths(directory, args): def load_sample_lists(args): sample_json_list = get_filepaths("tests/Extractors/StandAlone/unit_tests", args) for sample_json_location in sample_json_list: - logging.warning("Found sample.json: " + sample_json_location) + logging.warning("Found sample.json: %s", sample_json_location) with open(sample_json_location, "r") as samples: sample_dict = json.load(samples) for hash_item in sample_dict["hashes"]: @@ -60,7 +60,7 @@ def run(args): if __name__ == "__main__": parser = argparse.ArgumentParser( - description="Grab malicious samples from sample.json files via https://10.203.112.173/centralrepo/" + description="Grab malicious samples from sample.json" ) parser.add_argument("--family", action="store", dest="family", type=str) diff --git a/tests/integrity.py b/tests/integrity.py index b36bba3a635..d725de75b71 100644 --- a/tests/integrity.py +++ b/tests/integrity.py @@ -9,6 +9,7 @@ that there are no remaining tasks in the queue this utility will clean the entire database before starting various analyses. """ + import argparse import json import logging diff --git a/tests/tcr_misc.py b/tests/tcr_misc.py index 2406a2236fd..8c7771973eb 100644 --- a/tests/tcr_misc.py +++ b/tests/tcr_misc.py @@ -65,7 +65,7 @@ def get_malware_paths(path): def get_sample(hash, download_location): if os.path.isfile(download_location) and hash == hashlib.sha256(open(download_location, "rb").read()).hexdigest(): - logging.warning(download_location + " already there, skipping!") + logging.warning("%s already there, skipping!", download_location) else: r = s.get(SAMPLE_STORAGE + hash, verify=False, timeout=10) if r and r.status_code == 200: @@ -74,7 +74,7 @@ def get_sample(hash, download_location): raise Exception("Hashes doens't match") with open(download_location, mode="wb+") as file: file.write(r.content) - logging.warning(download_location + " grabbed!") + logging.warning("%s grabbed!", download_location) else: - logging.warning("Status code: {} - content: {}".format(r.status_code, r.content)) + logging.warning("Status code: %d - content: %s", r.status_code, r.text) raise Exception("Non 200 status code") diff --git a/tests/test_objects.py b/tests/test_objects.py index d6859890375..8752f4b02b6 100644 --- a/tests/test_objects.py +++ b/tests/test_objects.py @@ -82,7 +82,7 @@ def test_get_ssdeep(self, empty_file): assert empty_file["file"].get_ssdeep() is not None except ImportError: assert empty_file["file"].get_ssdeep() is None - logging.warn("Need to install pydeep python module") + logging.warning("Need to install pydeep python module") def test_get_type(self, empty_file): assert empty_file["file"].get_type() == "empty" diff --git a/tests/test_tls_utils.py b/tests/test_tls_utils.py index 5d6a166644a..c0e2d5f48c4 100644 --- a/tests/test_tls_utils.py +++ b/tests/test_tls_utils.py @@ -15,7 +15,6 @@ class TestTlsUtils: - def test_tlslog_to_sslkeylogfile(self, 
tmpdir): input_log = f"{tmpdir}/tlsdump.log" dest_log = f"{tmpdir}/sslkeys.log" diff --git a/utils/community.py b/utils/community.py index 80bf41d50cc..e6902f95a85 100644 --- a/utils/community.py +++ b/utils/community.py @@ -57,9 +57,10 @@ def flare_capa(proxy=None): path_mkdir(capa_sigs_path) for url in signature_urls: signature_name = url.rsplit("/", 1)[-1] - with http.request("GET", url, preload_content=False) as sig, open( - os.path.join(capa_sigs_path, signature_name), "wb" - ) as out_sig: + with ( + http.request("GET", url, preload_content=False) as sig, + open(os.path.join(capa_sigs_path, signature_name), "wb") as out_sig, + ): shutil.copyfileobj(sig, out_sig) print("[+] FLARE CAPA rules/signatures installed") diff --git a/utils/db_migration/versions/2_3_1_square_hammer.py b/utils/db_migration/versions/2_3_1_square_hammer.py index 10c6d2d2efa..f0f3cb81f70 100644 --- a/utils/db_migration/versions/2_3_1_square_hammer.py +++ b/utils/db_migration/versions/2_3_1_square_hammer.py @@ -9,6 +9,7 @@ Create Date: 2021-05-02 18:24:43.075702 """ + from contextlib import suppress # revision identifiers, used by Alembic. diff --git a/utils/db_migration/versions/add_shrike_and_parent_id_columns.py b/utils/db_migration/versions/add_shrike_and_parent_id_columns.py index e33fbb91bbf..63acf17615c 100644 --- a/utils/db_migration/versions/add_shrike_and_parent_id_columns.py +++ b/utils/db_migration/versions/add_shrike_and_parent_id_columns.py @@ -9,6 +9,7 @@ Create Date: 2015-03-29 08:43:11.468664 """ + # revision identifiers, used by Alembic. revision = "f111620bb8" down_revision = "4b09c454108c" diff --git a/utils/db_migration/versions/add_task_tlp.py b/utils/db_migration/versions/add_task_tlp.py index 85ab54d9bd8..76108b3e1cd 100644 --- a/utils/db_migration/versions/add_task_tlp.py +++ b/utils/db_migration/versions/add_task_tlp.py @@ -9,6 +9,7 @@ Create Date: 2020-04-10 12:17:18.530901 """ + # revision identifiers, used by Alembic. revision = "7331c4d994fd" down_revision = "30d0230de7cd" diff --git a/utils/dist.py b/utils/dist.py index 365115f5387..a3a32a4d67b 100644 --- a/utils/dist.py +++ b/utils/dist.py @@ -51,9 +51,10 @@ TASK_REPORTED, TASK_RUNNING, Database, + _Database, + init_database, ) from lib.cuckoo.core.database import Task as MD_Task -from lib.cuckoo.core.database import _Database, init_database dist_conf = Config("distributed") main_server_name = dist_conf.distributed.get("main_server_name", "master") @@ -141,6 +142,21 @@ def required(package): def node_status(url: str, name: str, apikey: str) -> dict: + """ + Retrieve the status of a CAPE node. + + This function sends a GET request to the specified CAPE node URL to retrieve its status. + It uses the provided API key for authorization. + + Args: + url (str): The base URL of the CAPE node. + name (str): The name of the CAPE node. + apikey (str): The API key for authorization. + + Returns: + dict: A dictionary containing the status data of the CAPE node. If an error occurs, + an empty dictionary is returned. + """ try: r = requests.get( os.path.join(url, "cuckoo", "status/"), headers={"Authorization": f"Token {apikey}"}, verify=False, timeout=300 @@ -152,6 +168,19 @@ def node_status(url: str, name: str, apikey: str) -> dict: def node_fetch_tasks(status, url, apikey, action="fetch", since=0): + """ + Fetches tasks from a remote server based on the given status and other parameters. + + Args: + status (str): The status of the tasks to fetch (e.g., "completed", "pending"). + url (str): The base URL of the remote server. 
+ apikey (str): The API key for authentication. + action (str, optional): The action to perform. Defaults to "fetch". + since (int, optional): The timestamp to fetch tasks completed after. Defaults to 0. + + Returns: + list: A list of tasks fetched from the remote server. Returns an empty list if an error occurs. + """ try: url = os.path.join(url, "tasks", "list/") params = dict(status=status, ids=True) @@ -159,8 +188,7 @@ def node_fetch_tasks(status, url, apikey, action="fetch", since=0): params["completed_after"] = since r = requests.get(url, params=params, headers={"Authorization": f"Token {apikey}"}, verify=False) if not r.ok: - log.error(f"Error fetching task list. Status code: {r.status_code} - {r.url}") - log.info("Saving error to /tmp/dist_error.html") + log.error("Error fetching task list. Status code: %d - %s. Saving error to /tmp/dist_error.html", r.status_code, r.url) _ = path_write_file("/tmp/dist_error.html", r.content) return [] return r.json().get("data", []) @@ -171,6 +199,19 @@ def node_fetch_tasks(status, url, apikey, action="fetch", since=0): def node_list_machines(url, apikey): + """ + Retrieves a list of machines from a CAPE node and yields Machine objects. + + Args: + url (str): The base URL of the CAPE node. + apikey (str): The API key for authentication. + + Yields: + Machine: An instance of the Machine class with the machine's details. + + Raises: + HTTPException: If the request to the CAPE node fails or returns an error. + """ try: r = requests.get(os.path.join(url, "machines", "list/"), headers={"Authorization": f"Token {apikey}"}, verify=False) for machine in r.json()["data"]: @@ -180,6 +221,19 @@ def node_list_machines(url, apikey): def node_list_exitnodes(url, apikey): + """ + Fetches a list of exit nodes from a given URL using the provided API key. + + Args: + url (str): The base URL of the CAPE node. + apikey (str): The API key for authorization. + + Yields: + dict: Each exit node data as a dictionary. + + Raises: + HTTPException: If the request fails or the response is invalid. + """ try: r = requests.get(os.path.join(url, "exitnodes/"), headers={"Authorization": f"Token {apikey}"}, verify=False) for exitnode in r.json()["data"]: @@ -189,6 +243,22 @@ def node_list_exitnodes(url, apikey): def node_get_report(task_id, fmt, url, apikey, stream=False): + """ + Fetches a report for a given task from a specified URL. + + Args: + task_id (int): The ID of the task for which the report is to be fetched. + fmt (str): The format of the report (e.g., 'json', 'html'). + url (str): The base URL of the server from which to fetch the report. + apikey (str): The API key for authorization. + stream (bool, optional): Whether to stream the response. Defaults to False. + + Returns: + requests.Response: The response object containing the report. + + Raises: + Exception: If there is an error fetching the report. + """ try: url = os.path.join(url, "tasks", "get", "report", "%d/" % task_id, fmt) return requests.get(url, stream=stream, headers={"Authorization": f"Token {apikey}"}, verify=False, timeout=800) @@ -197,16 +267,33 @@ def node_get_report(task_id, fmt, url, apikey, stream=False): def node_get_report_nfs(task_id, worker_name, main_task_id) -> bool: + """ + Retrieves a report from a worker node via NFS and copies it to the main task's analysis directory. + + Args: + task_id (int): The ID of the task on the worker node. + worker_name (str): The name of the worker node. + main_task_id (int): The ID of the main task on the main node. 
+ + Returns: + bool: True if the operation was successful, False otherwise. + + Raises: + Exception: If there is an error during the copying process. + + Logs: + Error messages if the worker node is not mounted, the file does not exist, or if there is an exception during copying. + """ worker_path = os.path.join(CUCKOO_ROOT, dist_conf.NFS.mount_folder, str(worker_name)) if not path_mount_point(worker_path): - log.error(f"[-] Worker: {worker_name} is not mounted to: {worker_path}!") + log.error("[-] Worker: %s is not mounted to: %s!", worker_name, worker_path) return True worker_path = os.path.join(worker_path, "storage", "analyses", str(task_id)) if not path_exists(worker_path): - log.error(f"File on destiny doesn't exist: {worker_path}") + log.error("Destination file doesn't exist: %s", worker_path) return True analyses_path = os.path.join(CUCKOO_ROOT, "storage", "analyses", str(main_task_id)) @@ -223,12 +310,32 @@ def node_get_report_nfs(task_id, worker_name, main_task_id) -> bool: def _delete_many(node, ids, nodes, db): + """ + Deletes multiple tasks from a specified node if the node is not the main server. + + Args: + node (str): The identifier of the node from which tasks are to be deleted. + ids (list): A list of task IDs to be deleted. + nodes (dict): A dictionary containing node information, where keys are node identifiers and values are node details. + db (object): The database connection object to perform rollback in case of failure. + + Returns: + None + + Raises: + Exception: If there is an error during the deletion process. + + Logs: + Debug: Logs the task IDs and node name from which tasks are being deleted. + Info: Logs the status code and content if the response status code is not 200. + Critical: Logs the error message if an exception occurs during the deletion process. + """ if nodes[node].name == main_server_name: return try: url = os.path.join(nodes[node].url, "tasks", "delete_many/") apikey = nodes[node].apikey - log.debug("Removing task id(s): {0} - from node: {1}".format(ids, nodes[node].name)) + log.debug("Removing task id(s): %s - from node: %s", ids, nodes[node].name) res = requests.post( url, headers={"Authorization": f"Token {apikey}"}, @@ -236,7 +343,7 @@ def _delete_many(node, ids, nodes, db): verify=False, ) if res and res.status_code != 200: - log.info("{} - {}".format(res.status_code, res.content)) + log.info("%d - %s", res.status_code, res.content) db.rollback() except Exception as e: @@ -245,6 +352,29 @@ def _delete_many(node, ids, nodes, db): def node_submit_task(task_id, node_id, main_task_id): + """ + Submits a task to a specified node for processing. + + Args: + task_id (int): The ID of the task to be submitted. + node_id (int): The ID of the node to which the task will be submitted. + main_task_id (int): The ID of the main task associated with this task. + + Returns: + bool: True if the task was successfully submitted, False otherwise. + + Raises: + Exception: If there is an error during the task submission process. + + The function performs the following steps: + 1. Retrieves the node and task information from the database. + 2. Checks if the node is the main server and returns if it is. + 3. Prepares the task data for submission based on the task category. + 4. Submits the task to the node using an HTTP POST request. + 5. Handles different response statuses from the node. + 6. Updates the task status in the database based on the submission result. + 7. Logs relevant information and errors during the process.
+ """ db = session() node = db.query(Node).with_entities(Node.id, Node.name, Node.url, Node.apikey).filter_by(id=node_id).first() task = db.query(Task).filter_by(id=task_id).first() @@ -314,7 +444,7 @@ def node_submit_task(task_id, node_id, main_task_id): files = dict(file=open(task.path, "rb")) r = requests.post(url, data=data, files=files, headers={"Authorization": f"Token {apikey}"}, verify=False) else: - log.debug("Target category is: {}".format(task.category)) + log.debug("Target category is: %s", task.category) db.close() return @@ -336,13 +466,12 @@ def node_submit_task(task_id, node_id, main_task_id): check = True else: log.debug( - "Failed to submit: main_task_id: {} task {} to node: {}, code: {}, msg: {}".format( + "Failed to submit: main_task_id: %d task %d to node: %s, code: %d, msg: %s", task.main_task_id, task_id, node.name, r.status_code, r.content - ) ) if task.task_id: - log.debug("Submitted task to worker: {} - {} - {}".format(node.name, task.task_id, task.main_task_id)) + log.debug("Submitted task to worker: %s - %d - %d", node.name, task.task_id, task.main_task_id) elif r.status_code == 500: log.info("Saving error to /tmp/dist_error.html") @@ -353,7 +482,7 @@ def node_submit_task(task_id, node_id, main_task_id): log.info((r.status_code, "see api auth for more details")) else: - log.info("Node: {} - Task submit to worker failed: {} - {}".format(node.id, r.status_code, r.content)) + log.info("Node: %d - Task submit to worker failed: %d - %s", node.id, r.status_code, r.text) if check: task.node_id = node.id @@ -380,6 +509,38 @@ def node_submit_task(task_id, node_id, main_task_id): # class Retriever(): class Retriever(threading.Thread): + """ + A class that retrieves and processes tasks from distributed nodes. + + Methods + ------- + run(): + Initializes and starts various threads for fetching and processing tasks. + + free_space_mon(): + Monitors free disk space and logs an error if space is insufficient. + + notification_loop(): + Sends notifications for completed tasks to configured callback URLs. + + failed_cleaner(): + Cleans up failed tasks from nodes and updates their status in the database. + + fetcher(): + Continuously fetches tasks from enabled nodes and processes them. + + delete_target_file(task_id: int, sample_sha256: str, target: str): + Deletes the original file and its binary copy if configured to do so. + + fetch_latest_reports_nfs(): + Fetches the latest reports from nodes using NFS and processes them. + + fetch_latest_reports(): + Fetches the latest reports from nodes using REST API and processes them. + + remove_from_worker(): + Removes tasks from worker nodes and updates their status in the database. + """ def run(self): self.cleaner_queue = queue.Queue() self.fetcher_queue = queue.Queue() @@ -443,12 +604,24 @@ def run(self): for thr in self.threads: try: thr.join(timeout=0.0) - log.info(f"Thread: {thr.name} - Alive: {thr.is_alive()}") + log.info("Thread: %s - Alive: %s", thr.name, str(thr.is_alive())) except Exception as e: log.exception(e) time.sleep(60) def free_space_mon(self): + """ + Monitors the free disk space in the analysis folder and logs an error + message if the available space is below the configured threshold. This + check is performed periodically every 10 minutes. The check is ignored + if the 'freespace' configuration variable is set to zero. + + The analysis folder path is resolved to its full base path to handle + cases where it might be a symbolic link. 
+ + Returns: + None + """ # If not enough free disk space is available, then we print an # error message and wait another round (this check is ignored # when the freespace configuration variable is set to zero). @@ -461,6 +634,22 @@ def free_space_mon(self): time.sleep(600) def notification_loop(self): + """ + Continuously checks for completed tasks that have not been notified and sends notifications to specified URLs. + + This method runs an infinite loop that: + 1. Queries the database for tasks that are finished, retrieved, but not yet notified. + 2. For each task, updates the main task status to `TASK_REPORTED`. + 3. Sends a POST request to each URL specified in the configuration with the task ID in the payload. + 4. Marks the task as notified if the POST request is successful. + 5. Logs the status of each notification attempt. + + The loop sleeps for 20 seconds before repeating the process. + + Raises: + requests.exceptions.ConnectionError: If there is a connection error while sending the POST request. + Exception: For any other exceptions that occur during the notification process. + """ urls = reporting_conf.callback.url.split(",") headers = {"x-api-key": reporting_conf.callback.key} @@ -471,30 +660,54 @@ def notification_loop(self): for task in tasks: with main_db.session.begin(): main_db.set_status(task.main_task_id, TASK_REPORTED) - log.debug("reporting main_task_id: {}".format(task.main_task_id)) + log.debug("reporting main_task_id: %d", task.main_task_id) for url in urls: try: res = requests.post(url, headers=headers, data=json.dumps({"task_id": int(task.main_task_id)})) if res and res.ok: task.notificated = True else: - log.info("failed to report: {} - {}".format(task.main_task_id, res.status_code)) + log.info("failed to report: %d - %d", task.main_task_id, res.status_code) except requests.exceptions.ConnectionError: log.info("Can't report to callback") except Exception as e: - log.info("failed to report: {} - {}".format(task.main_task_id, e)) + log.info("failed to report: %d - %s", task.main_task_id, str(e)) db.commit() time.sleep(20) def failed_cleaner(self): + """ + Periodically checks for failed tasks on enabled nodes and cleans them up. + + This method continuously queries the database for nodes that are enabled and + checks for tasks that have failed either during analysis or processing. If a + failed task is found, it updates the task status to indicate failure, marks + the task as finished, retrieved, and notified, and then adds the task to the + cleaner queue for further processing. + + The method runs indefinitely, sleeping for 600 seconds between each iteration. + + Attributes: + self.cleaner_queue (Queue): A queue to hold tasks that need to be cleaned. + + Notes: + - This method acquires and releases a lock (`lock_retriever`) to ensure + thread-safe operations when adding tasks to the cleaner queue. + - The method commits changes to the database after processing each node. + - The method closes the database session before exiting. + + Raises: + Any exceptions raised during database operations or task processing are + not explicitly handled within this method. 
+ """ db = session() while True: for node in db.query(Node).with_entities(Node.id, Node.name, Node.url, Node.apikey).filter_by(enabled=True).all(): - log.info("Checking for failed tasks on: {}".format(node.name)) + log.info("Checking for failed tasks on: %s", node.name) for task in node_fetch_tasks("failed_analysis|failed_processing", node.url, node.apikey, action="delete"): t = db.query(Task).filter_by(task_id=task["id"], node_id=node.id).order_by(Task.id.desc()).first() if t is not None: - log.info("Cleaning failed for id:{}, node:{}: main_task_id: {}".format(t.id, t.node_id, t.main_task_id)) + log.info("Cleaning failed for id: %d, node: %s: main_task_id: %d", t.id, t.node_id, t.main_task_id) with main_db.session.begin(): main_db.set_status(t.main_task_id, TASK_FAILED_REPORTING) t.finished = True @@ -505,7 +718,7 @@ def failed_cleaner(self): self.cleaner_queue.put((t.node_id, t.task_id)) lock_retriever.release() else: - log.debug("failed_cleaner t is None for: {} - node_id: {}".format(task["id"], node.id)) + log.debug("failed_cleaner t is None for: %s - node_id: %d", str(task["id"]), node.id) lock_retriever.acquire() if (node.id, task["id"]) not in self.cleaner_queue.queue: self.cleaner_queue.put((node.id, task["id"])) @@ -515,7 +728,25 @@ def failed_cleaner(self): db.close() def fetcher(self): - """Method that runs forever""" + """ + Method that runs indefinitely to fetch tasks from nodes and process them. + + This method continuously checks for tasks from enabled nodes and processes them. + It maintains a status count and last check time for each node. If a node's tasks + are fetched successfully, they are added to the fetcher queue. If a node is deemed + dead after a certain number of failures, it is logged. + + Attributes: + last_checks (dict): Dictionary to keep track of the last check time for each node. + status_count (dict): Dictionary to keep track of the status count for each node. + stop_dist (threading.Event): Event to signal stopping the distribution. + cleaner_queue (queue.Queue): Queue to hold tasks that need cleaning. + fetcher_queue (queue.Queue): Queue to hold tasks that need fetching. + current_queue (dict): Dictionary to keep track of the current queue for each node. + + Raises: + Exception: If an error occurs during task processing, it is logged and the status count is incremented + """ last_checks = {} # to not exit till cleaner works with session() as db: @@ -564,7 +795,7 @@ def fetcher(self): ): limit += 1 self.fetcher_queue.put(({"id": task.task_id}, node.id)) - # log.debug("{} - {}".format(task, node.id)) + # log.debug("%s - %d", task, node.id) """ completed_on = datetime.strptime(task["completed_on"], "%Y-%m-%d %H:%M:%S") if node.last_check is None or completed_on > node.last_check: @@ -576,9 +807,9 @@ def fetcher(self): """ except Exception as e: self.status_count[node.name] += 1 - log.error(e, exc_info=True) + log.exception(e) if self.status_count[node.name] == dead_count: - log.info("[-] {} dead".format(node.name)) + log.info("[-] %s dead", node.name) # node_data = db.query(Node).filter_by(name=node.name).first() # node_data.enabled = False # db.commit() @@ -586,6 +817,22 @@ def fetcher(self): # time.sleep(5) def delete_target_file(self, task_id: int, sample_sha256: str, target: str): + """ + Deletes the target file and its binary copy if certain conditions are met. + + Args: + task_id (int): The ID of the task associated with the file. + sample_sha256 (str): The SHA-256 hash of the sample file. + target (str): The path to the target file. 
+ + Behavior: + - Deletes the target file if `cfg.cuckoo.delete_original` is True and the target file exists. + - Deletes the binary copy of the file if `cfg.cuckoo.delete_bin_copy` is True and no other tasks are using the sample. + + Note: + - The function checks if the target file exists before attempting to delete it. + - The function checks if the binary copy is still in use by other tasks before deleting it. + """ # Is ok to delete original file, but we need to lookup on delete_bin_copy if no more pendings tasks if cfg.cuckoo.delete_original and target and path_exists(target): path_delete(target) @@ -600,6 +847,32 @@ def delete_target_file(self, task_id: int, sample_sha256: str, target: str): # This should be executed as external thread as it generates bottle neck def fetch_latest_reports_nfs(self): + """ + Fetches the latest reports from NFS (Network File System) for distributed tasks. + + This method continuously checks for new tasks in the fetcher queue and processes them. + It retrieves the task details from the database, fetches the corresponding report from + the specified node, and updates the task status in the main database. + + The method performs the following steps: + 1. Continuously checks for new tasks in the fetcher queue. + 2. Retrieves task details from the database. + 3. Fetches the report from the specified node. + 4. Updates the task status in the main database. + 5. Moves the report to the appropriate location. + 6. Creates a symbolic link to the analysis folder. + 7. Deletes the target file if necessary. + 8. Marks the task as retrieved and finished in the database. + + The method handles various exceptions and logs relevant information for debugging purposes. + + Note: + This method runs indefinitely until the `stop_dist` event is set. + + Raises: + Exception: If any error occurs during the processing of tasks. + + """ # db = session() with session() as db: # to not exit till cleaner works @@ -632,9 +905,8 @@ def fetch_latest_reports_nfs(self): continue log.debug( - "Fetching dist report for: id: {}, task_id: {}, main_task_id: {} from node: {}".format( + "Fetching dist report for: id: %d, task_id: %d, main_task_id: %d from node: %s", t.id, t.task_id, t.main_task_id, ID2NAME[t.node_id] if t.node_id in ID2NAME else t.node_id - ) ) with main_db.session.begin(): # set completed_on time @@ -643,18 +915,18 @@ def fetch_latest_reports_nfs(self): main_db.set_status(t.main_task_id, TASK_REPORTED) # Fetch each requested report. 
- report_path = os.path.join(CUCKOO_ROOT, "storage", "analyses", f"{t.main_task_id}") + report_path = os.path.join(CUCKOO_ROOT, "storage", "analyses", str(t.main_task_id)) # ToDo option node = db.query(Node).with_entities(Node.id, Node.name, Node.url, Node.apikey).filter_by(id=node_id).first() start_copy = timeit.default_timer() copied = node_get_report_nfs(t.task_id, node.name, t.main_task_id) timediff = timeit.default_timer() - start_copy log.info( - f"It took {timediff:.2f} seconds to copy report {t.task_id} from node: {node.name} for task: {t.main_task_id}" + "It took %s seconds to copy report %d from node: %s for task: %d", f"{timediff:.2f}", t.task_id, node.name, t.main_task_id ) if not copied: - log.error(f"Can't copy report {t.task_id} from node: {node.name} for task: {t.main_task_id}") + log.error("Can't copy report %d from node: %s for task: %d", t.task_id, node.name, t.main_task_id) continue # this doesn't exist for some reason @@ -674,9 +946,7 @@ def fetch_latest_reports_nfs(self): try: shutil.move(t.path, destination) except FileNotFoundError as e: - print(f"Failed to move: {t.path} - {e}") - pass - + log.error("Failed to move: %s - %s", t.path, str(e)) # creating link to analysis folder if path_exists(destination): try: @@ -698,6 +968,24 @@ def fetch_latest_reports_nfs(self): # This should be executed as external thread as it generates bottle neck def fetch_latest_reports(self): + """ + Continuously fetches the latest reports from distributed nodes and processes them. + + This method runs in an infinite loop until `self.stop_dist` is set. It retrieves tasks from the `fetcher_queue`, + fetches the corresponding reports from the nodes, and processes them. The reports are saved to the local storage + and the task status is updated in the database. + + The method handles various scenarios such as: + - Task not found or already processed. + - Report retrieval failures. + - Report extraction and saving. + - Handling of sample binaries associated with the tasks. + + The method also manages a cleaner queue to handle tasks that need to be cleaned up. + + Raises: + Exception: If any unexpected error occurs during the report fetching and processing. 
+ """ db = session() # to not exit till cleaner works while True: @@ -730,9 +1018,8 @@ def fetch_latest_reports(self): continue log.debug( - "Fetching dist report for: id: {}, task_id: {}, main_task_id:{} from node: {}".format( + "Fetching dist report for: id: %d, task_id: %d, main_task_id: %d from node: %s", t.id, t.task_id, t.main_task_id, ID2NAME[t.node_id] if t.node_id in ID2NAME else t.node_id - ) ) with main_db.session.begin(): # set completed_on time @@ -745,23 +1032,21 @@ def fetch_latest_reports(self): report = node_get_report(t.task_id, "dist/", node.url, node.apikey, stream=True) if report is None: - log.info("dist report retrieve failed NONE: task_id: {} from node: {}".format(t.task_id, node_id)) + log.info("dist report retrieve failed NONE: task_id: %d from node: %d", t.task_id, node_id) continue if report.status_code != 200: log.info( - "dist report retrieve failed - status_code {}: task_id: {} from node: {}".format( - report.status_code, t.task_id, node_id - ) + "dist report retrieve failed - status_code %d: task_id: %d from node: %s", report.status_code, t.task_id, node_id ) if report.status_code == 400 and (node_id, task.get("id")) not in self.cleaner_queue.queue: self.cleaner_queue.put((node_id, task.get("id"))) - log.info(f"Status code: {report.status_code} - MSG: {report.text}") + log.info("Status code: %d - MSG: %s", report.status_code, report.text) continue - log.info(f"Report size for task {t.task_id} is: {int(report.headers.get('Content-length', 1))/int(1<<20):,.0f} MB") + log.info("Report size for task %s is: %s MB", t.task_id, f"{int(report.headers.get('Content-length', 1))/int(1<<20):,.0f}") - report_path = os.path.join(CUCKOO_ROOT, "storage", "analyses", "{}".format(t.main_task_id)) + report_path = os.path.join(CUCKOO_ROOT, "storage", "analyses", str(t.main_task_id)) if not path_exists(report_path): path_mkdir(report_path, mode=0o755) try: @@ -774,7 +1059,7 @@ def fetch_latest_reports(self): if (node_id, task.get("id")) not in self.cleaner_queue.queue: self.cleaner_queue.put((node_id, task.get("id"))) except OSError: - log.error("Permission denied: {}".format(report_path)) + log.error("Permission denied: %s", report_path) if path_exists(t.path): sample_sha256 = None @@ -803,7 +1088,7 @@ def fetch_latest_reports(self): self.delete_target_file(t.main_task_id, sample_sha256, t.path) else: - log.debug(f"{t.path} doesn't exist") + log.debug("%s doesn't exist", t.path) t.retrieved = True t.finished = True @@ -814,7 +1099,7 @@ def fetch_latest_reports(self): except pyzipper.zipfile.BadZipFile: log.error("File is not a zip file") except Exception as e: - log.exception("Exception: %s" % e) + log.exception("Exception: %s", str(e)) if path_exists(os.path.join(report_path, "reports", "report.json")): path_delete(os.path.join(report_path, "reports", "report.json")) except Exception as e: @@ -824,6 +1109,30 @@ def fetch_latest_reports(self): db.close() def remove_from_worker(self): + """ + Removes tasks from worker nodes. + + This method continuously processes tasks from the cleaner queue and removes them from the worker nodes. + It retrieves the list of nodes from the database and processes tasks in the cleaner queue. + If a task is found in the `t_is_none` dictionary for a node, it is removed from the list. + The method then sends a request to delete the tasks from the worker node. + + The method performs the following steps: + 1. Retrieves the list of nodes from the database. + 2. Continuously processes tasks from the cleaner queue. + 3. Groups tasks by node ID. + 4. 
Removes tasks from the `t_is_none` dictionary if present. + 5. Sends a request to delete tasks from the worker node. + 6. Commits the changes to the database. + 7. Sleeps for 20 seconds before processing the next batch of tasks. + + Note: + The method runs indefinitely until manually stopped. + + ToDo: + Determine if additional actions are needed when the length of `t_is_none[node_id]` exceeds 50. + + """ nodes = {} with session() as db: for node in db.query(Node).with_entities(Node.id, Node.name, Node.url, Node.apikey).all(): @@ -855,12 +1164,43 @@ def remove_from_worker(self): class StatusThread(threading.Thread): + """ + A thread that handles the submission of tasks to nodes and manages the status of nodes. + + Methods + ------- + submit_tasks(node_id, pend_tasks_num, options_like=False, force_push_push=False, db=None) + Submits tasks to a specified node. + + load_vm_tags(db, node_id, node_name) + Loads the tags for virtual machines associated with a node. + + run() + The main loop that continuously checks the status of nodes and submits tasks. + """ def submit_tasks(self, node_id, pend_tasks_num, options_like=False, force_push_push=False, db=None): + """ + Submits tasks to a specified node. + + Args: + node_id (str): The identifier of the node to which tasks will be submitted. + pend_tasks_num (int): The number of pending tasks to be submitted. + options_like (bool, optional): Flag to filter tasks based on options. Defaults to False. + force_push_push (bool, optional): Flag to forcefully push tasks to the node. Defaults to False. + db (Session, optional): The database session to use. Defaults to None. + + Returns: + bool: True if tasks were successfully submitted, False otherwise. + + Raises: + OperationalError: If there is an operational error when querying the database. + SQLAlchemyError: If there is a SQLAlchemy error when querying the database. + """ # HACK do not create a new session if the current one (passed as parameter) is still valid. try: node = db.query(Node).with_entities(Node.id, Node.name, Node.url, Node.apikey).filter_by(name=node_id).first() except (OperationalError, SQLAlchemyError) as e: - log.warning(f"Got an operational Exception when trying to submit tasks: {e}") + log.warning("Got an operational Exception when trying to submit tasks: %s", str(e)) return False if node.name not in SERVER_TAGS: @@ -897,7 +1237,7 @@ def submit_tasks(self, node_id, pend_tasks_num, options_like=False, force_push_p # Check if file exist, if no wipe from db and continue, rare cases if t.category in ("file", "pcap", "static"): if not path_exists(t.target): - log.info(f"Task id: {t.id} - File doesn't exist: {t.target}") + log.info("Task id: %d - File doesn't exist: %s", t.id, t.target) main_db.set_status(t.id, TASK_BANNED) continue @@ -906,7 +1246,7 @@ def submit_tasks(self, node_id, pend_tasks_num, options_like=False, force_push_p file_size = path_get_size(t.target) if file_size > web_conf.general.max_sample_size: log.warning( - f"File size: {file_size} is bigger than allowed: {web_conf.general.max_sample_size}" + "File size: %d is bigger than allowed: %d", file_size, web_conf.general.max_sample_size ) main_db.set_status(t.id, TASK_BANNED) continue @@ -925,12 +1265,12 @@ def submit_tasks(self, node_id, pend_tasks_num, options_like=False, force_push_p if "timeout=" in t.options: t.timeout = options.get("timeout", 0) except Exception as e: - log.error(e, exc_info=True) + log.exception(e) # wtf are you doing in pendings? 
tasks = db.query(Task).filter_by(main_task_id=t.id).all() if tasks: for task in tasks: - # log.info("Deleting incorrectly uploaded file from dist db, main_task_id: {}".format(t.id)) + # log.info("Deleting incorrectly uploaded file from dist db, main_task_id: %s", t.id) if node.name == main_server_name: main_db.set_status(t.id, TASK_RUNNING) else: @@ -960,7 +1300,7 @@ def submit_tasks(self, node_id, pend_tasks_num, options_like=False, force_push_p if t.options: t.options += "," - t.options += "main_task_id={}".format(t.id) + t.options += f"main_task_id={t.id}" args = dict( package=t.package, category=t.category, @@ -1033,7 +1373,7 @@ def submit_tasks(self, node_id, pend_tasks_num, options_like=False, force_push_p log.info("nothing to upload? How? o_O") return False # Submit appropriate tasks to node - log.debug("going to upload {} tasks to node {}".format(pend_tasks_num, node.name)) + log.debug("going to upload %d tasks to node %s", pend_tasks_num, node.name) for task in to_upload: submitted = node_submit_task(task.id, node.id, task.main_task_id) if submitted: @@ -1042,7 +1382,7 @@ def submit_tasks(self, node_id, pend_tasks_num, options_like=False, force_push_p else: main_db.set_status(task.main_task_id, TASK_DISTRIBUTED) else: - log.info("something is wrong with submission of task: {}".format(task.id)) + log.info("something is wrong with submission of task: %d", task.id) db.delete(task) db.commit() limit += 1 @@ -1053,6 +1393,17 @@ def submit_tasks(self, node_id, pend_tasks_num, options_like=False, force_push_p return True def load_vm_tags(self, db, node_id, node_name): + """ + Load virtual machine tags for a specific node and store them in the global SERVER_TAGS dictionary. + + Args: + db (Session): The database session to query the machines. + node_id (int): The ID of the node to load tags for. + node_name (str): The name of the node to load tags for. + + Returns: + None + """ global SERVER_TAGS # Get available node tags machines = db.query(Machine).filter_by(node_id=node_id).all() @@ -1124,7 +1475,7 @@ def run(self): failed_count[node.name] += 1 # This will declare worker as dead after X failed connections checks if failed_count[node.name] == dead_count: - log.info("[-] {} dead".format(node.name)) + log.info("[-] %s dead", node.name) # node.enabled = False db.commit() if node.name in STATUSES: @@ -1140,7 +1491,7 @@ def run(self): res = self.submit_tasks( node.name, MINIMUMQUEUE[node.name], - options_like="node={}".format(node.name), + options_like=f"node={node.name}", force_push_push=True, db=db, ) @@ -1188,7 +1539,7 @@ def run(self): continue db.commit() except Exception as e: - log.error("Got an exception when trying to check nodes status and submit tasks: {}.".format(e), exc_info=True) + log.error("Got an exception when trying to check nodes status and submit tasks: %s.", str(e)) # ToDo hard test this rollback, this normally only happens on db restart and similar db.rollback() @@ -1198,6 +1549,17 @@ def run(self): def output_json(data, code, headers=None): + """ + Create a JSON response with the given data, HTTP status code, and optional headers. + + Args: + data (dict): The data to be serialized to JSON. + code (int): The HTTP status code for the response. + headers (dict, optional): Additional headers to include in the response. Defaults to None. + + Returns: + Response: A Flask response object with the JSON data and specified headers. 
+ """ resp = make_response(json.dumps(data), code) resp.headers.extend(headers or {}) return resp @@ -1399,16 +1761,16 @@ def update_machine_table(node_name): # delete all old vms _ = db.query(Machine).filter_by(node_id=node.id).delete() - log.info("Available VM's on %s:" % node_name) + log.info("Available VM's on %s:", node_name) # replace with new vms for machine in new_machines: - log.info("-->\t%s" % machine.name) + log.info("-->\t%s", machine.name) node.machines.append(machine) db.add(machine) db.commit() - log.info("Updated the machine table for node: %s" % node_name) + log.info("Updated the machine table for node: %s", node_name) def delete_vm_on_node(node_name, vm_name): @@ -1438,6 +1800,23 @@ def node_enabled(node_name, status): def cron_cleaner(clean_x_hours=False): + """ + Method that runs forever to clean up tasks. + + Args: + clean_x_hours (bool or int, optional): If provided, only clean up tasks that were + notified and created within the last `clean_x_hours` hours. + + The method performs the following steps: + 1. Checks if the cleaner is already running by looking for a PID file at "/tmp/dist_cleaner.pid". + 2. If the cleaner is not running, it creates a PID file to indicate that it is running. + 3. Connects to the database and retrieves all nodes. + 4. Depending on the `clean_x_hours` argument, it retrieves tasks that need to be cleaned up. + 5. Marks the retrieved tasks as deleted and groups them by node. + 6. Deletes the tasks from the nodes. + 7. Commits the changes to the database and closes the connection. + 8. Deletes the PID file to indicate that the cleaner has finished running. + """ """Method that runs forever""" # Check if we are not runned diff --git a/utils/fstab.py b/utils/fstab.py index a4758b44a9f..9daa139bb61 100644 --- a/utils/fstab.py +++ b/utils/fstab.py @@ -69,7 +69,6 @@ def add_nfs_entry(hostname: str, worker_folder: str): def remove_nfs_entry(hostname: str): - worker_path = os.path.join(CUCKOO_ROOT, dist_conf.NFS.mount_folder, hostname) with lock: @@ -193,14 +192,6 @@ def handle_sigterm(sig, f): try: output = handlers[command](*args, **kwargs) except Exception as e: - log.exception("Error executing command: {}".format(command)) + log.exception("Error executing command: %s", command) error = str(e) - server.sendto( - json.dumps( - { - "output": output, - "exception": error, - } - ).encode(), - addr, - ) + server.sendto(json.dumps({"output": output, "exception": error}).encode(), addr) diff --git a/utils/process.py b/utils/process.py index aa49776c337..04020430f1b 100644 --- a/utils/process.py +++ b/utils/process.py @@ -76,6 +76,15 @@ # https://stackoverflow.com/questions/41105733/limit-ram-usage-to-python-program def memory_limit(percentage: float = 0.8): + """ + Sets a memory limit for the current process on Linux systems. + + Args: + percentage (float): Percentage of the total system memory that is allowed to be used. Defaults to 0.8 (80%). + + Returns: + None + """ if platform.system() != "Linux": print("Only works on linux!") return @@ -185,6 +194,16 @@ def init_worker(): def get_formatter_fmt(task_id=None, main_task_id=None): + """ + Generates a logging format string with optional task identifiers. + + Args: + task_id (int, optional): The ID of the task. Defaults to None. + main_task_id (int, optional): The ID of the main task. Defaults to None. + + Returns: + str: A formatted string for logging that includes the task information if provided. 
+ """ task_info = f"[Task {task_id}" if task_id is not None else "" if main_task_id: task_info += f" ({main_task_id})" @@ -201,6 +220,20 @@ def set_formatter_fmt(task_id=None, main_task_id=None): def init_logging(debug=False): + """ + Initializes logging for the application. + + This function sets up logging handlers for console output, syslog, and file output. + It also configures log rotation if enabled in the configuration. + + Args: + debug (bool): If True, sets the logging level to DEBUG. Otherwise, sets it to INFO. + + Returns: + tuple: A tuple containing the console handler, file handler, and syslog handler (if configured). + + Raises: + PermissionError: If there is an issue creating or accessing the log file, typically due to incorrect user permissions. # Pyattck creates root logger which we don't want. So we must use this dirty hack to remove it # If basicConfig was already called by something and had a StreamHandler added, # replace it with a ConsoleHandler. @@ -208,7 +241,7 @@ def init_logging(debug=False): if isinstance(h, logging.StreamHandler) and h.stream == sys.stderr: log.removeHandler(h) h.close() - + """ """ Handlers: - ch - console handler @@ -286,6 +319,21 @@ def init_per_analysis_logging(tid=0, debug=False): def processing_finished(future): + """ + Callback function to handle the completion of a processing task. + + This function is called when a future task is completed. It retrieves the task ID from the + pending_future_map, logs the result, and updates the task status in the database. If an + exception occurs during processing, it logs the error and sets the task status to failed. + + Args: + future (concurrent.futures.Future): The future object representing the asynchronous task. + + Raises: + TimeoutError: If the processing task times out. + pebble.ProcessExpired: If the processing task expires. + Exception: For any other exceptions that occur during processing. + """ task_id = pending_future_map.get(future) with db.session.begin(): try: @@ -295,7 +343,7 @@ def processing_finished(future): log.error("[%d] Processing timeout: %s. Function: %s", task_id, error, error.args[1]) db.set_status(task_id, TASK_FAILED_PROCESSING) except (pebble.ProcessExpired, Exception) as error: - log.error("[%d] Exception when processing task: %s", task_id, error, exc_info=True) + log.exception("[%d] Exception when processing task: %s", task_id, error) db.set_status(task_id, TASK_FAILED_PROCESSING) pending_future_map.pop(future) @@ -307,6 +355,24 @@ def processing_finished(future): def autoprocess( parallel=1, failed_processing=False, maxtasksperchild=7, memory_debugging=False, processing_timeout=300, debug: bool = False ): + """ + Automatically processes analysis data using a process pool. + + Args: + parallel (int): Number of parallel processes to use. Default is 1. + failed_processing (bool): Whether to process failed tasks. Default is False. + maxtasksperchild (int): Maximum number of tasks per child process. Default is 7. + memory_debugging (bool): Whether to enable memory debugging. Default is False. + processing_timeout (int): Timeout for processing each task in seconds. Default is 300. + debug (bool): Whether to enable debug mode. Default is False. + + Raises: + KeyboardInterrupt: If the process is interrupted by the user. + MemoryError: If there is not enough free RAM to run processing. + OSError: If an OS-related error occurs. + Exception: If any other exception occurs during processing. 
+ + """ maxcount = cfg.cuckoo.max_analysis_count count = 0 # pool = multiprocessing.Pool(parallel, init_worker) @@ -394,6 +460,18 @@ def autoprocess( def _load_report(task_id: int): + """ + Load the analysis report for a given task ID from the configured database. + + This function attempts to load the analysis report from MongoDB if it is enabled. + If MongoDB is not enabled, it tries to load the report from Elasticsearch if it is enabled and not in search-only mode. + + Args: + task_id (int): The ID of the task for which to load the analysis report. + + Returns: + dict or bool: The analysis report as a dictionary if found, otherwise False. + """ if repconf.mongodb.enabled: analysis = mongo_find_one("analysis", {"info.id": task_id}, sort=[("_id", -1)]) for process in analysis.get("behavior", {}).get("processes", []): @@ -419,6 +497,20 @@ def _load_report(task_id: int): def parse_id(id_string: str): + """ + Parses a string representing a range or list of ranges of IDs and returns a list of tuples. + + Args: + id_string (str): A string representing IDs. It can be "auto" or a string of comma-separated + ranges (e.g., "1-3,5,7-9"). + + Returns: + list: A list of tuples where each tuple represents a range of IDs. If the input is "auto", + it returns the string "auto". + + Raises: + TypeError: If the input string is not in the correct format or if a range is invalid. + """ if id_string == "auto": return id_string id_string = id_string.replace(" ", "") diff --git a/utils/rooter.py b/utils/rooter.py index fee42a49127..25d293f3bd1 100644 --- a/utils/rooter.py +++ b/utils/rooter.py @@ -16,8 +16,8 @@ import subprocess import sys -if sys.version_info[:2] < (3, 8): - sys.exit("You are running an incompatible version of Python, please use >= 3.8") +if sys.version_info[:2] < (3, 10): + sys.exit("You are running an incompatible version of Python, please use >= 3.10") CUCKOO_ROOT = os.path.join(os.path.abspath(os.path.dirname(__file__)), "..") sys.path.append(CUCKOO_ROOT) @@ -197,7 +197,7 @@ def delete_dev_from_vrf(dev): def vpn_status(name): """Gets current VPN status.""" ret = {} - for line in run(settings.systemctl, "status", "openvpn@{}.service".format(name))[0].split("\n"): + for line in run(settings.systemctl, "status", f"openvpn@{name}.service")[0].split("\n"): if "running" in line: ret[name] = "running" break @@ -687,7 +687,7 @@ def inetsim_disable(ipaddr, inetsim_ip, dns_port, resultserver_port, ports): def interface_route_tun_enable(ipaddr: str, out_interface: str, task_id: str): """Enable routing and NAT via tun output_interface.""" - log.info(f"Enabling interface routing via: {out_interface} for task: {task_id}") + log.info("Enabling interface routing via: %s for task: %s", out_interface, task_id) # mark packets from analysis VM run_iptables("-t", "mangle", "-I", "PREROUTING", "--source", ipaddr, "-j", "MARK", "--set-mark", task_id) @@ -701,7 +701,7 @@ def interface_route_tun_enable(ipaddr: str, out_interface: str, task_id: str): peer_ip = get_tun_peer_address(out_interface) if peer_ip: - log.info(f"interface_route_enable {out_interface} has peer {peer_ip}") + log.info("interface_route_enable %s has peer: %s ", out_interface, peer_ip) run(s.ip, "route", "add", "default", "via", peer_ip, "table", task_id) else: log.error("interface_route_enable missing peer IP ") @@ -709,7 +709,7 @@ def interface_route_tun_enable(ipaddr: str, out_interface: str, task_id: str): def interface_route_tun_disable(ipaddr: str, out_interface: str, task_id: str): """Disable routing and NAT via tun output_interface.""" 
- log.info(f"Disable interface routing via: {out_interface} for task: {task_id}") + log.info("Disable interface routing via: %s for task: %s", out_interface, task_id) # mark packets from analysis VM run_iptables("-t", "mangle", "-D", "PREROUTING", "--source", ipaddr, "-j", "MARK", "--set-mark", task_id) @@ -723,7 +723,7 @@ def interface_route_tun_disable(ipaddr: str, out_interface: str, task_id: str): peer_ip = get_tun_peer_address(out_interface) if peer_ip: - log.info(f"interface_route_disable {out_interface} has peer {peer_ip}") + log.info("interface_route_disable %s has peer %s", out_interface, peer_ip) run(s.ip, "route", "del", "default", "via", peer_ip, "table", task_id) else: log.error("interface_route_disable missing peer IP ") @@ -983,14 +983,6 @@ def handle_sigterm(sig, f): try: output = handlers[command](*args, **kwargs) except Exception as e: - log.exception("Error executing command: {}".format(command)) + log.exception("Error executing command: %s", command) error = str(e) - server.sendto( - json.dumps( - { - "output": output, - "exception": error, - } - ).encode(), - addr, - ) + server.sendto(json.dumps({"output": output, "exception": error}).encode(), addr) diff --git a/utils/route.py b/utils/route.py index 16517d47d3e..5bbf62726ff 100755 --- a/utils/route.py +++ b/utils/route.py @@ -1,21 +1,21 @@ #!/usr/bin/python """ - Aux script for VPN setup - - Get a look on utils/vpn2cape.py - Example: - /etc/iproute2/rt_tables - 5 host1 - 6 host2 - 7 host3 - - conf/routing.conf - [vpn5] - name = X.ovpn - description = X - interface = tunX - rt_table = host1 +Aux script for VPN setup + +Get a look on utils/vpn2cape.py +Example: + /etc/iproute2/rt_tables + 5 host1 + 6 host2 + 7 host3 + + conf/routing.conf + [vpn5] + name = X.ovpn + description = X + interface = tunX + rt_table = host1 """ import os diff --git a/utils/vpn2cape.py b/utils/vpn2cape.py index ee037345385..911efdd0ea1 100644 --- a/utils/vpn2cape.py +++ b/utils/vpn2cape.py @@ -30,7 +30,7 @@ def main(folder, port): # rt_table rt = "" - rt = re.findall(f"remote\s(.*)\s{port}", tmp) + rt = re.findall(fr"remote\s(.*)\s{port}", tmp) if rt: # start from id idx_start rt_table.setdefault(str(index + idx_start), rt[0]) @@ -70,7 +70,7 @@ def main(folder, port): ) vpns.append(f"vpn_{index + idx_start}") - file = file.replace(" ", "\ ") + file = file.replace(" ", r"\ ") paths.append(f"sudo openvpn --config {file} &") if write: diff --git a/web/analysis/templatetags/analysis_tags.py b/web/analysis/templatetags/analysis_tags.py index bf75af0fe6d..791dfffcd2b 100644 --- a/web/analysis/templatetags/analysis_tags.py +++ b/web/analysis/templatetags/analysis_tags.py @@ -71,7 +71,6 @@ def get_detection_by_pid(dictionary, key): return detections = dictionary.get(str(key), "") if detections: - if len(detections) > 1: output = " -> ".join([malware_name_url_pattern.format(malware_name=name) for name in detections]) else: diff --git a/web/analysis/urls.py b/web/analysis/urls.py index 46f5ed47c96..540ab467f82 100644 --- a/web/analysis/urls.py +++ b/web/analysis/urls.py @@ -2,9 +2,10 @@ # This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org # See the file "docs/LICENSE" for copying permission. 
-from analysis import views from django.urls import re_path +from analysis import views + urlpatterns = [ re_path(r"^$", views.index, name="analysis"), re_path(r"^page/(?P\d+)/$", views.index, name="index"), diff --git a/web/apiv2/urls.py b/web/apiv2/urls.py index aa512323add..bfe7b616595 100644 --- a/web/apiv2/urls.py +++ b/web/apiv2/urls.py @@ -2,12 +2,12 @@ # This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org # See the file "docs/LICENSE" for copying permission. -from apiv2 import views - # from django.conf.urls import include from django.urls import path, re_path from rest_framework.authtoken.views import obtain_auth_token +from apiv2 import views + urlpatterns = [ re_path(r"^$", views.index, name="apiv2"), # disabled due to token auth diff --git a/web/apiv2/views.py b/web/apiv2/views.py index 882f4005cd0..29a769ceb25 100644 --- a/web/apiv2/views.py +++ b/web/apiv2/views.py @@ -1265,7 +1265,7 @@ def tasks_report(request, task_id, report_format="json", make_zip=False): else: zf.write(filepath, filedir) except Exception as e: - log.error(e, exc_info=True) + log.exception(e) # exception for lite report that is under reports/lite.json if report_format.lower() == "lite": @@ -2438,7 +2438,7 @@ def _stream_iterator(fp, guest_name, chunk_size=1024): resp = {"error": True, "error_value": "filepath not set"} return Response(resp) if request.data.get("is_local", ""): - if filepath.startswith(("/", "\/")): + if filepath.startswith(("/", r"\/")): resp = {"error": True, "error_value": "Filepath mustn't start with /"} return Response(resp) filepath = os.path.join(CUCKOO_ROOT, "storage", "analyses", f"{task_id}", filepath) @@ -2455,7 +2455,7 @@ def _stream_iterator(fp, guest_name, chunk_size=1024): return Response(resp) return StreamingHttpResponse(streaming_content=r.iter_content(chunk_size=1024), content_type="application/octet-stream") except requests.exceptions.RequestException as ex: - log.error(ex, exc_info=True) + log.exception(ex) resp = {"error": True, "error_value": f"Requests exception: {ex}"} return Response(resp) diff --git a/web/compare/urls.py b/web/compare/urls.py index 3b32527f6f1..610c56b9c02 100644 --- a/web/compare/urls.py +++ b/web/compare/urls.py @@ -2,9 +2,10 @@ # This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org # See the file "docs/LICENSE" for copying permission. -from compare import views from django.urls import re_path +from compare import views + urlpatterns = [ re_path(r"^(?P\d+)/$", views.left, name="compare_left"), re_path(r"^(?P\d+)/(?P\d+)/$", views.both, name="compare_both"), diff --git a/web/dashboard/urls.py b/web/dashboard/urls.py index ff2e6b25406..de7ca558275 100644 --- a/web/dashboard/urls.py +++ b/web/dashboard/urls.py @@ -2,9 +2,10 @@ # This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org # See the file "docs/LICENSE" for copying permission. -from dashboard import views from django.urls import re_path +from dashboard import views + urlpatterns = [ re_path(r"^$", views.index), ] diff --git a/web/guac/urls.py b/web/guac/urls.py index bf0bb539242..dbf41e6f560 100644 --- a/web/guac/urls.py +++ b/web/guac/urls.py @@ -1,4 +1,5 @@ from django.urls import re_path + from guac import views urlpatterns = [ diff --git a/web/submission/urls.py b/web/submission/urls.py index 756d8efc0e8..a2b2f8fde69 100644 --- a/web/submission/urls.py +++ b/web/submission/urls.py @@ -3,6 +3,7 @@ # See the file 'docs/LICENSE' for copying permission. 
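The apiv2/views.py hunks above replace log.error(e, exc_info=True) with log.exception(e). Called from inside an except block, Logger.exception() logs at ERROR level and appends the active traceback automatically, so the two forms are equivalent and the shorter one wins. A minimal sketch (the helper name and message are illustrative, not code from the repository):

    import logging

    log = logging.getLogger(__name__)

    def read_report(path: str) -> bytes:
        # Illustrative helper, not part of the PR.
        try:
            with open(path, "rb") as fp:
                return fp.read()
        except OSError as e:
            # Logger.exception() behaves like Logger.error(..., exc_info=True)
            # when used in an exception handler: same level, traceback included.
            log.exception("Unable to read report %s: %s", path, e)
            return b""

Passing a message with %-style arguments (rather than the bare exception object) also keeps such calls consistent with the lazy-formatting convention used elsewhere in the diff.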
from django.urls import re_path + from submission import views urlpatterns = [ diff --git a/web/users/migrations/0001_initial.py b/web/users/migrations/0001_initial.py index 04677ea1c82..e7631ace34b 100644 --- a/web/users/migrations/0001_initial.py +++ b/web/users/migrations/0001_initial.py @@ -4,7 +4,6 @@ class Migration(migrations.Migration): - initial = True dependencies = [ diff --git a/web/users/migrations/0002_reports.py b/web/users/migrations/0002_reports.py index 35bc9b28e74..bdffb93d232 100644 --- a/web/users/migrations/0002_reports.py +++ b/web/users/migrations/0002_reports.py @@ -5,7 +5,6 @@ class Migration(migrations.Migration): - dependencies = [ ("users", "0001_initial"), ] diff --git a/web/users/migrations/0003_rename_field_subscription.py b/web/users/migrations/0003_rename_field_subscription.py index 54ab307011f..11c8e6f8434 100644 --- a/web/users/migrations/0003_rename_field_subscription.py +++ b/web/users/migrations/0003_rename_field_subscription.py @@ -24,7 +24,6 @@ def reverse_migrate(apps, schema_editor): class Migration(migrations.Migration): - dependencies = [ ("users", "0002_reports"), ]
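The users/ migration hunks are pure formatting (the blank line after each class Migration header goes away). The urls.py hunks, by contrast, all make the same structural move: first-party app imports (analysis, apiv2, compare, dashboard, guac, submission) drop below the third-party Django/DRF imports, separated by a blank line, which is the stdlib / third-party / first-party grouping that isort and ruff's import rules enforce. A quick way to reproduce that grouping, assuming isort is installed and the app is declared first-party (isort.code() and known_first_party are real isort options; the snippet itself is only an illustration):

    import isort

    messy = (
        "from submission import views\n"
        "from django.urls import re_path\n"
        "import os\n"
    )

    # known_first_party marks "submission" as a local app, so it ends up in its
    # own group below the third-party Django import.
    print(isort.code(messy, known_first_party=["submission"]))
    # import os
    #
    # from django.urls import re_path
    #
    # from submission import views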