From ea991c22301e9ba2634e9b678721e90cf9db358a Mon Sep 17 00:00:00 2001 From: Josh Feather <142008135+josh-feather@users.noreply.github.com> Date: Tue, 10 Sep 2024 14:01:32 +0100 Subject: [PATCH] Only process PCAPs with httpreplay when tlsdump.log exists These changes prevent PCAPs from being processed by httpreplay (Pcap2 processing module) when there are no TLS keys available. This is because httpreplay processing is very time intensive due to its pure-python implementation. Httpreplay's core use is to decrypt TLS traffic so it can be processed by Suricata. If there are no TLS keys available, there is no requirement to use it for processing. For context, when CAPE attempted to process a ~250MB PCAP with httpreplay, it took ~960 seconds. Without httpreplay, it took ~16 seconds. --- modules/processing/network.py | 14 +++++++++++++- utils/profiling.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 utils/profiling.py diff --git a/modules/processing/network.py b/modules/processing/network.py index b5de5331994..aa5ad68fc34 100644 --- a/modules/processing/network.py +++ b/modules/processing/network.py @@ -38,6 +38,7 @@ from lib.cuckoo.common.path_utils import path_delete, path_exists, path_mkdir, path_read_file, path_write_file from lib.cuckoo.common.safelist import is_safelisted_domain from lib.cuckoo.common.utils import convert_to_printable +import utils.profiling as profiling # from lib.cuckoo.common.safelist import is_safelisted_ip @@ -78,6 +79,7 @@ sys.path.append(CUCKOO_ROOT) TLS_HANDSHAKE = 22 +PCAP_BYTES_HTTPREPLAY_WARN_LIMIT = 30*1024*1024 Keyed = namedtuple("Keyed", ["key", "obj"]) Packet = namedtuple("Packet", ["raw", "ts"]) @@ -922,6 +924,11 @@ def run(self): log.debug('The PCAP file does not exist at path "%s"', self.pcap_path) return {} + httpreplay_start = profiling.Counter() + log.info("starting processing pcap with httpreplay") + if os.path.getsize(self.pcap_path) > 
PCAP_BYTES_HTTPREPLAY_WARN_LIMIT: + log.warning("httpreplay processing may timeout due to pcap size") + r = httpreplay.reader.PcapReader(open(self.pcap_path, "rb")) r.tcp = httpreplay.smegma.TCPPacketStreamer(r, self.handlers) @@ -1060,6 +1067,8 @@ def run(self): results[f"{protocol}_ex"].append(tmp_dict) + log.info("finished processing pcap with httpreplay") + log.debug("httpreplay processing time: %s", (profiling.Counter() - httpreplay_start)) return results @@ -1117,7 +1126,10 @@ def run(self): if HAVE_HTTPREPLAY: try: - p2 = Pcap2(self.pcap_path, self.get_tlsmaster(), self.network_path).run() + p2 = {} + tls_master = self.get_tlsmaster() + if tls_master: + p2 = Pcap2(self.pcap_path, tls_master, self.network_path).run() if p2: results.update(p2) except Exception: diff --git a/utils/profiling.py b/utils/profiling.py new file mode 100644 index 00000000000..ddd965d2b22 --- /dev/null +++ b/utils/profiling.py @@ -0,0 +1,29 @@ +import time +from dataclasses import dataclass, field + + +@dataclass +class Counter: + """Profiler that counts real and CPU time.""" + real: float = field(default_factory=time.perf_counter) + cpu: float = field(default_factory=time.process_time) + + def __sub__(self, other): + real = self.real - other.real + cpu = self.cpu - other.cpu + return Counter(real, cpu) + + def __add__(self, other): + real = self.real + other.real + cpu = self.cpu + other.cpu + return Counter(real, cpu) + + def __str__(self) -> str: + return f"{self.real:.2f}s (cpu {self.cpu:.2f}s)" + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, exc_tb): + elapsed = (Counter() - self) + self.__dict__.update(**elapsed.__dict__)