Skip to content

Commit

Permalink
peepdf update and test (kevoreilly#2491)
Browse files (browse the repository at this point in the history)
  • Loading branch information
enzok authored Feb 12, 2025
1 parent f89c890 commit df2152c
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 9 deletions.
18 changes: 9 additions & 9 deletions lib/cuckoo/common/integrations/peepdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ def _set_base_uri(pdf):
try:
for version in range(pdf.updates + 1):
trailer, _ = pdf.trailer[version]
if trailer is not None:
elem = trailer.dict.getElementByName("/Root")
if trailer:
elem = trailer.getTrailerDictionary().getElementByName("/Root")
if elem:
elem = _get_obj_val(pdf, version, elem)
if elem:
Expand All @@ -60,7 +60,7 @@ def _set_base_uri(pdf):
return elem.getValue()
except Exception as e:
log.exception(e)
return ""
return


def peepdf_parse(filepath: str, pdfresult: Dict[str, Any]) -> Dict[str, Any]:
Expand Down Expand Up @@ -92,16 +92,16 @@ def peepdf_parse(filepath: str, pdfresult: Dict[str, Any]) -> Dict[str, Any]:
metadata = metatmp
objects = body.objects
for index in objects:
oid = objects[index].id
oid = objects[index].thisId
offset = objects[index].offset
size = objects[index].size
details = objects[index].object
details = objects[index].obj
obj_data = {
"Object ID": oid,
"Offset": offset,
"Size": size,
}
if details.type == "stream":
if details.objType == "stream":
decoded_stream = details.decodedStream
if isJavascript(decoded_stream.strip()):
jsdata = None
Expand Down Expand Up @@ -129,7 +129,7 @@ def peepdf_parse(filepath: str, pdfresult: Dict[str, Any]) -> Dict[str, Any]:
ret_data += tmp
obj_data["Data"] = ret_data
retobjects.append(obj_data)
elif details.type == "dictionary" and details.containsJScode:
elif details.objType == "dictionary" and details.containsJScode:
js_elem = details.getElementByName("/JS")
if js_elem:
jsdata = None
Expand Down Expand Up @@ -157,7 +157,7 @@ def peepdf_parse(filepath: str, pdfresult: Dict[str, Any]) -> Dict[str, Any]:
ret_data += tmp
obj_data["Data"] = ret_data
retobjects.append(obj_data)
elif details.type == "dictionary" and details.hasElement("/A"):
elif details.objType == "dictionary" and details.hasElement("/A"):
# verify it to be a link type annotation
subtype_elem = details.getElementByName("/Subtype")
type_elem = details.getElementByName("/Type")
Expand All @@ -169,7 +169,7 @@ def peepdf_parse(filepath: str, pdfresult: Dict[str, Any]) -> Dict[str, Any]:
continue
a_elem = details.getElementByName("/A")
a_elem = _get_obj_val(pdf, i, a_elem)
if a_elem and a_elem.type == "dictionary" and a_elem.hasElement("/URI"):
if a_elem and a_elem.getType() == "dictionary" and a_elem.hasElement("/URI"):
uri_elem = a_elem.getElementByName("/URI")
if uri_elem:
uri_elem = _get_obj_val(pdf, i, uri_elem)
Expand Down
43 changes: 43 additions & 0 deletions tests/test_peepdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from pathlib import Path

import pytest

from lib.cuckoo.common.integrations.peepdf import peepdf_parse

# Directory with the malware samples, located at data/malware next to this file.
data_dir = Path(__file__).parent.joinpath("data", "malware")
# The sample PDF; its file name is a 64-character hex digest
# (presumably the SHA-256 of the file -- confirm against the sample store).
pdf_path = data_dir.joinpath(
    "ad6cedb0d1244c1d740bf5f681850a275c4592281cdebb491ce533edd9d6a77d"
)

# The single JavaScript stream expected for the sample. It carries no "Data"
# key because the test removes that entry from the parser output before
# comparing.
_expected_js_stream = {
    "Object ID": 13,
    "Offset": 872,
    "Size": 1255,
}

# Full result expected from peepdf_parse for the sample PDF.
expected_result = {
    "Info": {
        "Creator": "Scribus 1.3.3.12",
        "Producer": "Scribus PDF Library 1.3.3.12",
        "Author": "",
    },
    "Dates": [],
    "Keywords": {},
    "JSStreams": [_expected_js_stream],
    "All_URLs": [],
}

# Empty result skeleton that is handed to peepdf_parse as its second argument.
pdfresult = {
    "Info": {},
    "Dates": [],
    "Keywords": {},
    "JSStreams": [],
    "All_URLs": [],
}


@pytest.mark.skipif(not data_dir.exists(), reason="Required data file is not present")
class TestPeepdf:
    """Tests for ``peepdf_parse``.

    The whole class is skipped when the sample data directory does not exist.
    """

    @pytest.mark.skipif(
        not pdf_path.exists(),
        reason="Required data file is not present",
    )
    def test_peepdf_parse_valid_pdf(self):
        """Parse the sample PDF and compare the output with ``expected_result``.

        Skipped when the sample file itself is missing.
        """
        # Give the parser its own containers: the module-level ``pdfresult``
        # template is shared, so it must stay empty in case the parser fills
        # the dict it receives in place.
        fresh_input = {key: value.copy() for key, value in pdfresult.items()}
        result = peepdf_parse(str(pdf_path), fresh_input)

        # Fail with a readable message instead of a bare IndexError/KeyError
        # when the parser reports no JavaScript stream or omits its payload.
        js_streams = result["JSStreams"]
        assert js_streams, "peepdf_parse returned no JavaScript streams"
        first_stream = js_streams[0]
        assert "Data" in first_stream, "first JavaScript stream has no 'Data' entry"

        # ``expected_result`` does not pin the stream payload, so drop it
        # before the structural comparison.
        del first_stream["Data"]

        assert result == expected_result

0 comments on commit df2152c

Please sign in to comment.