Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
3c7 committed May 27, 2024
2 parents 3c86474 + 7d20734 commit dcc9380
Show file tree
Hide file tree
Showing 3 changed files with 190 additions and 170 deletions.
97 changes: 59 additions & 38 deletions common_osint_model/models/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,20 @@
from pydantic import BaseModel
from hhhash import hash_from_banner

from common_osint_model.models import ShodanDataHandler, CensysDataHandler, BinaryEdgeDataHandler, Logger
from common_osint_model.models import (
ShodanDataHandler,
CensysDataHandler,
BinaryEdgeDataHandler,
Logger,
)
from common_osint_model.utils import hash_all


class HTTPComponentContentFavicon(BaseModel, ShodanDataHandler, CensysDataHandler, BinaryEdgeDataHandler, Logger):
class HTTPComponentContentFavicon(
BaseModel, ShodanDataHandler, CensysDataHandler, BinaryEdgeDataHandler, Logger
):
"""Represents the favicon which might be included in HTTP components."""

raw: Optional[str]
md5: Optional[str]
sha1: Optional[str]
Expand All @@ -22,23 +30,27 @@ class HTTPComponentContentFavicon(BaseModel, ShodanDataHandler, CensysDataHandle
def from_shodan(cls, d: Dict):
"""Creates an instance of this class based on Shodan data given as dictionary."""
if not isinstance(d, Dict):
raise TypeError(f"Method HTTPComponentContentFavicon.from_shodan expects parameter d to be a dictionary, "
f"but it was {type(d)}.")
raise TypeError(
f"Method HTTPComponentContentFavicon.from_shodan expects parameter d to be a dictionary, "
f"but it was {type(d)}."
)

raw = d["http"]["favicon"]["data"]
raw = base64.b64decode(raw)
md5, sha1, sha256, murmur = hash_all(raw)
shodan_murmur = mmh3.hash(d["http"]["favicon"]["data"])
cls.info("Shodan's favicon hash only hashes the base64 encoded favicon, not the data itself. The hash can be "
"found as \"shodan_murmur\" in this instance. \"murmur\" and the other hashes are calculated based on "
"the raw data of the favicon.")
cls.info(
"Shodan's favicon hash only hashes the base64 encoded favicon, not the data itself. The hash can be "
'found as "shodan_murmur" in this instance. "murmur" and the other hashes are calculated based on '
"the raw data of the favicon."
)
return HTTPComponentContentFavicon(
raw=d["http"]["favicon"]["data"],
md5=md5,
sha1=sha1,
sha256=sha256,
murmur=murmur,
shodan_murmur=shodan_murmur
shodan_murmur=shodan_murmur,
)

@classmethod
Expand All @@ -58,12 +70,13 @@ def from_binaryedge(cls, d: Union[Dict, List]):
sha1=sha1,
sha256=sha256,
murmur=murmur,
shodan_murmur=shodan_murmur
shodan_murmur=shodan_murmur,
)


class HTTPComponentContentRobots(BaseModel, ShodanDataHandler, CensysDataHandler):
"""Represents the robots.txt file in webroots."""

raw: Optional[str]
md5: Optional[str]
sha1: Optional[str]
Expand All @@ -76,16 +89,13 @@ def from_shodan(cls, d: Dict):
if not isinstance(d, Dict):
raise TypeError(
f"Method HTTPComponentContentRobots.from_shodan expects parameter d to be a dictionary, "
f"but it was {type(d)}.")
f"but it was {type(d)}."
)

raw = d["http"]["robots"].encode("utf-8")
md5, sha1, sha256, murmur = hash_all(raw)
return HTTPComponentContentRobots(
raw=raw,
md5=md5,
sha1=sha1,
sha256=sha256,
murmur=murmur
raw=raw, md5=md5, sha1=sha1, sha256=sha256, murmur=murmur
)

@classmethod
Expand All @@ -96,6 +106,7 @@ def from_censys(cls, d: Dict):

class HTTPComponentContentSecurity(BaseModel, ShodanDataHandler, CensysDataHandler):
"""Represents the security.txt file in webroots."""

raw: Optional[str]
md5: Optional[str]
sha1: Optional[str]
Expand All @@ -108,16 +119,13 @@ def from_shodan(cls, d: Dict):
if not isinstance(d, Dict):
raise TypeError(
f"Method HTTPComponentContentRobots.from_shodan expects parameter d to be a dictionary, "
f"but it was {type(d)}.")
f"but it was {type(d)}."
)

raw = d["http"]["securitytxt"].encode("utf-8")
md5, sha1, sha256, murmur = hash_all(raw)
return HTTPComponentContentSecurity(
raw=raw,
md5=md5,
sha1=sha1,
sha256=sha256,
murmur=murmur
raw=raw, md5=md5, sha1=sha1, sha256=sha256, murmur=murmur
)

@classmethod
Expand All @@ -126,8 +134,11 @@ def from_censys(cls, d: Dict):
return None


class HTTPComponentContent(BaseModel, ShodanDataHandler, CensysDataHandler, BinaryEdgeDataHandler, Logger):
class HTTPComponentContent(
BaseModel, ShodanDataHandler, CensysDataHandler, BinaryEdgeDataHandler, Logger
):
"""Represents the content (body) of HTTP responses."""

raw: Optional[str]
length: Optional[int]
md5: Optional[str]
Expand All @@ -142,8 +153,10 @@ class HTTPComponentContent(BaseModel, ShodanDataHandler, CensysDataHandler, Bina
def from_shodan(cls, d: Dict):
"""Creates an instance of this class based on Shodan data given as dictionary."""
if not isinstance(d, Dict):
raise TypeError(f"Method HTTPComponentContent.from_shodan expects parameter d to be a dictionary, "
f"but it was {type(d)}.")
raise TypeError(
f"Method HTTPComponentContent.from_shodan expects parameter d to be a dictionary, "
f"but it was {type(d)}."
)

favicon = None
if "favicon" in d["http"]:
Expand All @@ -164,7 +177,13 @@ def from_shodan(cls, d: Dict):
if not raw:
raw = ""

raw = raw.encode("utf-8")
try:
raw = raw.encode("utf-8")
except UnicodeEncodeError as uee:
# TODO: This is very ugly, but offhand I can't find a solution for the weird Shodan encoding issue.
cls.error(f"UnicodeEncodeError during Shodan result encoding: {uee}")
cls.warning("Using empty strings as HTML body.")
raw = "".encode("utf-8")

md5, sha1, sha256, murmur = hash_all(raw)
return HTTPComponentContent(
Expand All @@ -176,7 +195,7 @@ def from_shodan(cls, d: Dict):
murmur=murmur,
favicon=favicon,
robots_txt=robots_txt,
security_txt=security_txt
security_txt=security_txt,
)

@classmethod
Expand All @@ -194,7 +213,7 @@ def from_censys(cls, d: Dict):
murmur=murmur,
favicon=HTTPComponentContentFavicon.from_censys(d),
robots_txt=HTTPComponentContentRobots.from_censys(d),
security_txt=HTTPComponentContentSecurity.from_censys(d)
security_txt=HTTPComponentContentSecurity.from_censys(d),
)

@classmethod
Expand All @@ -211,12 +230,15 @@ def from_binaryedge(cls, d: Union[Dict, List]):
sha1=sha1,
sha256=sha256,
murmur=murmur,
favicon=HTTPComponentContentFavicon.from_binaryedge(d)
favicon=HTTPComponentContentFavicon.from_binaryedge(d),
)


class HTTPComponent(BaseModel, ShodanDataHandler, CensysDataHandler, BinaryEdgeDataHandler):
class HTTPComponent(
BaseModel, ShodanDataHandler, CensysDataHandler, BinaryEdgeDataHandler
):
"""Represents the HTTP component of services."""

headers: Optional[Dict[str, str]]
content: Optional[HTTPComponentContent]
shodan_headers_hash: Optional[str]
Expand All @@ -226,8 +248,10 @@ class HTTPComponent(BaseModel, ShodanDataHandler, CensysDataHandler, BinaryEdgeD
def from_shodan(cls, d: Dict):
"""Creates an instance of this class based on Shodan data given as dictionary."""
if not isinstance(d, Dict):
raise TypeError(f"Method HTTPComponent.from_shodan expects parameter d to be a dictionary, "
f"but it was {type(d)}.")
raise TypeError(
f"Method HTTPComponent.from_shodan expects parameter d to be a dictionary, "
f"but it was {type(d)}."
)

content = HTTPComponentContent.from_shodan(d)
banner = d["data"]
Expand All @@ -243,7 +267,7 @@ def from_shodan(cls, d: Dict):
headers=headers,
content=content,
shodan_headers_hash=d.get("http", {}).get("headers_hash", None),
hhhash=hash_from_banner(banner)
hhhash=hash_from_banner(banner),
)

@classmethod
Expand All @@ -255,9 +279,7 @@ def from_censys(cls, d: Dict):
if k[0] == "_":
continue

headers.update({
k.replace("_", "-"): " ".join(v)
})
headers.update({k.replace("_", "-"): " ".join(v)})

banner_lines = d["banner"].replace("\r", "").split("\n")
banner_keys = banner_lines[0]
Expand All @@ -271,14 +293,13 @@ def from_censys(cls, d: Dict):
headers=headers,
content=HTTPComponentContent.from_censys(d),
shodan_headers_hash=headers_hash,
hhhash=hash_from_banner(d["banner"])
hhhash=hash_from_banner(d["banner"]),
)

@classmethod
def from_binaryedge(cls, d: Union[Dict, List]):
http_response = d["result"]["data"]["response"]
headers = http_response["headers"]["headers"]
return HTTPComponent(
headers=headers,
content=HTTPComponentContent.from_binaryedge(d)
headers=headers, content=HTTPComponentContent.from_binaryedge(d)
)
Loading

0 comments on commit dcc9380

Please sign in to comment.