Dev additions #18

Open · wants to merge 36 commits into master

Commits (36)
5b1857c
Added functionality: Scan for Subresource Integrity Check (require-sr…
mattkrau Jan 27, 2019
3cd94c2
Added Feature: Scan for various server leaks: .env .gitlab-ci.yml .n…
mattkrau Jan 27, 2019
e86d784
Added Feature: Reading of Generatortag(s), adding to result for compa…
mattkrau Jan 28, 2019
227db23
Added Feature: Reading of Generatortag(s), adding to result for compa…
mattkrau Jan 28, 2019
9df40be
Removed duplicate serverleak-test
mattkrau Feb 27, 2019
af1c173
Added consistency: Matched module name to file name
mattkrau Jan 27, 2019
04c8c40
Added Feature: Scan for various server leaks: .env .gitlab-ci.yml .n…
mattkrau Jan 27, 2019
7309014
Removed duplicate serverleak-test
mattkrau Feb 27, 2019
2c76e6b
Rename variables not matching the content
mattkrau Apr 10, 2019
583830f
Added scripts_disabled as wished in pull request. Remove unused code …
mattkrau Apr 10, 2019
ce49a3a
Typo fixed
mattkrau Apr 10, 2019
ec7c6b9
Fixed questionable explanation of uniquify()
mattkrau Apr 10, 2019
fa72e2b
PEP8 compliance
mattkrau Apr 10, 2019
eed7ad8
Rebuild match function with any()
mattkrau Apr 10, 2019
43d319b
SRI Check implementation Part 1 - SRI does not seem to trigger in chr…
mattkrau Apr 10, 2019
27625e3
POC: Collecting Info about Integrity Availability & Hash for each sty…
mattkrau Apr 11, 2019
5dcd6c7
Added script disabled part
mattkrau Apr 11, 2019
582fa7b
Added functionality to obtain debug messages for SRI events
mattkrau Apr 14, 2019
551ded6
Q&D integration of Debug messages into SRI list, working for first te…
mattkrau Apr 14, 2019
488c216
Stylesheets consistently detected / sri tested
mattkrau Apr 15, 2019
58dde77
Removed SRI Check from Security Headers
mattkrau Apr 28, 2019
7242f85
PEP8
mattkrau Apr 28, 2019
d175d43
Added Check for SRI in CSP Header into sricheck module.
mattkrau Apr 28, 2019
4d3c8a2
Put resulting list in dict, w/ proper key/values.
mattkrau Apr 28, 2019
9cb351c
Currently Chrome is configured to IGNORE require-sri-for. Only if the…
mattkrau Apr 29, 2019
a09a8c6
- Changed search mechanism to DOM.querySelectorAll.
mattkrau Apr 29, 2019
704a18c
Removed debugging print()
mattkrau Apr 29, 2019
b2ea23f
Removed unused code
mattkrau Apr 29, 2019
461b222
Code readded
mattkrau Apr 29, 2019
f1402a4
Changed generatortag to DOM.querySelectorAll
mattkrau Apr 29, 2019
76fa721
PEP8 cleanup, todo added for SRI extractor: Sanity Check
mattkrau Apr 29, 2019
b2862a6
Nicer wording for SRI (fail -> info)
mattkrau Apr 30, 2019
0050f87
typo fix
mattkrau Apr 30, 2019
4ba1049
Fixes for SRI parsing of CSP value
mattkrau Apr 30, 2019
6a0b7e0
Unique function removed
mattkrau Apr 30, 2019
09f984d
Renaming of dict entries, adding new result cases - As discussed
mattkrau Jan 17, 2020

Files changed

6 changes: 4 additions & 2 deletions privacyscanner/scanmodules/chromedevtools/__init__.py
@@ -9,7 +9,7 @@
TLSDetailsExtractor, CertificateExtractor, ThirdPartyExtractor, InsecureContentExtractor, \
FailedRequestsExtractor, SecurityHeadersExtractor, TrackerDetectExtractor, \
CookieStatsExtractor, JavaScriptLibsExtractor, ScreenshotExtractor, ImprintExtractor, \
-    HSTSPreloadExtractor, FingerprintingExtractor
+    GeneratorTagExtractor, HSTSPreloadExtractor, FingerprintingExtractor, SriExtractor
from privacyscanner.scanmodules.chromedevtools.utils import TLDEXTRACT_CACHE_FILE, parse_domain
from privacyscanner.utils import file_is_outdated, set_default_options, calculate_jaccard_index

@@ -19,7 +19,8 @@
CertificateExtractor, ThirdPartyExtractor, InsecureContentExtractor,
FailedRequestsExtractor, SecurityHeadersExtractor, TrackerDetectExtractor,
CookieStatsExtractor, JavaScriptLibsExtractor, ScreenshotExtractor,
-    ImprintExtractor, HSTSPreloadExtractor, FingerprintingExtractor]
+    ImprintExtractor, HSTSPreloadExtractor, FingerprintingExtractor,
+    GeneratorTagExtractor, SriExtractor]

EXTRACTOR_CLASSES_HTTPS_RUN = [FinalUrlExtractor, TLSDetailsExtractor, CertificateExtractor,
InsecureContentExtractor, SecurityHeadersExtractor,
@@ -79,3 +80,4 @@ def update_dependencies(self):
for extractor_class in EXTRACTOR_CLASSES:
if hasattr(extractor_class, 'update_dependencies'):
extractor_class.update_dependencies(self.options)

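Both new extractors plug into this registration list. For orientation, a minimal sketch of the extractor contract they follow, based only on the base-class usage visible in this PR (the class name and result key are illustrative):

import pychrome

from privacyscanner.scanmodules.chromedevtools.extractors.base import Extractor


class MyExtractor(Extractor):
    """Hypothetical extractor illustrating the contract used above."""

    def extract_information(self):
        # self.page.tab is the pychrome tab driving Chrome via DevTools;
        # self.result is the shared dict all registered extractors write into.
        self.result['my_key'] = None
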
14 changes: 14 additions & 0 deletions privacyscanner/scanmodules/chromedevtools/chromescan.py
@@ -305,6 +305,9 @@ def scan(self, browser, result, logger, options):
self._tab.Security.enable()
self._tab.Security.setIgnoreCertificateErrors(ignore=True)

+        self._register_log_callbacks()
+        self._tab.Log.enable()
+
self._tab.Page.loadEventFired = self._cb_load_event_fired
self._tab.Page.frameScheduledNavigation = self._cb_frame_scheduled_navigation
self._tab.Page.frameClearedScheduledNavigation = self._cb_frame_cleared_scheduled_navigation
@@ -485,6 +488,9 @@ def _cb_security_state_changed(self, **state):
def _cb_loading_failed(self, **failed_request):
self._page.add_failed_request(failed_request)

+    def _cb_log_entry_added(self, **log):
+        self._page.add_log_event(log)
+
def _register_network_callbacks(self):
self._tab.Network.requestWillBeSent = self._cb_request_will_be_sent
self._tab.Network.responseReceived = self._cb_response_received
@@ -498,6 +504,9 @@ def _unregister_network_callbacks(self):
def _register_security_callbacks(self):
self._tab.Security.securityStateChanged = self._cb_security_state_changed

+    def _register_log_callbacks(self):
+        self._tab.Log.entryAdded = self._cb_log_entry_added
+
def _unregister_security_callbacks(self):
self._tab.Security.securityStateChanged = None

@@ -573,6 +582,7 @@ def __init__(self, tab=None):
self.failed_request_log = []
self.response_log = []
self.security_state_log = []
+        self.logging_log = []
self.scan_start = None
self.tab = tab
self._response_lookup = defaultdict(list)
@@ -594,6 +604,10 @@ def add_request(self, request):
def add_failed_request(self, failed_request):
self.failed_request_log.append(failed_request)

+    def add_log_event(self, log_event):
+        self.logging_log.append(log_event)
+
+
def add_response(self, response):
self.response_log.append(response)
self._response_lookup[response['requestId']].append(response)
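
The Log-domain plumbing added above is what later lets SriExtractor see digest failures. A standalone sketch of the same pattern with pychrome, assuming a Chrome instance is already listening on the default DevTools port (the URL is illustrative):

import pychrome

# Assumes Chrome was started with --remote-debugging-port=9222.
browser = pychrome.Browser(url="http://127.0.0.1:9222")
tab = browser.new_tab()

log_events = []

def on_log_entry(**log):
    # Each event carries an 'entry' dict with 'source', 'level' and 'text',
    # which is exactly what Page.add_log_event() stores above.
    log_events.append(log)

tab.Log.entryAdded = on_log_entry
tab.start()
tab.Log.enable()
tab.Page.navigate(url="https://example.org", _timeout=10)
tab.wait(5)
tab.stop()
browser.close_tab(tab)

# SRI digest failures surface as security-source errors, which is what
# SriExtractor later filters on.
for event in log_events:
    entry = event['entry']
    if entry['source'] == 'security' and entry['level'] == 'error':
        print(entry['text'])
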
4 changes: 3 additions & 1 deletion privacyscanner/scanmodules/chromedevtools/extractors/__init__.py
@@ -3,6 +3,7 @@
from .cookiestats import CookieStatsExtractor
from .failedrequests import FailedRequestsExtractor
from .finalurl import FinalUrlExtractor
+from .generatortag import GeneratorTagExtractor
from .googleanalytics import GoogleAnalyticsExtractor
from .insecurecontent import InsecureContentExtractor
from .javascriptlibs import JavaScriptLibsExtractor
@@ -15,4 +16,5 @@
from .screenshot import ScreenshotExtractor
from .imprint import ImprintExtractor
from .hstspreload import HSTSPreloadExtractor
-from .fingerprinting import FingerprintingExtractor
+from .fingerprinting import FingerprintingExtractor
+from .sricheck import SriExtractor
48 changes: 48 additions & 0 deletions privacyscanner/scanmodules/chromedevtools/extractors/generatortag.py
@@ -0,0 +1,48 @@
import pychrome

from privacyscanner.scanmodules.chromedevtools.extractors.base import Extractor
from privacyscanner.scanmodules.chromedevtools.utils import scripts_disabled


ELEMENT_NODE = 1


class GeneratorTagExtractor(Extractor):

    def extract_information(self):
        # Disable scripts to avoid DOM changes while searching for
        # generator tags, see imprint.py / pull request discussion.
        with scripts_disabled(self.page.tab, self.options):
            self._extract_information()

    def _extract_information(self):
        tags = []

        node_id = self.page.tab.DOM.getDocument()['root']['nodeId']
        meta_node_ids = self.page.tab.DOM.querySelectorAll(nodeId=node_id, selector='meta')['nodeIds']

        for node_id in meta_node_ids:
            while node_id is not None:
                try:
                    node = self.page.tab.DOM.describeNode(nodeId=node_id)['node']
                except pychrome.CallMethodException:
                    # For some reason, nodes seem to disappear in-between,
                    # so just ignore these cases.
                    break
                if node['nodeType'] == ELEMENT_NODE and node['nodeName'].lower() == 'meta':
                    # DOM.describeNode returns attributes as a flat
                    # [name1, value1, name2, value2, ...] list, so build a
                    # dict instead of relying on a fixed attribute order.
                    attrs = node.get('attributes', [])
                    attr_dict = dict(zip(attrs[::2], attrs[1::2]))
                    if attr_dict.get('name', '').lower() == 'generator' and 'content' in attr_dict:
                        tags.append(attr_dict['content'])
                    break
                node_id = node.get('parentId')

        tags = list(set(tags))
        if tags:
            self.result['generator'] = {str(i + 1): tag for i, tag in enumerate(tags)}
        else:
            self.result['generator'] = None
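
One detail worth knowing here: DOM.describeNode reports attributes as a flat, interleaved list, which is why the extractor zips it into a dict. A tiny illustration with made-up values:

# DOM.describeNode returns attributes as [name1, value1, name2, value2, ...].
attrs = ['name', 'generator', 'content', 'WordPress 5.3']
attr_dict = dict(zip(attrs[::2], attrs[1::2]))
assert attr_dict == {'name': 'generator', 'content': 'WordPress 5.3'}

# For a page with this single generator tag, the extractor would then set:
# result['generator'] == {'1': 'WordPress 5.3'}
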
1 change: 1 addition & 0 deletions privacyscanner/scanmodules/chromedevtools/extractors/securityheaders.py
@@ -24,6 +24,7 @@ def extract_information(self):
csp_value = None
if 'content-security-policy' in headers:
csp_value = self._parse_csp(headers['content-security-policy'])
+
security_headers['Content-Security-Policy'] = csp_value

xss_protection = None
Expand Down
135 changes: 135 additions & 0 deletions privacyscanner/scanmodules/chromedevtools/extractors/sricheck.py
@@ -0,0 +1,135 @@
import pychrome

from privacyscanner.scanmodules.chromedevtools.extractors.base import Extractor
from privacyscanner.scanmodules.chromedevtools.utils import scripts_disabled

# Element nodes have nodeType 1 in the DOM spec.
ELEMENT_NODE = 1


class SriExtractor(Extractor):

    def extract_information(self):
        # Disable scripts to avoid DOM changes while searching for SRI
        # attributes, see imprint.py / pull request discussion.
        with scripts_disabled(self.page.tab, self.options):
            self.extract_sri()

    def extract_sri(self):
        sri_dict = {}
        final_sri_list = []
        failed_urls = []

        sri_dict['require_sri_for'] = None
        sri_dict['all_sri_active_and_valid'] = None
        sri_dict['at_least_one_sri_active'] = None
        sri_dict['all_sri_active'] = None

        # Check the CSP values that were already read into the result.
        # Currently Chrome is configured to IGNORE require-sri-for. Only if
        # the flag #enable-experimental-web-platform-features is enabled does
        # it correctly throw an error if a script / style has no integrity hash.
        security_headers = self.result['security_headers']
        if security_headers['Content-Security-Policy'] is not None:
            if 'require-sri-for' in security_headers['Content-Security-Policy']:
                sri_dict['require_sri_for'] = security_headers['Content-Security-Policy']['require-sri-for'][0]
                # This means privacyscanner reads the CSP header for SRI,
                # but chromedevtools is currently not enforcing it.

        node_id = self.page.tab.DOM.getDocument()['root']['nodeId']
        links = self.page.tab.DOM.querySelectorAll(nodeId=node_id, selector='link')['nodeIds']

        for node_id in links:
            while node_id is not None:
                try:
                    node = self.page.tab.DOM.describeNode(nodeId=node_id)['node']
                except pychrome.CallMethodException:
                    # For some reason, nodes seem to disappear in-between,
                    # so just ignore these cases.
                    break

                if node['nodeType'] == ELEMENT_NODE and 'href' in node['attributes']:
                    if 'stylesheet' in node['attributes']:
                        self._add_element_to_linklist(final_sri_list, None, node['attributes'])
                        break
                    if 'script' in node['attributes']:
                        self._add_element_to_linklist(final_sri_list, None, node['attributes'])
                        break
                node_id = node.get('parentId')

        # Check if an href shows up in the failure log; if so, set the
        # element's attributes accordingly.
        logging_log = self.page.logging_log
        for element in logging_log:
            if element['entry']['source'] == 'security' and element['entry']['level'] == 'error':
                if 'Failed to find a valid digest' in element['entry']['text']:
                    failed_urls.append(element['entry']['text'].split('\'')[3])

        for element in final_sri_list:
            if len(failed_urls) == 0:
                if element['integrity_active']:
                    element['integrity_valid'] = True
            for failed_url in failed_urls:
                if '/' + element['href'].replace('/', '', 1) in failed_url:
                    element['integrity_valid'] = False
                elif element['integrity_active']:
                    element['integrity_valid'] = True
                else:
                    element['integrity_valid'] = None

        # Check whether all links have SRI enabled and a valid hash.
        active_counter, valid_counter = 0, 0

        for element in final_sri_list:
            if element['integrity_active']:
                active_counter += 1
            if element['integrity_valid']:
                valid_counter += 1

        # Case 1: All CSS/JS have SRI active.
        sri_dict['all_sri_active'] = len(final_sri_list) == active_counter

        # Case 2: At least one of CSS/JS has SRI active (but possibly invalid).
        # This is to not punish websites for using SRI and having a bad hash
        # due to changed code.
        sri_dict['at_least_one_sri_active'] = active_counter > 0

        # Case 3: All of the used CSS and JS have SRI enabled and all hashes match.
        sri_dict['all_sri_active_and_valid'] = active_counter == valid_counter == len(final_sri_list)

        sri_dict['link-list'] = final_sri_list

        self.result['sri-info'] = sri_dict

    def _add_element_to_linklist(self, final_sri_list, node_value, node_attributes):
        new_entry = dict(href=None, type=None, integrity_active=False,
                         integrity_hash=None, integrity_valid=None)
        if node_value is not None:
            value_parts = node_value.split()
            for element in value_parts:
                if 'href=' in element:
                    new_entry['href'] = element.split('"')[1]
                if 'integrity' in element:
                    new_entry['integrity_active'] = True
                    new_entry['integrity_hash'] = element.split('"')[1]

        if node_attributes is not None:
            new_entry['href'] = node_attributes[node_attributes.index('href') + 1]
            new_entry['type'] = node_attributes[node_attributes.index('rel') + 1]
            if new_entry['type'] == 'preload':
                new_entry['type'] = node_attributes[node_attributes.index('preload') + 2]
            if 'integrity' in node_attributes:
                new_entry['integrity_active'] = True
                new_entry['integrity_hash'] = node_attributes[node_attributes.index('integrity') + 1]

        if new_entry not in final_sri_list:
            final_sri_list.append(new_entry)
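
To make the result shape concrete, here is a sketch of what the extractor would emit for a page with one SRI-protected stylesheet; all values are illustrative and the hash is truncated:

# Illustrative page markup:
#   <link rel="stylesheet" href="/static/app.css"
#         integrity="sha384-oqVuAfXRKap7fdgc...">
# Illustrative response header (read by privacyscanner, but ignored by
# Chrome unless #enable-experimental-web-platform-features is set):
#   Content-Security-Policy: require-sri-for script style

sri_info = {
    'require_sri_for': 'script',        # first require-sri-for token, else None
    'all_sri_active': True,             # Case 1
    'at_least_one_sri_active': True,    # Case 2
    'all_sri_active_and_valid': True,   # Case 3
    'link-list': [
        {'href': '/static/app.css',
         'type': 'stylesheet',
         'integrity_active': True,
         'integrity_hash': 'sha384-oqVuAfXRKap7fdgc...',
         'integrity_valid': True},
    ],
}
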
25 changes: 18 additions & 7 deletions privacyscanner/scanmodules/serverleaks.py
@@ -23,10 +23,18 @@ def scan_site(self, result, meta):

def _match_db_dump(content):
    targets = ["SQLite", "CREATE TABLE", "INSERT INTO", "DROP TABLE"]
-    matched = False
-    for target in targets:
-        matched |= target in content
-    return matched
+    return any(target in content for target in targets)
+
+
+def _match_env_file(content):
+    targets = ["TERM", "PATH", "COMPOSER", "INSTALL"]
+    return any(target in content for target in targets)
+
+
+def _match_package_file(content):
+    targets = ["name", "author", "contributors", "bugs", "homepage", "version", "license", "keywords", "description",
+               "repository", "main", "private", "scripts", "dependencies", "devDependencies", "engines", "browserslist"]
+    return any(target in content for target in targets)


def _concat_sub(url, suffix):
@@ -102,7 +110,11 @@ def _gen_db_full_domain_pem(url):
('.svn/wc.db', 'SQLite'),
('core', 'ELF'),
('.DS_Store', 'Bud1'),
-
+    ('.npmrc', '='),
+    ('package.json', _match_package_file),
+    # ('.htaccess', 'unknown'),
+    ('workspace.xml', 'FileEditorManager'),
+    ('.gitlab-ci.yml', 'job'),
# Check for Database dumps
# sqldump - MySQL/MariaDB
('dump.db', _match_db_dump),
@@ -140,8 +152,7 @@
# https://infosec.rm-it.de/2018/08/19/scanning-the-alexa-top-1m-sites-for-dockerfiles/
('Dockerfile', 'FROM'),
# https://twitter.com/svblxyz/status/1045013939904532482
-    ('docker.env', '='),
-    ('.env', '='),
+    ('docker.env', _match_env_file),
# Docker Compose
('docker-compose.yml', 'version:'),
]
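
The trailing list pairs each probe path with either a literal substring or one of the matcher callables defined above. The consuming code in scan_site is not part of this diff; a minimal sketch of how such an entry might be evaluated, with a hypothetical _matches helper:

def _match_db_dump(content):
    # Copied from the diff above so this example is self-contained.
    targets = ["SQLite", "CREATE TABLE", "INSERT INTO", "DROP TABLE"]
    return any(target in content for target in targets)


def _matches(matcher, content):
    # Hypothetical helper: a matcher is either a callable or a substring.
    return matcher(content) if callable(matcher) else matcher in content


assert _matches(_match_db_dump, "-- dump\nCREATE TABLE users (id INT);")
assert _matches('FROM', "FROM python:3.8-slim")
assert not _matches('job', "<html>Not found</html>")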