diff --git a/pyproject.toml b/pyproject.toml
index c4396b7..4a44ca7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,9 +11,10 @@ urls = {Homepage = "https://github.com/4armed/sri-check"}
 requires-python = ">=3.6"
 dependencies = [
     "beautifulsoup4>=4.0",
+    "blinker==1.7.0", # Required for Seleniumwire
     "lxml>=4.8",
     "requests>=2.0",
-    "selenium>=4.10",
+    "selenium>=4.25",
 ]
 
 [project.readme]
diff --git a/requirements.txt b/requirements.txt
index 4c5fadc..91460c9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
 beautifulsoup4>=4.0
 lxml>=4.8
 requests>=2.0
-selenium>=4.10
\ No newline at end of file
+selenium>=4.25
\ No newline at end of file
diff --git a/sricheck/__init__.py b/sricheck/__init__.py
index 95fb1fc..5382d91 100644
--- a/sricheck/__init__.py
+++ b/sricheck/__init__.py
@@ -1 +1 @@
-__version__ = "1.10.0"
\ No newline at end of file
+__version__ = "1.12.11"
\ No newline at end of file
diff --git a/sricheck/sricheck.py b/sricheck/sricheck.py
index ba900db..d58b505 100755
--- a/sricheck/sricheck.py
+++ b/sricheck/sricheck.py
@@ -3,6 +3,7 @@
 import argparse
 import base64
 import hashlib
+import os
 import re
 import sys
 import requests
@@ -91,10 +92,9 @@ def is_allowlisted(self, netloc):
 
     def get_html(self):
         if self.browser:
-            from selenium import webdriver
-            from selenium.webdriver.chrome.options import Options
+            from seleniumwire import webdriver
 
-            chrome_options = Options()
+            chrome_options = webdriver.ChromeOptions()
             chrome_options.add_argument("--headless")
             chrome_options.add_argument("--no-sandbox")
             chrome_options.add_argument("--disable-dev-shm-usage")
@@ -104,20 +104,31 @@ def get_html(self):
                 }
             }
 
-            browser = webdriver.Chrome(options=chrome_options)
+            browser = webdriver.Chrome(
+                options=chrome_options,
+                seleniumwire_options={
+                    'proxy': {
+                        'http': os.environ.get("http_proxy"),
+                        'https': os.environ.get("https_proxy"),
+                    }
+                }
+            )
 
             def interceptor(request):
-                request.headers.update(self.headers)
+                for key, value in self.headers.items():
+                    del request.headers[key]
+                    request.headers[key] = value
 
             browser.request_interceptor = interceptor
             browser.get(self.url)
-            return browser.execute_script("return document.documentElement.outerHTML;")
+            content = browser.execute_script("return document.documentElement.outerHTML;")
+
+            browser.quit()
+            return content
         else:
             # file deepcode ignore Ssrf: The purpose of the script is to parse remote URLs from the CLI
-            return requests.get(self.url, headers=self.headers).content
-
     def get_remote_resource_tags(self, html):
         soup = BeautifulSoup(html, 'lxml')
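
Note on the interceptor change above: Selenium Wire's request.headers is a duplicate-permitting, case-insensitive mapping, so the old request.headers.update(self.headers) could leave Chrome's default header in place alongside the injected one; per the Selenium Wire docs, deleting the key before re-adding it is how a header is overwritten. The blinker==1.7.0 pin works around Selenium Wire's import of an internal blinker module that blinker 1.8 removed. Below is a standalone sketch of the same interceptor and proxy plumbing, assuming selenium-wire and a matching Chrome/chromedriver are installed; the URL and header values are placeholders, not part of the patch.

    # Standalone sketch (not part of the patch) of the Selenium Wire pattern
    # used in get_html() above. Assumes selenium-wire, blinker==1.7.0, and a
    # chromedriver-compatible Chrome are installed; URL/headers are placeholders.
    import os

    from seleniumwire import webdriver

    headers = {"User-Agent": "sri-check example", "X-Demo": "1"}  # illustrative

    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")

    browser = webdriver.Chrome(
        options=chrome_options,
        seleniumwire_options={
            "proxy": {
                # Same env-var plumbing as the patch; values are None when unset.
                "http": os.environ.get("http_proxy"),
                "https": os.environ.get("https_proxy"),
            }
        },
    )

    def interceptor(request):
        # Headers may hold duplicates, so delete before re-adding; deleting
        # an absent key is a no-op rather than a KeyError.
        for key, value in headers.items():
            del request.headers[key]
            request.headers[key] = value

    browser.request_interceptor = interceptor
    browser.get("https://example.com/")  # placeholder URL
    html = browser.execute_script("return document.documentElement.outerHTML;")
    browser.quit()

The explicit browser.quit() mirrors the patch: without it each run leaks a headless Chrome process, since execute_script() returning does not end the browser session.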