Skip to content

Commit

Permalink
Merge pull request #922 from ScrapeGraphAI/pre/beta
Browse files (browse the repository at this point in the history)
Pre/beta
  • Loading branch information
VinciGit00 authored Feb 13, 2025
2 parents 1731396 + 9fe7265 commit 8472481
Showing 1 changed file with 23 additions and 42 deletions.
65 changes: 23 additions & 42 deletions tests/test_scrape_do.py
Original file line number | Diff line number | Diff line change
@@ -1,51 +1,32 @@
import urllib.parse
import pytest

from unittest.mock import patch, Mock
from scrapegraphai.docloaders.scrape_do import scrape_do_fetch
from unittest.mock import Mock, patch

class TestScrapeDoFetch:
    """Proxy-mode test for ``scrape_do_fetch`` (the side removed by this diff)."""

    @patch('scrapegraphai.docloaders.scrape_do.requests.get')
    @patch('scrapegraphai.docloaders.scrape_do.os.getenv')
    def test_scrape_do_fetch_with_proxy_geocode_and_super_proxy(self, mock_getenv, mock_get):
        """
        Test scrape_do_fetch function with proxy mode, geoCode, and super_proxy enabled.

        This test verifies that the function correctly handles proxy settings,
        geoCode parameter, and super_proxy flag when making a request.
        """
        # Decorators apply bottom-up, so the ``os.getenv`` mock is the first
        # injected argument and the ``requests.get`` mock is the second.

        # Mock environment variable
        mock_getenv.return_value = "proxy.scrape.do:8080"

        # Mock the response
        mock_response = Mock()
        mock_response.text = "Mocked response content"
        mock_get.return_value = mock_response

        # Test parameters
        token = "test_token"
        target_url = "https://example.com"
        use_proxy = True
        geoCode = "US"
        super_proxy = True

        # Call the function
        result = scrape_do_fetch(token, target_url, use_proxy, geoCode, super_proxy)

        # Assertions
        assert result == "Mocked response content"
        mock_get.assert_called_once()
        call_args = mock_get.call_args

        # Check if the URL is correct
        assert call_args[0][0] == target_url

        # Check if proxies are set correctly
        assert call_args[1]['proxies'] == {
            "http": f"http://{token}:@proxy.scrape.do:8080",
            "https": f"http://{token}:@proxy.scrape.do:8080",
        }

        # Check if verify is False
        assert call_args[1]['verify'] is False

        # Check if params are set correctly
        assert call_args[1]['params'] == {"geoCode": "US", "super": "true"}


def test_scrape_do_fetch_without_proxy():
    """
    Test scrape_do_fetch function using API mode (without proxy).

    This test verifies that:
    1. The function correctly uses the API mode when use_proxy is False.
    2. The correct URL is constructed with the token and encoded target URL.
    3. The function returns the expected response text.
    """
    token = "test_token"
    target_url = "https://example.com"
    encoded_url = urllib.parse.quote(target_url)
    expected_response = "Mocked API response"

    # NOTE(review): this patches the global ``requests.get``; it only takes
    # effect if the module under test calls ``requests.get`` through the
    # ``requests`` module object - confirm against scrape_do.py.
    with patch("requests.get") as mock_get:
        mock_response = Mock()
        mock_response.text = expected_response
        mock_get.return_value = mock_response

        result = scrape_do_fetch(token, target_url, use_proxy=False)

        expected_url = f"http://api.scrape.do?token={token}&url={encoded_url}"
        mock_get.assert_called_once_with(expected_url)

        assert result == expected_response

0 comments on commit 8472481

Please sign in to comment.