From a317db02d16ede92a9b53a8c57ba2e5a36941bae Mon Sep 17 00:00:00 2001
From: sushil-rgb
Date: Wed, 22 Nov 2023 01:52:43 +0545
Subject: [PATCH] added some print statements for debugging purposes

---
 main.py             | 2 +-
 scrapers/scraper.py | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/main.py b/main.py
index 4c2b07a..203a0f8 100644
--- a/main.py
+++ b/main.py
@@ -9,7 +9,7 @@ async def main():
-    base_url = "https://www.amazon.se/s?bbn=20652066031&rh=n%3A20652066031%2Cp_n_deal_type%3A27060728031&_encoding=UTF8&content-id=amzn1.sym.363125ff-5b14-46ae-9206-bb409a91f72e&pd_rd_r=f0ddebb8-2417-41bf-9e42-0c44b701b027&pd_rd_w=BXn3l&pd_rd_wg=DavuY&pf_rd_p=363125ff-5b14-46ae-9206-bb409a91f72e&pf_rd_r=MDNW87RF07WEJ4XJSFNQ&ref=pd_gw_unk"
+    base_url = "https://www.amazon.co.jp/s?i=sporting&rh=n%3A15334571%2Cp_n_price_fma%3A401077011&dc&fs=true&language=en&ds=v1%3AsC3hKMKPXJRge3qllDNTAiZkbn8XKSOiqs7NI0DL0J4&qid=1700597065&rnid=401076011&ref=sr_nr_p_n_price_fma_1"
     status = await Amazon(base_url, None).status()
     if status == 503:
diff --git a/scrapers/scraper.py b/scrapers/scraper.py
index c61cc55..89a58d7 100644
--- a/scrapers/scraper.py
+++ b/scrapers/scraper.py
@@ -34,7 +34,7 @@ def __init__(self, base_url, proxy):
         self.region = region(base_url)
 
         # Define a regular expression pattern for currencies in different regions
-        self.currency = r'[$₹,R$€£kr()%¥\s]' # Characters representing various currencies
+        self.currency = r'["$₹,R$€£kr()%¥\s]' # Characters representing various currencies
         # Explanation:
         # - '[$₹,R\$€£kr()%¥\s]': Match any of the characters within the square brackets
         # - '$': Dollar sign
@@ -264,7 +264,7 @@ async def scrape_product_info(self, url, max_retries = 13):
                 if 'Page' in price.split():
                     price = await self.catch.text(soup.select_one(self.scrape['price_us_i']))
                 if price != "N/A":
-                    price = float(re.sub(self.currency, '', price))
+                    price = re.sub(self.currency, '', price)
                 try:
                     deal_price = await self.catch.text(soup.select(self.scrape['deal_price'])[0])
                     if 'Page' in deal_price.split():
@@ -272,7 +272,7 @@ async def scrape_product_info(self, url, max_retries = 13):
                 except Exception as e:
                     deal_price = "N/A"
                 if deal_price != "N/A":
-                    deal_price = float(re.sub(self.currency, '', deal_price))
+                    deal_price = re.sub(self.currency, '', deal_price)
                 try:
                     savings = await self.catch.text(soup.select(self.scrape['savings'])[-1])
                 except IndexError:
@@ -314,7 +314,7 @@ async def scrape_product_info(self, url, max_retries = 13):
                 if retry < max_retries - 1:
                     await asyncio.sleep(5) # Delay before retrying.
             except Exception as e:
-                print(f"Retry {retry + 1} failed: {str(e)}")
+                print(f"Retry {retry + 1} failed: {str(e)} | Error URL : {url}")
                 if retry < max_retries - 1:
                     await asyncio.sleep(4) # Delay before retrying.
         return amazon_dicts