-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
42 lines (33 loc) · 1.25 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
"""Ozon Search Queries Collector.
This script is the entry point for running the Ozon search queries collector spider.
It configures the Scrapy project settings dynamically, initializes the crawler, and
executes the `OzonDataQuerySpider`.
"""
import os
from pathlib import Path
from dotenv import load_dotenv
from rich import print
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
from ozon_collector.spiders.OzonDataQuerySpider import OzonDataQuerySpider
if __name__ == "__main__":
    # Directory containing this script; the env file is looked up next to it.
    ABS_PATH: Path = Path(os.path.abspath(__file__)).parent
    env_filename = ABS_PATH / ".env.development"

    # Fail loudly with an explicit exception rather than `assert`: assertions
    # are stripped under `python -O`, which would let a missing env file go
    # unnoticed and the crawl start without its configuration.
    if not env_filename.exists():
        raise FileNotFoundError(f"Environment file not found: {env_filename}")

    load_dotenv(env_filename)
def main(
    *,
    initial_query_keyword: str = "сыр",
    parse_in_depth: bool = True,
    query_popularity_threshold: int = 0,
) -> None:
    """Configure the Scrapy project settings and run `OzonDataQuerySpider`.

    Loads the project settings, prints them, applies a few overrides, then
    schedules the spider on a `CrawlerProcess` and starts it.

    All arguments are keyword-only and are forwarded unchanged to the spider
    under the same names; the defaults reproduce the values this script has
    always used, so calling `main()` with no arguments behaves as before.

    Args:
        initial_query_keyword: Spider argument `initial_query_keyword`.
        parse_in_depth: Spider argument `parse_in_depth`.
        query_popularity_threshold: Spider argument
            `query_popularity_threshold`.
    """
    settings = get_project_settings()
    # Dump the settings as loaded, before the overrides below are applied.
    print(settings.copy_to_dict())

    # Enhance settings dynamically
    settings.set("CONCURRENT_REQUESTS", 1)
    settings.set("PLAYWRIGHT_BROWSER_TYPE", "chromium")
    settings.set("DEPTH_LIMIT", 1)

    process = CrawlerProcess(settings=settings)
    process.crawl(
        OzonDataQuerySpider,
        initial_query_keyword=initial_query_keyword,
        parse_in_depth=parse_in_depth,
        query_popularity_threshold=query_popularity_threshold,
    )
    process.start()
# Run the collector: configure the settings and start the crawl.
main()