# main.py — forked from sucv/paperCrawler
# from scrapy import cmdline
#
# cmdline.execute("scrapy crawl mm -a years=2016,2017,2018,2019,2020,2021,2022 -a keys=emotion,affective -o emotion.csv".split())
# cmdline.execute("scrapy crawl nips -a years=2016,2017,2018,2019,2020,2021,2022 -a keys=emotion,affective -o emotion.csv".split())
#
# # cmdline.execute("scrapy crawl nips -a years=2015 -a keys=video -o test.csv".split())
# # cmdline.execute("scrapy crawl eccv -a years=2020,2021,2022 -a keys=video -o output.csv -s JOBDIR=folder6".split())
from scrapy.utils.project import get_project_settings
from scrapy.crawler import CrawlerProcess
import argparse
if __name__ == "__main__":
    # CLI entry point: parse conference names / years / optional keyword
    # queries, then run one scrapy spider per conference in a single process.
    parser = argparse.ArgumentParser(description="Crawl conference paper info")
    parser.add_argument(
        "conference",
        type=str,
        help="Conference names, separated by comma. Supported names include "
        "cvpr, iccv, eccv, neurips, aaai, icml, iclr, wacv",
    )
    parser.add_argument(
        "years",
        type=str,
        help="Years of the conference, separated by comma",
    )
    parser.add_argument(
        "--queries", default="", help="What keywords you want to query?"
    )
    args = parser.parse_args()

    settings = get_project_settings()
    process = CrawlerProcess(settings)

    # Tolerate whitespace around commas (e.g. "cvpr, iccv") and skip empty
    # entries so a trailing comma does not schedule a nameless spider.
    for conf in (name.strip() for name in args.conference.split(",")):
        if conf:
            # years/queries are forwarded verbatim; each spider parses them.
            process.crawl(conf, years=args.years, queries=args.queries)

    # Starts the Twisted reactor; blocks until every scheduled crawl finishes.
    process.start()