-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcrawl_images.py
37 lines (26 loc) · 962 Bytes
/
crawl_images.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import sys
import argparse
import re
import pymongo
from numpy import array, random
import utils
import hit
import mongodb_config
def main():
    """Download every image listed in a data source file into a directory.

    Command-line options:
        -f  Path of the data source file. It is read with
            ``utils.load_file`` and the result is iterated as image URLs.
        -d  Directory the images are written to, one ``<image id>.jpg``
            file per URL.

    Nothing happens when ``-f`` is omitted. When ``-f`` is given without
    ``-d`` the parser reports a usage error (exit status 2) instead of
    failing later with a ``TypeError`` while building the output path.
    """
    # Imported here because the module header does not import ``os``.
    import os

    # NOTE(review): the description talks about analyzing HIT results while
    # this script downloads images; it looks copied from a sibling script.
    # Left unchanged -- confirm before rewording.
    parser = argparse.ArgumentParser(
        description='Analyze HIT results submitted by Amazon Mechnical Turk workers.')
    parser.add_argument('-f', help='The mtk data source file.')
    parser.add_argument('-d', help='The image path')
    args = parser.parse_args()

    if args.f is None:
        return

    # Validate before loading anything so a missing directory is reported
    # as a usage error rather than a crash on the first download.
    if args.d is None:
        parser.error('-d (the image path) is required when -f is given')

    file_urls = utils.load_file(args.f)
    data_metainfo = hit.regex_datasource(file_urls)
    # data_metainfo[0] holds the labels (flickr high interesting 1, flickr
    # low interesting 2, pinterest [3, 4, 5]); they are not needed here.
    # data_metainfo[1] holds the (flickr, pinterest) image ids.
    data_ids = data_metainfo[1]

    # Ids are looked up by position, so a missing id still raises IndexError
    # instead of silently skipping a URL.
    for index, url in enumerate(file_urls):
        target = os.path.join(args.d, data_ids[index] + '.jpg')
        utils.crawl_image_from_url(url, target)
# Start the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()