-
Notifications
You must be signed in to change notification settings - Fork 98
/
Copy pathvagrant-brozzler-new-site.py
executable file
·91 lines (75 loc) · 2.83 KB
/
vagrant-brozzler-new-site.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/usr/bin/env python
"""
vagrant-brozzler-new-site.py - runs brozzler-new-site inside the vagrant vm to
queue a site for your vagrant brozzler deployment.
Fills in the --proxy option automatically. Some other options are passed
through.
This is a standalone script with no dependencies other than python, and should
work with python 2.7 or python 3.2+. The only reason it's not a bash script is
so we can use the argparse library.
Copyright (C) 2016 Internet Archive
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import sys
import os
import argparse
import subprocess
# shlex.quote is not available on older interpreters (e.g. python 2.7), where
# the same function is provided by the pipes module. Catch only ImportError so
# that unrelated failures (KeyboardInterrupt, SystemExit, ...) are not hidden.
try:
    from shlex import quote
except ImportError:
    from pipes import quote
def _build_remote_command(args):
    """
    Build the command line that runs brozzler-new-site inside the vm.

    args -- the namespace produced by the argument parser in main()

    The returned string is handed to "vagrant ssh --" as a single command
    string, so every user-supplied value (seed url, time limit, warcprox-meta
    json) is escaped with quote() to keep characters such as "&", ";", "?" or
    spaces from being interpreted by the shell.
    """
    words = [
        "/opt/brozzler-ve3/bin/python",
        "/opt/brozzler-ve3/bin/brozzler-new-site",
    ]
    if args.time_limit:
        words.append("--time-limit=%s" % quote(args.time_limit))
    if args.ignore_robots:
        words.append("--ignore-robots")
    if args.warcprox_meta:
        words.append("--warcprox-meta=%s" % quote(args.warcprox_meta))
    if args.quiet:
        words.append("--quiet")
    if args.verbose:
        words.append("--verbose")
    words.append(quote(args.seed))
    return " ".join(words)


def main(argv=None):
    """
    Parse the command line and run brozzler-new-site inside the vagrant vm.

    argv -- full argument vector, program name first; sys.argv is used when
            argv is omitted or empty

    Returns the exit status of the "vagrant ssh" command. The argument parser
    raises SystemExit on invalid usage and for --help.
    """
    # Avoid a mutable default argument, and avoid an IndexError on argv[0]
    # when the caller passes nothing.
    if not argv:
        argv = sys.argv

    arg_parser = argparse.ArgumentParser(prog=os.path.basename(argv[0]))
    arg_parser.add_argument("seed", metavar="SEED", help="seed url")
    arg_parser.add_argument(
        "--time-limit",
        dest="time_limit",
        default=None,
        help="time limit in seconds for this site",
    )
    arg_parser.add_argument(
        "--ignore-robots",
        dest="ignore_robots",
        action="store_true",
        help="ignore robots.txt for this site",
    )
    arg_parser.add_argument(
        "--warcprox-meta",
        dest="warcprox_meta",
        help=(
            "Warcprox-Meta http request header to send with each request; "
            "must be a json blob, ignored unless warcprox features are "
            "enabled"
        ),
    )
    arg_parser.add_argument("-q", "--quiet", dest="quiet", action="store_true")
    arg_parser.add_argument("-v", "--verbose", dest="verbose", action="store_true")
    args = arg_parser.parse_args(args=argv[1:])

    cmd = _build_remote_command(args)

    # cd to path with Vagrantfile so "vagrant ssh" knows what to do. abspath()
    # is needed because __file__ may be a bare file name, in which case
    # dirname() would return "" and os.chdir("") would fail.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))

    return subprocess.call(["vagrant", "ssh", "--", cmd])
if __name__ == "__main__":
    # Use whatever main() returns as the process exit status so a failure can
    # be detected by the calling shell; sys.exit(None) exits with status 0, so
    # a main() that returns nothing behaves as before.
    sys.exit(main(sys.argv))