DATA_HOME = ./data
# --------------------------------------
# WORKFLOWS
# --------------------------------------
.PHONY: deploy
deploy: setup pulls3 pulls3-urls seed regen crawl pushs3 zip deploy-flyio
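# Running `make deploy` executes the full pipeline in order: install
# dependencies, pull the current dataset and URL list from S3, seed and
# regenerate the database, crawl for new articles, push the results back
# to S3, zip the public dataset and redeploy the web app to Fly.io.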
# --------------------------------------
# SETUP
# --------------------------------------
.PHONY: setup
QUIET := -q
setup:
	pip install pip -U $(QUIET)
	pip install poetry==1.3.0 $(QUIET)
	poetry install $(QUIET)
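# QUIET defaults to -q; clear it for verbose installs, e.g. `make setup QUIET=`.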
# --------------------------------------
# ARTICLE CRAWLING
# --------------------------------------
.PHONY: crawl
crawl:
	jq '.[].name' newspapers.json | xargs -n 1 -I {} scrapy crawl {} -o $(DATA_HOME)/articles/{}.jsonl -L DEBUG
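# Assumes newspapers.json is a JSON array of objects whose `name` field
# matches a Scrapy spider, e.g. a hypothetical entry {"name": "guardian"}.
# Each name becomes one `scrapy crawl` run writing a per-paper JSONL file:
#   scrapy crawl guardian -o ./data/articles/guardian.jsonl -L DEBUG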
# --------------------------------------
# WEB APP
# --------------------------------------
.PHONY: app zip deploy-flyio
PORT=8004
app: setup
	uvicorn climatedb.app:app --reload --port $(PORT) --host 0.0.0.0 --proxy-headers
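# PORT is overridable per invocation, e.g. `make app PORT=8080`;
# --proxy-headers tells uvicorn to trust X-Forwarded-* headers when the
# app runs behind a reverse proxy (presumably the Fly.io edge).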
zip:
	cd $(DATA_HOME); zip -r ./climate-news-db-dataset.zip ./* -x "./html/*" -x "./opinions/*"
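# The archive bundles everything under $(DATA_HOME) except the raw HTML
# and opinions folders, which are excluded via -x patterns.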
deploy-flyio:
	flyctl deploy --wait-timeout 360
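# --wait-timeout 360 gives the Fly.io release up to 360 seconds to become
# healthy before flyctl gives up.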
# --------------------------------------
# DATABASE
# --------------------------------------
.PHONY: seed regen
seed:
	mkdir -p $(DATA_HOME)/articles
	python scripts/seed.py
regen: seed
	python scripts/regen_database.py
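# regen depends on seed, so the articles directory exists and
# scripts/seed.py has run before scripts/regen_database.py rebuilds
# the database.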
# --------------------------------------
# S3
# --------------------------------------
.PHONY: pulls3 pulls3-urls pushs3
S3_BUCKET=$(shell aws cloudformation describe-stacks --stack-name ClimateNewsDB --region ap-southeast-2 --query 'Stacks[0].Outputs[?OutputKey==`UnversionedBucket`].OutputValue' --output text)
S3_DIR=s3://$(S3_BUCKET)
VERSIONED_S3_BUCKET=$(shell aws cloudformation describe-stacks --stack-name ClimateNewsDB --region ap-southeast-2 --query 'Stacks[0].Outputs[?OutputKey==`VersionedBucket`].OutputValue' --output text)
VERSIONED_S3_DIR=s3://$(VERSIONED_S3_BUCKET)
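# Bucket names are looked up from the ClimateNewsDB CloudFormation stack
# outputs; because these use recursive `=` assignment, the `aws` call runs
# each time the variable is expanded and needs valid AWS credentials.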
pulls3:
	aws --region ap-southeast-2 s3 sync $(S3_DIR) $(DATA_HOME) --exclude 'html/*'
pulls3-urls:
echo "$(shell wc -l $(DATA_HOME)/urls.jsonl) urls"
aws --region ap-southeast-2 s3 cp $(VERISONED_S3_DIR)/urls.jsonl $(DATA_HOME)/urls.jsonl
echo "$$(wc -l $(DATA_HOME)/urls.jsonl) urls"
pushs3:
	aws s3 sync $(DATA_HOME) $(S3_DIR)
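# sync only transfers new or changed files, so pushs3 is safe to re-run;
# it mirrors the local data directory back to the unversioned bucket that
# pulls3 reads from.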
# --------------------------------------
# AWS INFRA
# --------------------------------------
.PHONY: run-search-lambdas infra
infra: setup
	cd infra && npx --yes aws-cdk deploy -vv --all
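# Assumes the npx package is the AWS CDK CLI (aws-cdk); pinning a version
# (aws-cdk@<version>) would make infra deploys reproducible. --all deploys
# every stack defined under infra/.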
# --------------------------------------
# CHECK
# --------------------------------------
.PHONY: check static
check: setup
	ruff check climatedb infra scripts tests
static: setup
	mypy climatedb
	mypy tests
# --------------------------------------
# TEST
# --------------------------------------
.PHONY: test test-ci
test: setup
	pytest tests -x --lf -s
test-ci: setup
	coverage run -m pytest tests --showlocals --full-trace --tb=short --show-capture=no -v -s
	coverage report -m
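# test is for fast local iteration (-x stops on the first failure, --lf
# reruns only previously failing tests); test-ci runs the full suite under
# coverage and prints a per-module report.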
# --------------------------------------
# DEV
# --------------------------------------
.PHONY: gpt crawl-one
gpt:
	python ./climatedb/gpt.py
run-search-lambdas:
	python scripts/run-search-lambdas.py
crawl-one:
	scrapy crawl $(PAPER) -L DEBUG -o $(DATA_HOME)/articles/$(PAPER).jsonl
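# usage: make crawl-one PAPER=<spider-name>, where <spider-name> is one of
# the `name` values in newspapers.json (e.g. the hypothetical `guardian`).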