Skip to content

More title patterns (#163) #151

More title patterns (#163)

More title patterns (#163) #151

Workflow file for this run

# End-to-end regression workflow, triggered on every push.
#
# Each job regenerates files that are checked into the repository and then
# runs `git diff --exit-code`, so the job fails whenever the regenerated
# output differs from what is committed.
name: e2etest
on: [push]
jobs:
  # Runs the geocoder over fixed ID samples and rebuilds the truth data.
  geocoder:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # Repo-local action defined outside this file; presumably provisions the
      # Python/poetry toolchain used by the steps below -- TODO confirm.
      - uses: ./.github/actions/setup
      - name: Expand geocache
        run: |
          tar -xzf geocache.tgz
      - name: Run geocoder
        # stdout is tee'd into test/random200-geocoded.txt and stderr into
        # test/random200.logs.txt; both still reach the job log.
        # See https://stackoverflow.com/a/692407/388951 for the stdout/stderr redirection
        run: |
          PYTHONPATH=. poetry run oldnyc/geocode/geocode.py \
            --ids_filter test/random200-ids.txt \
            --images_ndjson data/images.ndjson \
            --output_format id-location.txt \
            --geocode \
            > >(tee test/random200-geocoded.txt) \
            2> >(tee test/random200.logs.txt >&2)
      - name: Generate truth data
        run: |
          export PYTHONPATH=.
          poetry run oldnyc/geocode/geocode.py \
            --images_ndjson data/images.ndjson \
            --output_format geojson \
            --ids_filter data/geocode/random500-ids.txt \
            --geocode \
            > /tmp/images.geojson
          poetry run oldnyc/geocode/truth/make_localturk_csv.py \
            data/geocode/random500-ids.txt /tmp/images.geojson data/geocode/random500.csv
          # We don't actually care about the diff on this file, just that make_localturk_csv.py doesn't error out.
          git checkout data/geocode/random500.csv
          poetry run oldnyc/geocode/truth/generate_truth_gtjson.py > data/geocode/truth.geojson
          jq -r '.features[].id' data/geocode/truth.geojson > data/geocode/truth-ids.txt
          poetry run python oldnyc/geocode/subjects/csv_to_geojson.py data/subjects/out.csv > data/subjects.geojson
      - name: Check for diffs
        run: |
          git diff --exit-code test/ data/

  # Runs the text cropper and the photo detector against images under test/.
  cropper:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/setup
      - name: Run cropper
        run: |
          poetry run oldnyc/crop/crop_to_text.py --beta 2 --overwrite test/721675b.jpg
          poetry run oldnyc/crop/crop_to_text.py --beta 2 --overwrite --border_only \
            --output_pattern '%s.border.jpg' test/721675b.jpg
      - name: Run photo detector
        run: |
          poetry run oldnyc/crop/find_pictures.py test/*f.jpg > test/detected-photos.ndjson
      - name: Check for diffs
        run: |
          git diff --exit-code test/

  # Runs ingestion through static-site generation, checking data/ for diffs
  # along the way and finally the sibling oldnyc.github.io checkout.
  generation:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/setup
      - name: Clone web site
        # Cloned next to (not inside) this checkout; the final step diffs it.
        run: |
          cd ..
          git clone https://github.com/oldnyc/oldnyc.github.io.git
      - name: Run ingestion
        run: |
          PYTHONPATH=. poetry run oldnyc/ingest/ingest.py
      - name: Check for diffs
        run: |
          git diff --exit-code data/
      - name: Split records to photos
        run: |
          export PYTHONPATH=.
          poetry run oldnyc/crop/records_to_photos.py data/images.ndjson data/crops.ndjson data/photos.ndjson
      # TODO: remove the OCR shootout after the dust settles; it only really needs to work once.
      - name: Run OCR shootout
        run: |
          PYTHONPATH=. poetry run oldnyc/ocr/ocr_shootout.py
      - name: Check for diffs
        run: |
          git diff --exit-code data/
      - name: Generate geocodes
        run: |
          export PYTHONPATH=.
          tar -xzf geocache.tgz
          poetry run oldnyc/geocode/geocode.py \
            --images_ndjson data/photos.ndjson \
            --lat_lon_map data/lat-lon-map.txt \
            --output_format lat-lon-to-ids.json \
            --geocode \
            > data/lat-lon-to-ids.json
      - name: Generate static site
        run: |
          export PYTHONPATH=.
          echo '{"fixes": {}}' > data/feedback/fixes.json
          poetry run oldnyc/site/generate_static_site.py --leave-timestamps-unchanged
      - name: Check for diffs
        run: |
          cd ../oldnyc.github.io
          git diff --exit-code