Skip to content

Commit

Permalink
wip: Provide a job to scrape OSM boundaries (#5)
Browse files Browse the repository at this point in the history
  • Loading branch information
claustres committed Dec 18, 2023
1 parent 289ed73 commit 35e2356
Show file tree
Hide file tree
Showing 6 changed files with 115 additions and 10 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ node_modules

# local data
output
osm-boundaries
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ notifications:
on_failure: always

env:
- JOB=osm-boundaries
- JOB=admin-express
- JOB=bdpr

Expand Down
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
# k-atlas

A Krawler based service to scrape various data related to administrative entities
Krawler based jobs to scrape various data related to administrative entities.

## OSM boundaries

This job relies on [osmium](https://osmcode.org/osmium-tool/) to extract administrative boundaries at different levels from OSM pbf files.

## Admin-Express

Expand Down
88 changes: 88 additions & 0 deletions jobfile-osm-boundaries.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import _ from 'lodash'
import path from 'path'
import { fileURLToPath } from 'url'
import { hooks } from '@kalisio/krawler'

// ES modules do not provide __dirname, so it is rebuilt from this module's URL
const __filename = fileURLToPath(import.meta.url)
const __dirname = path.dirname(__filename)
// Base path for outputs, overridable through the STORE_PATH environment variable
const storePath = process.env.STORE_PATH ? process.env.STORE_PATH : 'data/OSM'

// OSM pbf extracts to be processed, one task is generated per URL
const files = [
  'https://download.geofabrik.de/europe/albania-latest.osm.pbf'
]

/**
 * Hook factory building the download tasks of the job.
 *
 * @param {Object} options - Hook options coming from the job definition (currently unused)
 * @returns {Function} Async hook that writes one `http` task per entry of `files`
 *   into `hook.data.tasks` and resolves with the hook itself
 */
function generateTasks (options) {
  return async function (hook) {
    const tasks = files.map(url => {
      const id = 'osm-boundaries/' + path.basename(url)
      // The key is the id without the "-latest.osm.pbf" suffix, e.g. osm-boundaries/albania
      const key = id.replace('-latest.osm.pbf', '')
      console.log(`Creating task for ${url}`)
      return { id, key, type: 'http', options: { url } }
    })
    hook.data.tasks = tasks
    return hook
  }
}
// Register the factory under the 'generateTasks' name, which is the name the
// jobs.before section of the job definition in this file refers to
hooks.registerHook('generateTasks', generateTasks)

// Krawler job definition extracting administrative boundaries from OSM pbf files.
// NOTE(review): hooks are presumably run in the order their keys are declared,
// so keep the key order unchanged unless confirmed otherwise.
export default {
id: 'osm-boundaries',
// Store used by the job; it is created by the createStores hook below
store: 'fs',
options: {
// NOTE(review): presumably limits krawler to one task processed at a time — confirm
workersLimit: 1
},
// NOTE(review): presumably merged by krawler into every generated task — confirm
taskTemplate: {
store: 'fs'
},
hooks: {
tasks: {
after: {
// Runs osmium to keep only the objects tagged boundary=administrative and writes
// them to <key>-administrative.pbf, overwriting any existing file (--overwrite).
// -t is osmium's --omit-referenced switch.
// <%= id %> and <%= key %> are template placeholders named after the task fields
// set in generateTasks, they are not JavaScript interpolations.
extractAdministrative: {
hook: 'runCommand',
command: `osmium tags-filter <%= id %> /boundary=administrative -t --overwrite --output <%= key %>-administrative.pbf`
},
// Currently disabled: would copy <key>.geojson from the 'fs' store to the 's3'
// store under storePath with a GeoJSON content type.
/*copyToStore: {
input: { key: '<%= key %>.geojson', store: 'fs' },
output: { key: `${storePath}/<%= key %>.geojson`, store: 's3',
params: { ContentType: 'application/geo+json' }
}
},*/
// NOTE(review): presumably releases the task data once processed — confirm
clearData: {}
}
},
jobs: {
before: {
// Two stores are declared before the job starts:
// - 'fs' rooted at the directory containing this job file
// - 's3' configured from the S3_* environment variables
createStores: [{
id: 'fs',
options: {
path: path.join(__dirname)
},
},
{
id: 's3',
type: 's3',
options: {
client: {
accessKeyId: process.env.S3_ACCESS_KEY,
secretAccessKey: process.env.S3_SECRET_ACCESS_KEY,
endpoint: process.env.S3_ENDPOINT
},
bucket: process.env.S3_BUCKET
}
}],
// Refers to the hook registered under 'generateTasks' in this file, with empty options
generateTasks: {}
},
// The same store removal is declared for both the after and the error job hooks
after: {
removeStores: ['fs', 's3']
},
error: {
removeStores: ['fs', 's3']
}
}
}
}
21 changes: 12 additions & 9 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@
"description": "",
"version": "1.0.1",
"homepage": "https://github.com/kalisio/k-atlas",
"type": "module",
"keywords": [
"krawler",
"kargo",
"admin-express"
"admin-express",
"bdpr",
"osm"
],
"license": "MIT",
"repository": {
Expand All @@ -23,17 +26,17 @@
},
"scripts": {
"test": "echo \"Warning: no test specified\"",
"changelog": "changelog -x build,ci,chore,docs,other,perf,refactor,revert,style,test",
"release:major": "npm run changelog -- -M && git add CHANGELOG.md && git commit -m \"chore: updated CHANGELOG.md\" && npm version --force major",
"release:minor": "npm run changelog -- -m && git add CHANGELOG.md && git commit -m \"chore: Updated CHANGELOG.md\" && npm version --force minor",
"release:patch": "npm run changelog -- -p && git add CHANGELOG.md && git commit -m \"chore: Updated CHANGELOG.md\" && npm version --force patch",
"postversion": "git push origin --tags"
"release:major": "npm version --force major",
"release:minor": "npm version --force minor",
"release:patch": "npm version --force patch",
"postversion": "git push origin HEAD --tags"
},
"dependencies": {
"lodash": "^4.17.15"
},
"dependencies": {},
"devDependencies": {
"generate-changelog": "^1.8.0"
},
"peerDependencies": {
"@kalisio/krawler": "1.0.0"
"@kalisio/krawler": "2.5.0"
}
}
8 changes: 8 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.
# yarn lockfile v1


lodash@^4.17.15:
version "4.17.21"
resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c"
integrity sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==

0 comments on commit 35e2356

Please sign in to comment.