Skip to content

Commit

Permalink
chore: added toponyms creation for osm-boundaries
Browse files Browse the repository at this point in the history
  • Loading branch information
cnouguier committed Jan 10, 2024
1 parent 7a6208e commit 4b791c2
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 5 deletions.
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,15 @@ Krawler based jobs to scrape various data related to administrative entities.

This job relies on [osmium](https://osmcode.org/osmium-tool/) to extract administrative boundaries at different levels from OSM pbf files.

> [!IMPORTANT]
> [osmium](https://osmcode.org/osmium-tool/) must be installed on your system.

To set up the regions to process, you must export the environment variable `REGIONS` with a semicolon-separated list of [GeoFabrik](https://download.geofabrik.de/) regions. For instance:

```bash
export REGIONS="europe/france;europe/albania"
```

## Admin-Express

This job relies on archive shape files from IGN and the [mapshaper](https://github.com/mbloch/mapshaper) and [7z](https://www.7-zip.org/download.html) tools.
Expand Down
33 changes: 28 additions & 5 deletions jobfile-osm-boundaries.js
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import _ from 'lodash'
import path from 'path'
import { fileURLToPath } from 'url'
import centroid from '@turf/centroid'
import { hooks } from '@kalisio/krawler'

// Directory containing this module (ES modules have no built-in __dirname)
const __dirname = path.dirname(fileURLToPath(import.meta.url))
// Storage location, overridable through the STORE_PATH environment variable
const storePath = process.env.STORE_PATH || 'data/OSM'
// MongoDB connection URL, overridable through the DB_URL environment variable
const dbUrl = process.env.DB_URL || 'mongodb://localhost:27017/atlas'

// Explicit list of PBF extract URLs
const files = ['https://download.geofabrik.de/europe-latest.osm.pbf']
// Root URL prepended to each region when building its download URL
const baseUrl = 'https://download.geofabrik.de'
// Semicolon-separated list of regions, overridable through the REGIONS environment variable
const regions = process.env.REGIONS || 'europe/france;europe/albania'
// Suffix appended to a region to form the name of the extract to download
const fabrikSuffix = '-latest.osm.pbf'
/**
 * Parse an administrative level given as an environment variable value.
 *
 * Environment variables are always strings; keeping them as strings makes the
 * `level <= maxLevel` loop condition compare lexicographically ('2' <= '10' is
 * false), so the values are converted to integers here.
 *
 * @param {string|undefined} value - Raw value read from the environment.
 * @param {number} fallback - Level used when the value is missing or not a number.
 * @returns {number} The parsed level, or `fallback`.
 */
const parseLevel = (value, fallback) => {
  const level = Number.parseInt(value, 10)
  return Number.isNaN(level) ? fallback : level
}
// Inclusive range of administrative levels to process (MIN_LEVEL / MAX_LEVEL)
const minLevel = parseLevel(process.env.MIN_LEVEL, 2)
const maxLevel = parseLevel(process.env.MAX_LEVEL, 8)
// Name of the target collection
const collection = 'osm-boundaries'
Expand All @@ -16,8 +19,8 @@ let generateTasks = (options) => {
return async (hook) => {
let tasks = []
for (let level = minLevel; level <= maxLevel; level++) {
files.forEach(file => {
const basename = path.basename(file).replace('-latest.osm.pbf', '')
_.forEach(regions.split(';'), region => {
const basename = path.basename(region)
const id = `osm-boundaries/${basename}.pbf`
const key = `osm-boundaries/${level}/${basename}`
const dir = path.dirname(key)
Expand All @@ -31,10 +34,10 @@ let generateTasks = (options) => {
// Skip download if file already exists
overwrite: false,
options: {
url: file
url: `${baseUrl}/${region}${fabrikSuffix}`
}
}
console.log(`Creating task for ${task.key} at level ${level}`)
console.log(`<i> creating task for ${task.key} at level ${level} [${task.options.url}]`)
tasks.push(task)
})
}
Expand Down Expand Up @@ -79,6 +82,26 @@ export default {
readJson: {
key: `<%= key %>.geojson`
},
generateToponyms: {
hook: 'apply',
function: (item) => {
let toponyms = []
_.forEach(item.data.features, feature => {
const toponym = centroid(feature.geometry)
toponym.properties = feature.properties
toponyms.push(toponym)
})
item.toponyms = {
type: 'FeatureCollection',
features: toponyms
}
}
},
// Write the toponyms collection with the writeJson hook; the output key is the
// task key suffixed with `-toponyms.geojson`.
// NOTE(review): generateToponyms assigns `item.toponyms` while this reads
// `data.toponyms` — confirm both resolve to the same object in the hook context.
writeToponyms: {
hook: 'writeJson',
dataPath: 'data.toponyms',
key: `<%= key %>-toponyms.geojson`
},
writeMongoCollection: {
chunkSize: 256,
collection,
Expand Down
2 changes: 2 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ signal-exit@^4.0.1:
integrity sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==

"string-width-cjs@npm:string-width@^4.2.0", string-width@^4.1.0:
name string-width-cjs
version "4.2.3"
resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010"
integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==
Expand All @@ -218,6 +219,7 @@ string-width@^5.0.1, string-width@^5.1.2:
strip-ansi "^7.0.1"

"strip-ansi-cjs@npm:strip-ansi@^6.0.1", strip-ansi@^6.0.0, strip-ansi@^6.0.1:
name strip-ansi-cjs
version "6.0.1"
resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9"
integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==
Expand Down

0 comments on commit 4b791c2

Please sign in to comment.