Skip to content

Commit

Permalink
Download missing ways, nodes for entities near the borders
Browse files Browse the repository at this point in the history
  • Loading branch information
daohoangson committed Sep 17, 2023
1 parent 644a91e commit 8c50e09
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 25 deletions.
113 changes: 95 additions & 18 deletions downloader/03_osm.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,16 @@ import (
"fmt"
"github.com/paulmach/orb"
"github.com/paulmach/osm"
"github.com/paulmach/osm/osmapi"
"github.com/paulmach/osm/osmpbf"
"io"
"math"
"net/http"
"os"
"path"
"runtime"
"slices"
"strconv"
)

func main() {
Expand All @@ -38,35 +42,99 @@ func main() {
}
}

// buildRelationCoordinates assembles the geometry of a relation from its
// member ways whose role is "outer".
//
// Ways present in pbf.ways are converted to line strings using pbf.points.
// For a way that is absent from pbf.ways the function either returns an error
// (shouldUseApi == false) or downloads it via buildRelationCoordinatesByWayId
// (shouldUseApi == true). The collected lines and their way ids are handed to
// multiPolygonBuilder.loop, whose result is returned as is.
//
// NOTE(review): this listing appears to come from a rendered diff; the
// signature without shouldUseApi, the direct pbf.ways[osm.WayID(member.Ref)]
// lookup and the "nodePoints" error return look like pre-change lines that sit
// next to their replacements — confirm against the real file.
func buildRelationCoordinates(relation *osm.Relation) (*orb.MultiPolygon, error) {
func buildRelationCoordinates(relation *osm.Relation, shouldUseApi bool) (*orb.MultiPolygon, error) {
var lines []orb.LineString
// wayIds[i] is the id of the way that produced lines[i]
var wayIds []osm.WayID
for _, member := range relation.Members {
// only ways with the "outer" role contribute to the geometry
if member.Type == osm.TypeWay && member.Role == "outer" {
if way, ok := pbf.ways[osm.WayID(member.Ref)]; ok {
wayId := osm.WayID(member.Ref)
if way, ok := pbf.ways[wayId]; ok {
// one point per way node, in way order
line := make(orb.LineString, len(way.Nodes))
for i, wayNode := range way.Nodes {
if wayNodePoint, ok := pbf.points[wayNode.ID]; ok {
line[i] = wayNodePoint
} else {
return nil, fmt.Errorf("way.id=%d: nodePoints[%d] does not exist", way.ID, wayNode.ID)
// if the way exists in the data dump, its nodes must exist too,
// so a missing node is treated as a broken invariant
panic(fmt.Errorf("relation.id=%d: way.id=%d points[%d] does not exist", relation.ID, way.ID, wayNode.ID))
}
}
lines = append(lines, line)
wayIds = append(wayIds, way.ID)
} else {
// the way is not in pbf.ways: fail unless the caller allowed API downloads
if !shouldUseApi {
return nil, fmt.Errorf("relation.id=%d: ways[%d] does not exist", relation.ID, wayId)
}

line, apiError := buildRelationCoordinatesByWayId(relation, wayId)
if apiError != nil {
return nil, fmt.Errorf("relation.id=%d: way.id=%d %w", relation.ID, wayId, apiError)
}
lines = append(lines, *line)
wayIds = append(wayIds, wayId)
}
}
}
return (&multiPolygonBuilder{}).loop(lines, wayIds)
}

func getParentsAndSelf(relation *osm.Relation) (result []*osm.Relation) {
if parentId, ok := pbf.parentIds[int64(relation.ID)]; ok {
if parent, ok := pbf.relations[parentId]; ok {
result = append(result, getParentsAndSelf(parent)...)
// buildRelationCoordinatesByWayId downloads a single way through osmapi and
// resolves its nodes into a line string.
//
// relation is only used to label the progress message printed to stdout.
// The returned line holds one point per node of the way, in way order.
// An error is returned when the way or any of its nodes cannot be fetched.
func buildRelationCoordinatesByWayId(relation *osm.Relation, wayId osm.WayID) (*orb.LineString, error) {
	fmt.Printf("Downloading way#%d for relation#%d...\n", wayId, relation.ID)

	ctx := context.Background()
	way, err := osmapi.Way(ctx, wayId)
	if err != nil {
		return nil, fmt.Errorf("osmapi.Way(%d): %w", wayId, err)
	}

	// Collect the node ids in way order; the points are filled in by position.
	ids := make([]osm.NodeID, 0, len(way.Nodes))
	for _, n := range way.Nodes {
		ids = append(ids, n.ID)
	}

	points := make(orb.LineString, len(ids))
	if err := buildRelationCoordinatesByNodeIds(ctx, points, ids, 0); err != nil {
		return nil, err
	}
	return &points, nil
}

// buildRelationCoordinatesByNodeIds downloads the given nodes through osmapi
// and stores their coordinates in line, so that line[offset+i] is the point of
// nodeIds[i].
//
// The ids are requested in batches of at most maxNodeIds to keep the request
// URI short. An id may appear more than once in nodeIds; every occurrence
// receives the same point. An empty nodeIds is a no-op and performs no request.
//
// It returns an error when line cannot hold len(nodeIds) points starting at
// offset, when an API request fails, or when a requested node is missing from
// the API response (instead of silently leaving a zero point in the line).
func buildRelationCoordinatesByNodeIds(ctx context.Context, line orb.LineString, nodeIds []osm.NodeID, offset int) error {
	// maximum URI length is about 2k, node ids are 10 characters on average
	// that means we can fit about 200 ids per request, let's buffer a bit to be safe
	const maxNodeIds = 150

	// Fail with an error rather than an index-out-of-range panic.
	if offset < 0 || offset+len(nodeIds) > len(line) {
		return fmt.Errorf("line has %d points, cannot store %d nodes at offset %d", len(line), len(nodeIds), offset)
	}

	for start := 0; start < len(nodeIds); start += maxNodeIds {
		end := start + maxNodeIds
		if end > len(nodeIds) {
			end = len(nodeIds)
		}
		batch := nodeIds[start:end]

		nodes, nodesError := osmapi.Nodes(ctx, batch)
		if nodesError != nil {
			return fmt.Errorf("osmapi.Nodes(%v): %w", batch, nodesError)
		}

		// Index the response by id because the API doesn't return nodes in
		// the requested order.
		points := make(map[osm.NodeID]orb.Point, len(nodes))
		for _, node := range nodes {
			points[node.ID] = node.Point()
		}

		for i, nodeId := range batch {
			point, ok := points[nodeId]
			if !ok {
				return fmt.Errorf("osmapi.Nodes(%v): node %d is missing from the response", batch, nodeId)
			}
			line[offset+start+i] = point
		}
	}

	return nil
}

// getParentsAndSelf returns the decimal ids of the relation's ancestor chain:
// the outermost ancestor comes first and relationId itself comes last.
// Ancestors are discovered by following pbf.parentIds until an id without a
// recorded parent is reached.
//
// NOTE(review): assumes pbf.parentIds contains no cycles — confirm.
func getParentsAndSelf(relationId osm.RelationID) (result []string) {
	// Walk upwards, collecting self first and the root last.
	current := relationId
	for {
		result = append(result, strconv.FormatInt(int64(current), 10))
		parentId, hasParent := pbf.parentIds[int64(current)]
		if !hasParent {
			break
		}
		current = parentId
	}

	// Flip the chain so that it reads root -> ... -> self.
	for lo, hi := 0, len(result)-1; lo < hi; lo, hi = lo+1, hi-1 {
		result[lo], result[hi] = result[hi], result[lo]
	}
	return result
}

func getTagValue(tags osm.Tags, key string) string {
Expand All @@ -91,29 +159,38 @@ func pointsApproxEquals(a orb.Point, b orb.Point) bool {
}

func writeRelation(dir string, relation *osm.Relation) error {
coordinates, buildError := buildRelationCoordinates(relation)
ids := getParentsAndSelf(relation.ID)
outputPath := fmt.Sprintf("%s.json", path.Join(dir, path.Join(ids...)))
_, statError := os.Stat(outputPath)
if statError == nil {
// file already exists
return nil
}

vietnamId := "49915"
isPartOfVietnam := slices.Index(ids, vietnamId) > -1
isVietnam := ids[len(ids)-1] == vietnamId
shouldUseApi := isPartOfVietnam && !isVietnam // do not fetch the country's coordinates, it's huge
coordinates, buildError := buildRelationCoordinates(relation, shouldUseApi)
if buildError != nil {
return buildError
}

var parent string
if len(ids) > 2 {
parent = ids[len(ids)-2]
}

bbox := coordinates.Bound()
output := map[string]interface{}{
"bbox": []float64{bbox.Left(), bbox.Bottom(), bbox.Right(), bbox.Top()},
"coordinates": coordinates,
"id": relation.ID,
"parent": "",
"parent": parent,
"tags": relation.Tags,
"type": coordinates.GeoJSONType(),
}

outputPath := dir
for _, r := range getParentsAndSelf(relation) {
outputPath = path.Join(outputPath, fmt.Sprintf("%d", r.ID))
if r.ID != relation.ID {
output["parent"] = r.ID
}
}
outputPath = fmt.Sprintf("%s.json", outputPath)
_ = os.MkdirAll(path.Dir(outputPath), 0755)

outputBytes, _ := json.Marshal(output)
Expand Down
16 changes: 9 additions & 7 deletions transformers/osm/split.php
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,12 @@ function main()
}

// get all json files in this directory and its subdirectories
$paths = glob("$inDir/{*,*/*,*/*/*}.json", GLOB_BRACE);
$pathCount = 0;
foreach ($paths as $path) {
foreach (new RecursiveIteratorIterator(new RecursiveDirectoryIterator(realpath($inDir))) as $fileInfo) {
$pathCount++;
$path = $fileInfo->getPathName();
if (substr($path, -5) !== '.json') continue;
if (basename($path) === $workingFileName) continue;
if (isset($workingWrittenPaths[$path])) {
statisticsTrack($outDir, $workingWrittenPaths[$path]);
fwrite(STDOUT, 'w'); // already written
continue;
}

$item = json_decode(file_get_contents($path), true);
$item['path'] = $path;
Expand All @@ -61,6 +57,12 @@ function main()
fwrite(STDOUT, sprintf("Paths: %d -> items: %d\n", $pathCount, count($array)));

foreach ($array as $item) {
if (isset($workingWrittenPaths[$item['path']])) {
statisticsTrack($outDir, $workingWrittenPaths[$item['path']]);
fwrite(STDOUT, 'w'); // already written
continue;
}

$fullName = getFullName($item);
if ((substr_count($fullName, ',') + 1) !== $item['level']) {
fwrite(STDERR, sprintf("%s: bad name\n", $item['path']));
Expand Down

0 comments on commit 8c50e09

Please sign in to comment.