-
-
Notifications
You must be signed in to change notification settings - Fork 29
/
Copy pathanalyze.js
179 lines (143 loc) · 6.08 KB
/
analyze.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
// Tuning knobs for hyphenated housenumber ranges of the form 'α-β'.
// A value such as '1-7' is ambiguous: it may mean 'apt 7, no 1', it may mean
// 'apt 1, no 7', or it may be a genuine range 'one to seven'.
// The limits below provide sane defaults which decide the ranges we attempt
// to parse and the ones we leave alone.
const MIN_RANGE = 1; // (β - α) must be greater than this value
const MAX_RANGE = 6; // (β - α) must not be greater than this value
const MIN_RANGE_HOUSENUMBER = 10; // the minimum acceptable value for both α and β
/*
* Return the appropriate version of node-postal
*/
var _nodepostal_module;
function get_libpostal() {
// lazy load this dependency; since it's large (~2GB RAM) and may be
// accidentally required by a process which doesn't use it.
if (!_nodepostal_module) {
// load the mock library if MOCK_LIBPOSTAL env var is set
if (process.env.MOCK_LIBPOSTAL) {
_nodepostal_module = require('../test/lib/mock_libpostal');
// otherwise load the real thing
} else {
_nodepostal_module = require('node-postal');
}
}
return _nodepostal_module;
}
/**
  expand a street name with libpostal and return the list of distinct
  expansions, with ordinal suffixes stripped from numbers.
  eg: '26th' -> '26'

  the order of the returned list follows the order produced by libpostal;
  when two expansions become identical only the first one is kept.
**/
function street( streetName ){
  const variants = get_libpostal().expand.expand_address( streetName );

  const seen = new Set();
  const unique = [];

  for( const variant of variants ){
    // drop the ordinal suffix, then tidy up any whitespace left at the edges
    const cleaned = variant.replace( /(([0-9]+)(st|nd|rd|th)($|\s))/gi, '$2 ' ).trim();

    // keep only the first occurrence of each expansion
    if( seen.has( cleaned ) ){ continue; }
    seen.add( cleaned );
    unique.push( cleaned );
  }

  return unique;
}
/**
  analyze input housenumber string and return a float representing its value.

  the integer part of the result is the house number; the fractional part
  encodes an optional single suffix:
    letters:   'a' -> +0.03, 'b' -> +0.06 ... 'z' -> +0.78
    fractions: '¼' -> +0.25, '½' -> +0.5, '¾' -> +0.74
  eg: '2a' -> 2.03, '9.5' -> 9.5, '3 3/4' -> 3.74

  small ranges 'α-β' are collapsed to their (floored) midpoint when both ends
  are at least MIN_RANGE_HOUSENUMBER and MIN_RANGE < (β - α) <= MAX_RANGE.
  eg: '72-74' -> 73

  @param {string|number} num - the raw housenumber; numbers are cast to strings.
  @returns {number} the encoded housenumber, or NaN when the input is neither a
    string nor a number, is zero, or uses a format which is not supported.
**/
function housenumber( num ){

  // num should be a string; if not, cast it to a string
  if( 'number' === typeof num ){ num = num.toString(10); }

  // still not a valid string?
  if( 'string' !== typeof num ){ return NaN; }

  // normalize string diacritics
  // https://stackoverflow.com/a/37511463
  let number = num.normalize('NFD').replace(/[\u0300-\u036f]/g, '');

  // replace fractions, eg: ' 1/2' with british style character suffixes.
  // note: string patterns only replace the first occurrence.
  number = number.replace(' 1/4', '¼')
                 .replace(' 1/2', '½')
                 .replace(' 3/4', '¾');

  // remove common english labels (only when surrounded by whitespace)
  number = number.replace(/\s+(apartment|apt|lot|space|ste|suite|unit)\s+/gi, '');

  // remove common mandarin labels
  // see: https://eastasiastudent.net/china/mandarin/postal-address/
  number = number.replace(/(号|號|室|宅|楼)/g, '');

  // only use the first part of a comma delimited string such as '27, 2º, 4ª'
  number = number.split(',')[0].trim();

  // do not merge adjacent numerals delimited by whitespace
  // see: https://github.com/pelias/interpolation/issues/199
  if( /[0-9]\s+[0-9]/.test(number) ){ number = number.split(/\s+/)[0]; }

  // remove spaces from housenumber. eg: '2 A' -> '2A'
  number = number.replace(/\s+/g, '').toLowerCase();

  // remove forward slash, minus or hash, but only if between a number and a letter.
  // eg: '2/a' or '2-a' -> '2a'
  if( number.match(/^[0-9]+(\/|-|#)[a-z]$/) ){ number = number.replace(/\/|-|#/g, ''); }

  // remove forward slash when apartment number is null, eg: '9/-' -> '9'
  else if( number.match(/^[0-9]+\/-$/) ){ number = number.replace(/\/|-/g, ''); }

  // replace decimal half with unicode half, eg: '9.5' -> '9½'
  else if( number.match(/^[0-9]+\.5$/) ){ number = number.replace('.5', '½'); }

  else {
    // split the components to attempt more advanced parsing.
    // note: the delimiter is either a forward slash or a minus; a literal pipe
    // character is not a delimiter, so the class must not contain '|'.
    const split = number.match(/^([0-9]+)([\/-])([0-9]+)$/);
    if( split ){
      const house = split[1];
      const delim = split[2];
      const apt = split[3];

      // if the housenumber and apartment number are the same we can safely use either.
      // eg: '1/1' -> '1' or '31/31' -> '31'
      if( house === apt ){ number = house; }

      // handle small ranges, eg: '72-74' -> '73'
      else if( delim === '-' ){
        const start = parseInt( house, 10 );
        const end = parseInt( apt, 10 );
        const diff = end - start;

        // don't parse single digit ranges, things like '1-4' are ambiguous
        if( start < MIN_RANGE_HOUSENUMBER || end < MIN_RANGE_HOUSENUMBER ){ return NaN; }

        // ensure the range is within acceptable limits; anything else is left
        // as-is and rejected by the format check below.
        if( diff <= MAX_RANGE && diff > MIN_RANGE ){
          number = '' + Math.floor( start + ( diff / 2 ) );
        }
      }
    }
  }

  // a format we don't currently support
  // @see: https://github.com/pelias/interpolation/issues/16
  const parsed = number.match(/^[0-9]+([a-z]|¼|½|¾)?$/);
  if( !parsed ){ return NaN; }

  // @note: parseFloat ignores the trailing suffix, such as '2a' -> 2
  let value = parseFloat( number );

  // zero house number
  if( value <= 0 ){ return NaN; }

  // if the house number is followed by a single letter [a-z] or a fraction
  // character then we add a fraction to the house number representing the offset.
  // eg: 1a -> 1.03
  const suffix = parsed[1];
  if( suffix ){
    switch( suffix ){
      case '¼': value += 0.25; break;
      case '½': value += 0.5; break;
      // 0.74 rather than 0.75: the letter 'y' already encodes to 25 * 0.03 = 0.75,
      // and housenumberFloatToString() maps 74 back to '¾'.
      case '¾': value += 0.74; break;
      default: {
        const offset = suffix.charCodeAt(0) - 96; // gives a:1, b:2 etc..
        value += ( offset * 0.03 ); // add fraction to housenumber for apt;
      }
    }
  }

  return parseFloat( value.toFixed(2) ); // because floating point arithmetic
}
/**
  take the float housenumber produced by housenumber() and convert it back to
  an alphanumeric value.
  eg. 1.03 -> '1a', 1.5 -> '1½', 22 -> '22'

  the first two decimal places select the suffix: 25, 50 and 74 map to '¼',
  '½' and '¾'; any other value up to 78 maps to a letter in steps of 3
  (3 -> 'a', 6 -> 'b' ... 78 -> 'z'). values above 78 produce no suffix.

  @param {number} f - the float housenumber.
  @returns {string} the alphanumeric housenumber, or an empty string when f is
    not a finite number.
**/
function housenumberFloatToString( f ){

  // NaN and ±Infinity are of type 'number' but have no housenumber
  // representation; treat them like any other invalid input.
  if( 'number' !== typeof f || !Number.isFinite( f ) ){ return ''; }

  let suffix = '';

  // scale by 100.1 (rather than 100) to avoid floating point errors
  const fractional = Math.floor(( f % 1 ) * 100.1 );

  if( fractional > 0 && fractional <= 78 ){
    switch( fractional ){
      case 25: suffix = '¼'; break;
      case 50: suffix = '½'; break;
      case 74: suffix = '¾'; break;
      default: {
        // 1 -> 'a', 2 -> 'b' ... 26 -> 'z'
        const letterIndex = Math.round( fractional / 3 );
        // an index of 0 would yield the character before 'a' (a backtick);
        // such a small fraction carries no suffix.
        if( letterIndex >= 1 ){ suffix = String.fromCharCode( 96 + letterIndex ); }
      }
    }
  }

  return '' + Math.floor( f ) + suffix;
}
// public API of this module
Object.assign( module.exports, {
  street,
  housenumber,
  housenumberFloatToString
});