mirror of
https://git.osgeo.org/gitea/postgis/postgis
synced 2024-10-23 16:42:35 +00:00
c6091a4bba
git-svn-id: http://svn.osgeo.org/postgis/trunk@12860 b70326c6-7e19-0410-871a-916f4a2858ee
735 lines
6.2 KiB
Perl
735 lines
6.2 KiB
Perl
#!/usr/bin/perl -w
|
|
use strict;
|
|
use Regexp::Assemble;
|
|
|
|
# TODO
|
|
# Add prefix types like:
|
|
# Ave(nue)? of( the)? names
|
|
# Ave(nue)? (d'|du|de)(la)?\s?names
|
|
# Ave(nue|nida)? \w{1,2}
|
|
# calle names
|
|
# suffix of( the)? names
|
|
# route (\d+([a-z]|bus(iness)?)(by(pass))?
|
|
# business (\d+([a-z]|bus(iness)?)(by(pass))?
|
|
# (interstate|I-) \d+\s*[nsew]?
|
|
#
|
|
# Add better number recognizer
|
|
# dir num dir num dir
|
|
# dir num letter
|
|
# num? fraction 123 1/2
|
|
#
|
|
# Add patterns to recognize intersections
|
|
# street & street, city, state
|
|
#
|
|
#
|
|
# Probably the following were removed as they are PREFIX TYPES
|
|
# RTE, ROUTE
|
|
# CALLE
|
|
#
|
|
# and maybe RUE
|
|
# RUE can be either: RUE d'la whatever; Charles Rue
|
|
#
|
|
# Many of the SUFFIX TYPES can also be used in a prefix context, like:
|
|
# AVENUE of the Americas
|
|
#
|
|
|
|
|
|
#my @cities = split(/[\r\n]+/, qx(cat usps-city-names.txt));
|
|
|
|
# ==============================
|
|
|
|
|
|
# Street-type words: full names, abbreviations and variant spellings
# (e.g. AVENUE / AVE / AV / AVNUE).  They are assembled further down into the
# single alternation stored in $sttype.
#
# Stray "|" separator tokens used to sit between the entries; inside qw() they
# are ordinary list elements, so they have been removed.  The order of the
# real entries is unchanged.
my @stwords = qw(
    ALLEE ALLEY ALLY ALY ANEX ANNEX ANNX ANX ARC ARCADE
    AV AVE AVEN AVENU AVENUE AVN AVNUE

    BAYOO BAYOU BCH BEACH BEND BG BGS BLF BLFS BLUF BLUFF BLUFFS BLVD BND
    BOT BOTTM BOTTOM BOUL BOULEVARD BOULV BR BRANCH BRDGE BRG BRIDGE
    BRK BRKS BRNCH BROOK BROOKS BTM BURG BURGS
    BYP BYPA BYPAS BYPASS BYPS BYU

    CAMP CANYN CANYON CAPE CAUSEWAY CAUSWAY
    CEN CENT CENTER CENTERS CENTR CENTRE
    CIR CIRC CIRCL CIRCLE CIRCLES CIRS
    CK CLB CLF CLFS CLIFF CLIFFS CLUB CMN CMP CNTER CNTR CNYN
    COMMON COR CORNER CORNERS CORS COURSE COURT COURTS COVE COVES
    CP CPE CR CRCL CRCLE CRECENT CREEK CRES CRESCENT CRESENT CREST CRK
    CROSSING CROSSROAD CRSCNT CRSE CRSENT CRSNT CRSSING CRSSNG CRST CRT
    CSWY CT CTR CTRS CTS CURV CURVE CV CVS CYN

    DALE DAM DIV DIVIDE DL DM DR DRIV DRIVE DRIVES DRS DRV DV DVD

    EST ESTATE ESTATES ESTS
    EXP EXPR EXPRESS EXPRESSWAY EXPW EXPY
    EXT EXTENSION EXTENSIONS EXTN EXTNSN EXTS

    FALL FALLS FERRY FIELD FIELDS FLAT FLATS FLD FLDS FLS FLT FLTS
    FORD FORDS FOREST FORESTS FORG FORGE FORGES FORK FORKS FORT
    FRD FRDS FREEWAY FREEWY FRG FRGS FRK FRKS FRRY FRST FRT
    FRWAY FRWY FRY FT FWY

    GARDEN GARDENS GARDN GATEWAY GATEWY GATWAY GDN GDNS
    GLEN GLENS GLN GLNS GRDEN GRDN GRDNS GREEN GREENS GRN GRNS
    GROV GROVE GROVES GRV GRVS GTWAY GTWY

    HARB HARBOR HARBORS HARBR HAVEN HAVN HBR HBRS
    HEIGHT HEIGHTS HGTS HIGHWAY HIGHWY HILL HILLS HIWAY HIWY
    HL HLLW HLS HOLLOW HOLLOWS HOLW HOLWS HRBOR HT HTS HVN HWAY HWY

    INLET INLT IS ISLAND ISLANDS ISLE ISLES ISLND ISLNDS ISS

    JCT JCTION JCTN JCTNS JCTS JUNCTION JUNCTIONS JUNCTN JUNCTON

    KEY KEYS KNL KNLS KNOL KNOLL KNOLLS KY KYS

    LA LAKE LAKES LAND LANDING LANE LANES LCK LCKS LDG LDGE LF
    LGT LGTS LIGHT LIGHTS LINE LK LKS LN LNDG LNDNG
    LOAF LOCK LOCKS LODG LODGE LOOP LOOPS

    MALL MANOR MANORS MDW MDWS MEADOW MEADOWS MEDOWS MEWS
    MILL MILLS MISSION MISSN ML MLS MNR MNRS MNT MNTAIN MNTN MNTNS
    MOTORWAY MOUNT MOUNTAIN MOUNTAINS MOUNTIN
    MSN MSSN MT MTIN MTN MTNS MTWY

    NCK NECK

    OPAS ORCH ORCHARD ORCHRD OVAL OVERPASS OVL

    PARK PARKS PARKWAY PARKWAYS PARKWY PASS PASSAGE PATH PATHS
    PIKE PIKES PINE PINES PK PKWAY PKWY PKWYS PKY
    PL PLACE PLAIN PLAINES PLAINS PLAZA PLN PLNS PLZ PLZA PNE PNES
    POINT POINTS PORT PORTS PR PRAIRIE PRARIE PRK PRR PRT PRTS
    PSGE PT PTS

    RAD RADIAL RADIEL RADL RAMP RANCH RANCHES RAPID RAPIDS
    RD RDG RDGE RDGS RDS REST RIDGE RIDGES RIV RIVER RIVR
    RNCH RNCHS ROAD ROADS ROW RPD RPDS RST RUE RUN RVR

    SHL SHLS SHOAL SHOALS SHOAR SHOARS SHORE SHORES SHR SHRS
    SKWY SKYWAY SMT
    SPG SPGS SPNG SPNGS SPRING SPRINGS SPRNG SPRNGS SPUR SPURS
    SQ SQR SQRE SQRS SQS SQU SQUARE SQUARES
    ST STA STATION STATN STN
    STR STRA STRAV STRAVE STRAVEN STRAVENUE STRAVN
    STREAM STREET STREETS STREME STRM STRT STRVN STRVNUE STS
    SUMIT SUMITT SUMMIT

    TER TERR TERRACE THROUGHWAY TPK TPKE
    TR TRACE TRACES TRACK TRACKS TRAFFICWAY TRAIL TRAILS TRAK
    TRCE TRFY TRK TRKS TRL TRLS TRNPK TRPK TRWY
    TUNEL TUNL TUNLS TUNNEL TUNNELS TUNNL TURNPIKE TURNPK

    UN UNDERPASS UNION UNIONS UNS UPAS

    VALLEY VALLEYS VALLY VDCT VIA VIADCT VIADUCT VIEW VIEWS
    VILL VILLAG VILLAGE VILLAGES VILLE VILLG VILLIAGE
    VIS VIST VISTA VL VLG VLGS VLLY VLY VLYS VST VSTA VW VWS

    WALK WALKS WALL WAY WAYS WELL WELLS WL WLS WY

    XING XRD
);
|
|
# ==============================
|
|
|
|
# Secondary-unit designators (apartment, suite, floor, ...) with their
# abbreviations.  They are assembled further down into $unittype.
#
# Stray "|" separator tokens that used to sit between the entries were parsed
# by qw() as list elements and have been removed; the real entries and their
# order are unchanged.
my @secwords = qw(
    APARTMENT APT
    BASEMENT BLDG BSMT BUILDING
    DEPARTMENT DEPT
    FL FLOOR FRNT FRONT
    HANGAR HNGR
    LBBY LOBBY LOT LOWER LOWR
    OFC OFFICE
    PENTHOUSE PH PIER
    REAR RM ROOM
    SIDE SLIP SPACE SPC STE STOP SUITE
    TRAILER TRLR
    UNIT UPPER UPPR
);
|
|
|
|
# Directional words: English names and abbreviations, hyphenated compound
# forms, plus the non-English spellings NORD, SUD, EST, OEST and O.
# They are assembled further down into $dirs.
#
# Stray "|" separator tokens that used to sit between the rows were parsed by
# qw() as list elements and have been removed; the real entries and their
# order are unchanged.
my @dirs = qw(
    NORTH       N       NORD
    SOUTH       S       SUD
    EAST        E       EST
    WEST        W       OEST    O
    NORTHEAST   NE
    NORTHWEST   NW
    SOUTHEAST   SE
    SOUTHWEST   SW
    NORTH-EAST  N-E
    NORTH-WEST  N-W
    SOUTH-EAST  S-E
    SOUTH-WEST  S-W
);
|
|
|
|
# Spellings of "saint" that can precede a name.  They are assembled further
# down into $saint.
#
# The stray "|" tokens that used to sit between the items were syntax errors
# in a plain list and have been removed; the strings are unchanged.
#
# NOTE(review): these strings are handed to Regexp::Assemble as patterns, so
# the "." in "st." and "ste." is presumably treated as a regex wildcard, not
# a literal full stop -- confirm that this is intended before escaping it.
my @saints = (
    'st',
    'st.',
    'ste',
    'ste.',
    'saint',
);
|
|
|
|
# Turn a list of words/patterns into one case-insensitive regex, returned as
# text that can be pasted inside a C string literal.
#
#   @_      : the patterns to combine (passed straight to Regexp::Assemble)
#   returns : the assembled pattern as a plain string
#
# A fresh assembler is created on every call, so the result never depends on
# state left behind by an earlier list.
sub assemble_c_pattern {
    my @patterns = @_;

    my $assembler = Regexp::Assemble->new(flags => "i");
    $assembler->add(@patterns);

    # Stringify the compiled regex so that it can be edited as text.
    my $pattern = '' . $assembler->re;

    # Double every backslash: the text ends up inside a C string literal.
    $pattern =~ s/\\/\\\\/g;

    # Newer perls stringify a compiled regex with the "(?^" shorthand; spell
    # it out as the explicit "(?-xism" flag group instead.
    $pattern =~ s/\?\^/?-xism/g;

    return $pattern;
}

# Legacy, disabled city-name code kept for reference.  It predates the helper
# above (it refers to $re and $l, which no longer exist) and would need
# porting before being re-enabled.
#$re = $l->set(modifiers=>'i')->list2re(@cities);
#$re =~ s/\\/\\\\/g;
#my $cities = $re;
#
#print " static const char *cities = \n";
#while ($re =~ s/^(.{1,75})//) {
# print " \"$1\"\n";
#}
#print " ;\n";

my $sttype = assemble_c_pattern(@stwords);
#print " static const char *sttype = \"$sttype\";\n\n";

# $unittype is not referenced by any of the patterns emitted below; it is
# kept so that the debug print can still be re-enabled.
my $unittype = assemble_c_pattern(@secwords);
#print " static const char *unittype = \"$unittype\";\n\n";

my $dirs = assemble_c_pattern(@dirs);
#print " static const char *dirtype = \"$dirs\";\n\n";

my $saint = assemble_c_pattern(@saints);
#print " static const char *saints = \"$saint\";\n\n";
|
|
|
|
# Building blocks for the emitted patterns: one word, and a run of one or
# more whitespace-separated words.  Backslashes are written four times here:
# Perl's double-quoted string halves them, which leaves the doubled backslash
# that a C string literal needs.
my $word  = "\\\\w+";
my $words = "($word(\\\\s$word)*)";

# Pattern templates, emitted in exactly this order.  The two commented-out
# entries are disabled alternatives (the second one needs $cities, which is
# not currently built).
my @reg = (
    #"(?:,\\\\s*)([^,]+)\$",
    #"\\\\b($cities)\$",
    "(?:\\\\b$sttype\\\\s(?:$dirs\\\\s))($dirs\\\\s$words)\$",
    "(?:\\\\b$sttype\\\\s(?:$dirs\\\\s))($dirs\\\\s$saint\\\\s$words)\$",
    "(?:\\\\b$sttype\\\\s)($dirs\\\\s$saint\\\\s$words)\$",
    "(?:\\\\b$sttype\\\\s)($saint\\\\s$words)\$",
    "(?:\\\\b$sttype\\\\s)($dirs\\\\s$words)\$",
    "(?:\\\\b$sttype\\\\s)($words)\$",
    "(?:\\\\s)($dirs\\\\s$words)\$",
    "^(?:\\\\d+\\\\s(?:(?:\\\\w+\\\\s)$sttype))()\$",
    "^(?:\\\\d+\\\\s(?:(?:\\\\w+\\\\s)*\\\\w+\\\\s))($word)\$",
);

# Write the C declarations to STDOUT: the pattern count, then the array of
# patterns, each one as its own quoted string.
my $nn = scalar @reg;
print " const int nreg = $nn;\n",
      " static const char *t_regx[$nn] = {\n \"",
      join("\",\n \"", @reg),
      "\"\n };\n";
|
|
|
|
|