From 721cf47b5ed8712520f469e1a25142fce7a09ef4 Mon Sep 17 00:00:00 2001 From: Regina Obe Date: Wed, 29 Jun 2011 05:06:06 +0000 Subject: [PATCH] fix for #1073 handling mangled zipcodes and update regress to include these git-svn-id: http://svn.osgeo.org/postgis/trunk@7517 b70326c6-7e19-0410-871a-916f4a2858ee --- .../tiger_2010/normalize/normalize_address.sql | 16 +++++++++++----- .../tiger_2010/regress/geocode_regress | 13 +++++++++++++ .../tiger_2010/regress/geocode_regress.sql | 4 ++++ .../tiger_2010/regress/normalize_address_regress | 10 ++++++++-- .../regress/normalize_address_regress.sql | 10 ++++++++-- 5 files changed, 44 insertions(+), 9 deletions(-) diff --git a/extras/tiger_geocoder/tiger_2010/normalize/normalize_address.sql b/extras/tiger_geocoder/tiger_2010/normalize/normalize_address.sql index c8582e5da..8b3185563 100644 --- a/extras/tiger_geocoder/tiger_2010/normalize/normalize_address.sql +++ b/extras/tiger_geocoder/tiger_2010/normalize/normalize_address.sql @@ -89,18 +89,24 @@ BEGIN -- Assume that the address begins with a digit, and extract it from -- the input string. - addressString := substring(rawInput from '^([0-9].*?)[ ,/.]'); + addressString := substring(rawInput from E'^([0-9].*?)[ ,/.]'); IF debug_flag THEN raise notice '% addressString: %', clock_timestamp(), addressString; END IF; - -- There are two formats for zip code, the normal 5 digit, and + -- There are two formats for zip code, the normal 5 digit , and -- the nine digit zip-4. It may also not exist. 
- zipString := substring(rawInput from ws || '([0-9]{5})$'); + + zipString := substring(rawInput from ws || E'([0-9]{5})$'); IF zipString IS NULL THEN - zipString := substring(rawInput from ws || '([0-9]{5})-[0-9]{4}$'); - -- Check if all we got was a zipcode, of either form + -- Check if the zip is just a partial, one followed by a dash and up to 4 digits, + -- or one that just has more than 5 digits + zipString := COALESCE(substring(rawInput from ws || '([0-9]{5})-[0-9]{0,4}$'), + substring(rawInput from ws || '([0-9]{2,5})$'), + substring(rawInput from ws || '([0-9]{6,14})$')); + + -- Check if all we got was a zipcode, of either form IF zipString IS NULL THEN zipString := substring(rawInput from '^([0-9]{5})$'); IF zipString IS NULL THEN diff --git a/extras/tiger_geocoder/tiger_2010/regress/geocode_regress b/extras/tiger_geocoder/tiger_2010/regress/geocode_regress index 7d34b89d0..de46774c8 100644 --- a/extras/tiger_geocoder/tiger_2010/regress/geocode_regress +++ b/extras/tiger_geocoder/tiger_2010/regress/geocode_regress @@ -38,3 +38,16 @@ Federal Ct, Boston, MA 02110 | POINT(-71.0567205 42.354134) | 24 Federal Ln, Dedham, MA 02026 | POINT(-71.183565 42.238295) | 33 + 212 3rd Ave N, Minneapolis, MN 55401 | POINT(-93.2718114 44.9850188) | 1 + 212 3rd Ave S, Minneapolis, MN 55404 | POINT(-93.2635498396584 44.9806201397408) | 3 + 212 3rd Ave S, Minneapolis, MN 55401 | POINT(-93.2633388442551 44.9808723431463) | 3 + 212 3rd Ave NE, Minneapolis, MN 55413 | POINT(-93.2592025 44.990786) | 4 + 212 3rd Ave S, Minneapolis, MN 55415 | POINT(-93.2641725306122 44.9798755102041) | 4 + 212 3rd Ave SE, Minneapolis, MN 55414 | POINT(-93.2526539591837 44.9848287755102) | 5 + 212 3rd Ave N, Biwabik, MN 55708 | POINT(-92.3450754489796 47.5343256938775) | 8 + 212 3rd Ave N, Lakefield, MN 56150 | POINT(-95.1733443061224 43.6785488163265) | 9 + 212 3rd Ave N, Bayport, MN 55003 | POINT(-92.7789324000695 45.019825623949) | 9 + 212 3rd Ave N, Long Prairie, MN 56347 | POINT(-94.8618907833944 45.977553389256) | 10 
+ Plymouth, MN 55340 | POINT(-93.5757500937089 45.076212213583) | 100 + diff --git a/extras/tiger_geocoder/tiger_2010/regress/geocode_regress.sql b/extras/tiger_geocoder/tiger_2010/regress/geocode_regress.sql index c4622cf3e..0f17d2246 100644 --- a/extras/tiger_geocoder/tiger_2010/regress/geocode_regress.sql +++ b/extras/tiger_geocoder/tiger_2010/regress/geocode_regress.sql @@ -25,4 +25,8 @@ SELECT pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geoco --Test misspellings and missing zip -- SELECT pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('101 Fedaral Street, Boston, MA',1); SELECT pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('101 Fedaral Street, Boston, MA',50); + +--Geocoding mangled zipcodes +SELECT pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('212 3rd Ave N, MINNEAPOLIS, MN 553404'); +SELECT pprint_addy(addy) As address, ST_AsText(geomout) As pt, rating FROM geocode('212 3rd Ave N, MINNEAPOLIS, MN 55340-'); \timing \ No newline at end of file diff --git a/extras/tiger_geocoder/tiger_2010/regress/normalize_address_regress b/extras/tiger_geocoder/tiger_2010/regress/normalize_address_regress index 488bbe220..9b406a8c3 100644 --- a/extras/tiger_geocoder/tiger_2010/regress/normalize_address_regress +++ b/extras/tiger_geocoder/tiger_2010/regress/normalize_address_regress @@ -2,7 +2,13 @@ 529 | | Main | St | | | Boston | MA | 02129 | t - 529 | | Main | St | | | Boston | MA | 02129 | t + 529 | | Main | St | | | Boston,MA | | 02129 | t - 529 | | Main | St | | | Boston MA | | | t + 529 | | Main | St | | | Boston | MA | | t + + 529 | | Main | St | | | Boston | MA | 021 | t + + 212 | | 3rd | Ave | N | | MINNEAPOLIS | MN | 553404 | t + + 212 | | 3rd | Ave | N | | MINNEAPOLIS | MN | 55401 | t diff --git a/extras/tiger_geocoder/tiger_2010/regress/normalize_address_regress.sql b/extras/tiger_geocoder/tiger_2010/regress/normalize_address_regress.sql index 
ff8a1412d..8381090c2 100644 --- a/extras/tiger_geocoder/tiger_2010/regress/normalize_address_regress.sql +++ b/extras/tiger_geocoder/tiger_2010/regress/normalize_address_regress.sql @@ -3,7 +3,13 @@ SELECT * FROM normalize_address('3937 43RD AVE S, MINNEAPOLIS, MN 55406'); -- comma in wrong spot SELECT * FROM normalize_address('529 Main Street, Boston MA, 02129'); -- comma in right spot -SELECT * FROM normalize_address('529 Main Street, Boston MA, 02129'); +SELECT * FROM normalize_address('529 Main Street, Boston,MA 02129'); -- partial address -SELECT * FROM normalize_address('529 Main Street, Boston MA'); +SELECT * FROM normalize_address('529 Main Street, Boston, MA'); +-- partial zipcode +SELECT * FROM normalize_address('529 Main Street, Boston, MA 021'); + +-- Mangled zipcodes +SELECT * FROM normalize_address('212 3rd Ave N, MINNEAPOLIS, MN 553404'); +SELECT * FROM normalize_address('212 3rd Ave N, MINNEAPOLIS, MN 55401-'); \timing \ No newline at end of file