Partial fix for #1068 -- handling of misspelled numeric streets such as 13nd or 22th and partial support for numeric streets with fractions like 1/2

git-svn-id: http://svn.osgeo.org/postgis/trunk@7509 b70326c6-7e19-0410-871a-916f4a2858ee
This commit is contained in:
Regina Obe 2011-06-28 03:55:52 +00:00
parent 7c0e4df9df
commit 2e42e2ac36
3 changed files with 23 additions and 4 deletions

View file

@ -93,8 +93,8 @@ BEGIN
|| ' statefp = ' || quote_literal(zip_info.statefp) || ''
|| coalesce(' AND b.zip IN (''' || array_to_string(zip_info.zip,''',''') || ''') ','')
|| CASE WHEN zip_info.exact
THEN ' AND lower(' || coalesce(quote_literal(parsed.streetName),'NULL') || ') = lower(a.name)'
ELSE ' AND soundex(' || coalesce(quote_literal(parsed.streetName),'NULL') || ') = soundex(a.name)'
THEN ' AND (lower(' || coalesce(quote_literal(parsed.streetName),'NULL') || ') = lower(a.name) OR numeric_streets_equal(' || coalesce(quote_literal(parsed.streetName), 'NULL') || ', a.name) ) '
ELSE ' AND (soundex(' || coalesce(quote_literal(parsed.streetName),'NULL') || ') = soundex(a.name) OR numeric_streets_equal(' || coalesce(quote_literal(parsed.streetName), 'NULL') || ', a.name) ) '
END
|| ' ORDER BY 11'
|| ' LIMIT 20'

View file

@ -20,8 +20,23 @@ CREATE OR REPLACE FUNCTION greatest_hn(fromhn varchar, tohn varchar)
$$ SELECT greatest(to_number( CASE WHEN trim($1) ~ '^[0-9]+$' THEN $1 ELSE '0' END,'99999999'),to_number(CASE WHEN trim($2) ~ '^[0-9]+$' THEN $2 ELSE '0' END,'99999999') )::integer; $$
LANGUAGE sql IMMUTABLE
COST 5;
-- Returns true when both street names are "numeric streets" whose leading
-- numeric parts are equal (e.g. 23th St and 23rd St), and false otherwise.
-- PURPOSE: tolerate misspelled ordinal suffixes so that 23th St = 23rd St,
-- as described in: http://trac.osgeo.org/postgis/ticket/1068
-- A numeric street must start with a digit, optionally followed by more digits,
-- '/' or whitespace (allowing fractions such as 1/2 and spaces such as 12 1/2th),
-- and the whole name must be less than 10 characters long.
-- Only that leading run, trimmed, is compared; the rest of the name is ignored.
-- Returns false if either argument is NULL or is not a numeric street.
-- NOTE: the POSIX class [:space:] is used instead of \s on purpose. Inside an
-- E'' string an unrecognized escape such as \s collapses to a plain 's', which
-- made the pattern skip spaces and swallow the 's' of 21st, so that
-- 21st and 21th were wrongly treated as different streets.
CREATE OR REPLACE FUNCTION numeric_streets_equal(input_street varchar, output_street varchar)
    RETURNS boolean AS
$$
    SELECT COALESCE(
        length($1) < 10 AND length($2) < 10
        AND $1 ~ '^[0-9][[:space:]0-9/]*' AND $2 ~ '^[0-9][[:space:]0-9/]*'
        AND trim(substring($1, '^[0-9][[:space:]0-9/]*')) = trim(substring($2, '^[0-9][[:space:]0-9/]*')),
        false);
$$
LANGUAGE sql IMMUTABLE
COST 5;
-- Generate script to create missing indexes in tiger tables.
-- This will generate sql you can run to index commonly used join columns in geocoder for tiger and tiger_data schemas --
CREATE OR REPLACE FUNCTION missing_indexes_generate_script()

View file

@ -46,7 +46,11 @@ BEGIN
result := result + levenshtein_ignore_case(cull_null($1), cull_null($2)) *
directionWeight;
IF streetNameA IS NOT NULL AND streetNameB IS NOT NULL THEN
result := result + levenshtein_ignore_case($3, $4) * nameWeight;
-- Treat numeric streets whose numeric parts match as equal,
-- and do not penalize them for differing suffixes, e.g. ND instead of TH
IF NOT numeric_streets_equal(streetNameA, streetNameB) THEN
result := result + levenshtein_ignore_case($3, $4) * nameWeight;
END IF;
ELSE
IF var_verbose THEN
RAISE NOTICE 'rate_attributes() - Street names cannot be null!';