#1113 take into consideration street prequals like Old in rating and filtering, also added btree varops on fullname (trigrams and fulltext proved too slow will need to reinvestigate those), also added helper function install_missing_indezes() which gets run as part of upgrade script. will need to add to documentation. Also added regress tests from snippets in #1113

git-svn-id: http://svn.osgeo.org/postgis/trunk@7634 b70326c6-7e19-0410-871a-916f4a2858ee
This commit is contained in:
Regina Obe 2011-07-14 07:45:31 +00:00
parent 2ad776d498
commit 98d817ade6
7 changed files with 117 additions and 21 deletions

View file

@ -38,6 +38,14 @@ That will keep the window open for you to see the error.
--To generate a bash script suitable for Unix command lines
SELECT loader_generate_script(ARRAY['DC','RI'], 'sh');
-- Next run the script to install any missing indexes --
SELECT install_missing_indexes();
-- Alternatively if you want to see what indexes will be created before you create them
-- run the below and manually run the steps generated
SELECT missing_indexes_generate_script();
9. Copy and paste the generated script into a .bat or .sh file and put in gisdata folder you created and then run it.
10. Test out the geocoder run this query
@ -50,4 +58,7 @@ FROM geocode('1731 New Hampshire Avenue Northwest, Washington, DC 20010') As g;
Steps to upgrade your install:
If you need to upgrade the geocoder/tiger loader from a pre-release 2.0.0 install -- run the upgrade_geocoder.sh or upgrade_geocoder.bat script.
CAUTION: The upgrade script will drop any table columns that have a norm_addy type for a column type. This is rare if ever done so you should be fine.
We plan to fix this later. It will also drop any customizations you have made to the tiger_loader configuration tables.
We plan to fix this later. It will also drop any customizations you have made to the tiger_loader configuration tables. To prevent this, you can
remark out the install loader part. This we plan to remedy in the future.
It will also install any missing indexes that are deemed needed by queries.

View file

@ -94,9 +94,10 @@ BEGIN
GROUP BY statefp,location,zip,exact, pref ORDER BY exact desc, pref, zip **/
FOR zip_info IN EXECUTE var_sql USING parsed.location, parsed.zip LOOP
-- For zip distance metric we consider both the distance of zip based on numeric as well aa levenshtein
stmt := 'SELECT DISTINCT ON (sub.predirabrv,sub.name,sub.suftypabrv,sub.sufdirabrv,coalesce(p.name,zip.city,cs.name,co.name),s.stusps,sub.zip)'
-- We use the prequalabr (these are like Old, that may or may not appear in front of the street name)
stmt := 'SELECT DISTINCT ON (sub.predirabrv,sub.fename,sub.suftypabrv,sub.sufdirabrv,coalesce(p.name,zip.city,cs.name,co.name),s.stusps,sub.zip)'
|| ' sub.predirabrv as fedirp,'
|| ' sub.name as fename,'
|| ' sub.fename,'
|| ' sub.suftypabrv as fetype,'
|| ' sub.sufdirabrv as fedirs,'
|| ' coalesce(p.name,zip.city,cs.name,co.name)::varchar as place,'
@ -111,10 +112,10 @@ BEGIN
|| ' as sub_rating,'
|| ' sub.exact_address as exact_address'
|| ' FROM ('
|| ' SELECT tlid, predirabrv, name, suftypabrv, sufdirabrv, fromhn, tohn, side, statefp, zip, rate_attributes($5, a.predirabrv,'
|| ' $2, a.name, $4,'
|| ' SELECT tlid, predirabrv, COALESCE(a.prequalabr || '' '','''' ) || a.name As fename, suftypabrv, sufdirabrv, fromhn, tohn, side, statefp, zip, rate_attributes($5, a.predirabrv,'
|| ' $2, a.name , $4,'
|| ' a.suftypabrv, $6,'
|| ' a.sufdirabrv) + '
|| ' a.sufdirabrv, a.prequalabr) + '
|| ' CASE '
|| ' WHEN $1::integer IS NULL OR b.fromhn IS NULL THEN 20'
|| ' WHEN $1::integer >= least_hn(b.fromhn, b.tohn) '
@ -133,14 +134,14 @@ BEGIN
|| ' as sub_rating,$1::integer >= least_hn(b.fromhn,b.tohn) '
|| ' AND $1::integer <= greatest_hn(b.fromhn,b.tohn) '
|| ' AND ($1 % 2)::numeric::integer = (to_number(b.fromhn,''99999999'') % 2)'
|| ' as exact_address'
|| ' as exact_address, a.name, a.prequalabr'
|| ' FROM featnames a join addr b using (tlid,statefp)'
|| ' WHERE'
|| ' statefp = ' || quote_literal(zip_info.statefp) || ''
|| coalesce(' AND b.zip IN (''' || array_to_string(zip_info.zip,''',''') || ''') ','')
|| CASE WHEN zip_info.exact
THEN ' AND (lower($2) = lower(a.name) OR numeric_streets_equal($2, a.name) ) '
ELSE ' AND (soundex($2) = soundex(a.name) OR numeric_streets_equal($2, a.name) ) '
THEN ' AND ( lower($2) = lower(a.name) OR ( a.prequalabr > '''' AND trim(lower($2), lower(a.prequalabr) || '' '') = lower(a.name) ) OR numeric_streets_equal($2, a.name) ) '
ELSE ' AND ( (soundex($2) = soundex(a.name) ) OR ( (length($2) > 10 or a.prequal IS NOT NULL) AND lower(a.fullname) LIKE lower($2) || ''%'' ) OR numeric_streets_equal($2, a.name) ) '
END
|| ' ORDER BY 11'
|| ' LIMIT 20'
@ -160,6 +161,9 @@ BEGIN
;
IF var_debug THEN
RAISE NOTICE '%', stmt;
RAISE NOTICE 'PREPARE query_base_geo(integer, varchar,varchar,varchar,varchar,varchar,varchar,geometry) As %', stmt;
RAISE NOTICE 'EXECUTE query_base_geo(%,%,%,%,%,%,%,%); ', parsed.address,quote_nullable(parsed.streetName), quote_nullable(parsed.location), quote_nullable(parsed.streetTypeAbbrev), quote_nullable(parsed.preDirAbbrev), quote_nullable(parsed.postDirAbbrev), quote_nullable(parsed.zip), quote_nullable(var_restrict_geom::text);
RAISE NOTICE 'DEALLOCATE query_base_geo;';
END IF;
-- If we got an exact street match then when we hit the non-exact
-- set of tests, just drop out.
@ -193,6 +197,11 @@ BEGIN
GEOMOUT := results.address_geom;
RATING := results.sub_rating;
var_n := var_n + 1;
-- If our ratings go above 99 exit because its a really bad match
IF RATING > 99 THEN
RETURN;
END IF;
RETURN NEXT;

View file

@ -83,9 +83,10 @@ FROM (SELECT table_name, table_schema FROM
ON (t.table_name = c.table_name AND t.table_schema = c.table_schema)
LEFT JOIN pg_catalog.pg_indexes i ON
(i.tablename = c.table_name AND i.schemaname = c.table_schema
AND indexdef LIKE '%soundex(%' || c.column_name || '%')
AND indexdef LIKE '%soundex(%' || c.column_name || '%' AND indexdef LIKE '%_snd_' || c.column_name || '%' )
WHERE i.tablename IS NULL AND c.table_schema IN('tiger','tiger_data')
AND (c.table_name LIKE '%county%' OR c.table_name LIKE '%featnames' OR c.table_name LIKE '%place' or c.table_name LIKE '%zip%')
AND (c.table_name LIKE '%county%' OR c.table_name LIKE '%featnames'
OR c.table_name LIKE '%place' or c.table_name LIKE '%zip%')
-- Lower indexes --
UNION ALL
SELECT 'CREATE INDEX idx_' || c.table_schema || '_' || c.table_name || '_lower_' || c.column_name || ' ON ' || c.table_schema || '.' || c.table_name || ' USING btree(lower(' || c.column_name || '))' As index
@ -109,6 +110,52 @@ FROM (SELECT table_name, table_schema FROM
(i.tablename = c.table_name AND i.schemaname = c.table_schema
AND indexdef LIKE '%least_hn(%' || c.column_name || '%')
WHERE i.tablename IS NULL
-- var_ops fullname --
UNION ALL
SELECT 'CREATE INDEX idx_' || c.table_schema || '_' || c.table_name || '_l' || c.column_name || '_var_ops' || ' ON ' || c.table_schema || '.' || c.table_name || ' USING btree(lower(' || c.column_name || ') varchar_pattern_ops);' As index
FROM (SELECT table_name, table_schema FROM
information_schema.tables WHERE table_type = 'BASE TABLE' AND table_name LIKE '%featnames' AND table_schema IN('tiger','tiger_data')) As t INNER JOIN
(SELECT * FROM information_schema.columns WHERE column_name IN('fullname') ) AS c
ON (t.table_name = c.table_name AND t.table_schema = c.table_schema)
LEFT JOIN pg_catalog.pg_indexes i ON
(i.tablename = c.table_name AND i.schemaname = c.table_schema
AND indexdef LIKE '%btree%(' || c.column_name || '%varchar_pattern_ops%')
WHERE i.tablename IS NULL
--full text indexes on name field--
/**UNION ALL
SELECT 'CREATE INDEX idx_' || c.table_schema || '_' || c.table_name || '_fullname_ft_gist' || ' ON ' || c.table_schema || '.' || c.table_name || ' USING gist(to_tsvector(''english'',fullname))' As index
FROM (SELECT table_name, table_schema FROM
information_schema.tables WHERE table_type = 'BASE TABLE' AND table_name LIKE '%featnames' AND table_schema IN('tiger','tiger_data')) As t INNER JOIN
(SELECT * FROM information_schema.columns WHERE column_name IN('fullname') ) AS c
ON (t.table_name = c.table_name AND t.table_schema = c.table_schema)
LEFT JOIN pg_catalog.pg_indexes i ON
(i.tablename = c.table_name AND i.schemaname = c.table_schema
AND indexdef LIKE '%to_tsvector(%' || c.column_name || '%')
WHERE i.tablename IS NULL **/
-- trigram index --
/**UNION ALL
SELECT 'CREATE INDEX idx_' || c.table_schema || '_' || c.table_name || '_' || c.column_name || '_trgm_gist' || ' ON ' || c.table_schema || '.' || c.table_name || ' USING gist(' || c.column_name || ' gist_trgm_ops);' As index
FROM (SELECT table_name, table_schema FROM
information_schema.tables WHERE table_type = 'BASE TABLE' AND table_name LIKE '%featnames' AND table_schema IN('tiger','tiger_data')) As t INNER JOIN
(SELECT * FROM information_schema.columns WHERE column_name IN('fullname', 'name') ) AS c
ON (t.table_name = c.table_name AND t.table_schema = c.table_schema)
LEFT JOIN pg_catalog.pg_indexes i ON
(i.tablename = c.table_name AND i.schemaname = c.table_schema
AND indexdef LIKE '%gist%(' || c.column_name || '%gist_trgm_ops%')
WHERE i.tablename IS NULL **/
ORDER BY 1), E';\r');
$$
LANGUAGE sql VOLATILE;
LANGUAGE sql VOLATILE;
CREATE OR REPLACE FUNCTION install_missing_indexes() RETURNS boolean
AS
$$
DECLARE var_sql text = missing_indexes_generate_script();
BEGIN
EXECUTE(var_sql);
RETURN true;
END
$$
language plpgsql;

View file

@ -7,7 +7,7 @@
-- changed: 2010-10-18 Regina Obe - all references to verbose to var_verbose since causes compile errors in 9.0
-- changed: 2011-06-25 revise to use real named args and fix direction rating typo
CREATE OR REPLACE FUNCTION rate_attributes(dirpA VARCHAR, dirpB VARCHAR, streetNameA VARCHAR, streetNameB VARCHAR,
streetTypeA VARCHAR, streetTypeB VARCHAR, dirsA VARCHAR, dirsB VARCHAR, locationA VARCHAR, locationB VARCHAR) RETURNS INTEGER
streetTypeA VARCHAR, streetTypeB VARCHAR, dirsA VARCHAR, dirsB VARCHAR, locationA VARCHAR, locationB VARCHAR, prequalabr VARCHAR) RETURNS INTEGER
AS $_$
DECLARE
--$Id$
@ -23,7 +23,7 @@ BEGIN
END IF;
RETURN NULL;
END IF;
result := result + rate_attributes($1, $2, $3, $4, $5, $6, $7, $8);
result := result + rate_attributes($1, $2, streetNameA, streetNameB, $5, $6, $7, $8,prequalabr);
RETURN result;
END;
$_$ LANGUAGE plpgsql IMMUTABLE;
@ -34,22 +34,30 @@ $_$ LANGUAGE plpgsql IMMUTABLE;
-- required. If any others are null (either A or B) they are treated as
-- empty strings.
CREATE OR REPLACE FUNCTION rate_attributes(dirpA VARCHAR, dirpB VARCHAR, streetNameA VARCHAR, streetNameB VARCHAR,
streetTypeA VARCHAR, streetTypeB VARCHAR, dirsA VARCHAR, dirsB VARCHAR) RETURNS INTEGER
streetTypeA VARCHAR, streetTypeB VARCHAR, dirsA VARCHAR, dirsB VARCHAR, prequalabr VARCHAR) RETURNS INTEGER
AS $_$
DECLARE
result INTEGER := 0;
directionWeight INTEGER := 2;
nameWeight INTEGER := 10;
typeWeight INTEGER := 5;
var_verbose BOOLEAN := FALSE;
var_verbose BOOLEAN := false;
BEGIN
result := result + levenshtein_ignore_case(cull_null($1), cull_null($2)) *
directionWeight;
result := result + levenshtein_ignore_case(cull_null($1), cull_null($2)) * directionWeight;
IF var_verbose THEN
RAISE NOTICE 'streetNameA: %, streetNameB: %', streetNameA, streetNameB;
END IF;
IF streetNameA IS NOT NULL AND streetNameB IS NOT NULL THEN
-- We want to treat numeric streets that have numerics as equal
-- and not penalize if they are spelled different e.g. have ND instead of TH
IF NOT numeric_streets_equal(streetNameA, streetNameB) THEN
result := result + levenshtein_ignore_case($3, $4) * nameWeight;
IF prequalabr IS NOT NULL THEN
-- If the reference address (streetNameB) has a prequalabr streetNameA (prequalabr) - note: streetNameB usually comes thru without prequalabr
-- and the input street (streetNameA) is lacking the prequal -- only penalize a little
result := (result + levenshtein_ignore_case( trim( trim( lower(streetNameA),lower(prequalabr) ) ), trim( trim( lower(streetNameB),lower(prequalabr) ) ) )*nameWeight*0.75 + levenshtein_ignore_case(trim(streetNameA),prequalabr || ' ' || streetNameB) * nameWeight*0.25)::integer;
ELSE
result := result + levenshtein_ignore_case(streetNameA, streetNameB) * nameWeight;
END IF;
END IF;
ELSE
IF var_verbose THEN

View file

@ -96,7 +96,7 @@ T16|Federal Ln, Dedham, MA 02026|POINT(-71.18356 42.23829)|39
#1074a|Cottagewood Ter NE, Spring Lake Park, MN 55432|8525 COTTAGE WOOD TERR, Blaine, MN 55434|POINT(-93.24464 45.1237)|33
#1074a|Cottage Wood Ln, Fifty Lakes, MN 56448|8525 COTTAGE WOOD TERR, Blaine, MN 55434|POINT(-94.07085 46.75406)|34
#1074a|Cottage Wood, Fifty Lakes, MN 56448|8525 COTTAGE WOOD TERR, Blaine, MN 55434|POINT(-94.07085 46.75406)|34
#1074a|Cottagewood Ave, Deephaven, MN 55331|8525 COTTAGE WOOD TERR, Blaine, MN 55434|POINT(-93.5329 44.92732)|43
#1074a|Cottagewood Ave, Deephaven, MN 55331|8525 COTTAGE WOOD TERR, Blaine, MN 55434|POINT(-93.53206 44.92783)|43
#1074b|8525 Cottagewood Ter NE, Blaine, MN 55434|8525 COTTAGEWOOD TERR, Blaine, MN 55434|POINT(-93.24462 45.12481)|4
#1070a|100 Federal St, Lynn, MA 01905|POINT(-70.96783 42.4659)|8
#1070b|100 Federal St, Lynn, MA 01905|POINT(-70.96783 42.4659)|8
@ -105,3 +105,10 @@ T16|Federal Ln, Dedham, MA 02026|POINT(-71.18356 42.23829)|39
#1112c|8401 W 35W Svc Rd NE, Blaine, MN 55449|8401 35W West, Blaine, MN 55449|POINT(-93.19084 45.12386)|36
#1112d|8401 W 35W Svc Rd NE, Blaine, MN 55449|8401 West 35W, Blaine, MN 55449|POINT(-93.19084 45.12386)|34
#1112e|8401 W 35W Svc Rd NE, Blaine, MN 55449|8401 W 35W, Blaine, MN 55449|POINT(-93.19084 45.12386)|34
#1113a|8040 Old Cedar Ave S, Bloomington, MN 55425|8040 OLD CEDAR AVE S, BLOOMINGTON, MN 55425|POINT(-93.24792 44.85708)|0
#1113b|8040 Old Cedar Ave S, Bloomington, MN 55425|8040 CEDAR AVE S, BLOOMINGTON, MN 55425|POINT(-93.24792 44.85708)|10
#1113c|17405 Old Rockford Rd, Plymouth, MN 55446|17405 Old Rockford Rd, Plymouth, MN 55446|POINT(-93.5012 45.0345)|0
#1113d|Rockford Rd, Plymouth, MN 55446|17405 Rockford Rd, Plymouth, MN 55446|POINT(-93.47977 45.02701)|5
#1113e|198 Old Constance Blvd NW, Andover, MN 55304|198 OLD CONSTANCE BLVD, ANDOVER, MN 55304|POINT(-93.27027 45.26203)|4
#1113f|198 Constance Blvd NW, Andover, MN 55304|198 CONSTANCE BLVD, ANDOVER, MN 55304|POINT(-93.26839 45.26229)|4
#1113f|198 Constance Blvd NE, Ham Lake, MN 55304|198 CONSTANCE BLVD, ANDOVER, MN 55304|POINT(-93.26114 45.2657)|11

View file

@ -68,4 +68,12 @@ SELECT '#1112b' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(S
SELECT '#1112c' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,2) As g, target FROM (SELECT '8401 35W West, Blaine, MN 55449'::text As target) As f) As foo;
SELECT '#1112d' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,2) As g, target FROM (SELECT '8401 West 35W, Blaine, MN 55449'::text As target) As f) As foo;
SELECT '#1112e' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,2) As g, target FROM (SELECT '8401 W 35W, Blaine, MN 55449'::text As target) As f) As foo;
-- working with prequalabrv such as Old .. something or other
SELECT '#1113a' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,2) As g, target FROM (SELECT '8040 OLD CEDAR AVE S, BLOOMINGTON, MN 55425'::text As target) As f) As foo;
SELECT '#1113b' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,2) As g, target FROM (SELECT '8040 CEDAR AVE S, BLOOMINGTON, MN 55425'::text As target) As f) As foo;
SELECT '#1113c' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,2) As g, target FROM (SELECT '17405 Old Rockford Rd, Plymouth, MN 55446'::text As target) As f) As foo;
SELECT '#1113d' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,2) As g, target FROM (SELECT '17405 Rockford Rd, Plymouth, MN 55446'::text As target) As f) As foo;
SELECT '#1113e' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,2) As g, target FROM (SELECT '198 OLD CONSTANCE BLVD, ANDOVER, MN 55304'::text As target) As f) As foo;
SELECT '#1113f' As ticket, pprint_addy((g).addy) As address, target, ST_AsText(ST_SnapToGrid((g).geomout, 0.00001)) As pt, (g).rating FROM (SELECT geocode(target,2) As g, target FROM (SELECT '198 CONSTANCE BLVD, ANDOVER, MN 55304'::text As target) As f) As foo;
\timing

View file

@ -57,6 +57,8 @@ CREATE TABLE zcta5
ALTER TABLE street_type_lookup ALTER COLUMN abbrev TYPE varchar(50);
ALTER TABLE street_type_lookup ALTER COLUMN name TYPE varchar(50);
ALTER TABLE street_type_lookup ADD COLUMN is_hw boolean NOT NULL DEFAULT false;
DROP FUNCTION IF EXISTS rate_attributes(character varying, character varying, character varying, character varying, character varying, character varying, character varying, character varying);
DROP FUNCTION IF EXISTS rate_attributes(character varying, character varying, character varying, character varying, character varying, character varying, character varying, character varying, character varying, character varying);
BEGIN;
-- Type used to pass around a normalized address between functions
@ -191,4 +193,8 @@ SELECT name, abbrev, false
-- Reverse Geocode API, called by user
\i geocode/reverse_geocode.sql
COMMIT;
COMMIT;
-- install missing indexes
\echo 'Installing missing indexes - this might take a while so be patient ..'
SELECT install_missing_indexes();
\echo 'Missing index Install completed'