significantly improve speed of reverse geocode (when all states loaded), but using CTEs and unparameterizing query (parameterized often does not use inheritance), reverse_geocode to use default parameters, put in table column comments to describe some of what loader columns mean.

git-svn-id: http://svn.osgeo.org/postgis/trunk@7377 b70326c6-7e19-0410-871a-916f4a2858ee
This commit is contained in:
Regina Obe 2011-06-12 04:52:03 +00:00
parent b2ca472ea1
commit d6fd367962
3 changed files with 77 additions and 38 deletions

View file

@ -7,24 +7,20 @@
-- and array of cross streets, as well as interpolated points along the streets
-- Use case example an address at the intersection of 3 streets: SELECT pprint_addy(r.addy[1]) As st1, pprint_addy(r.addy[2]) As st2, pprint_addy(r.addy[3]) As st3, array_to_string(r.street, ',') FROM reverse_geocode(ST_GeomFromText('POINT(-71.057811 42.358274)',4269)) As r;
--set search_path=tiger,public;
CREATE OR REPLACE FUNCTION reverse_geocode(
IN pt geometry,
IN include_strnum_range boolean,
OUT intpt geometry[],
OUT addy NORM_ADDY[],
OUT street varchar[]
) RETURNS RECORD
AS $_$
CREATE OR REPLACE FUNCTION reverse_geocode(IN pt geometry, IN include_strnum_range boolean, OUT intpt geometry[], OUT addy norm_addy[], OUT street character varying[])
RETURNS record AS
$$
DECLARE
var_redge RECORD;
var_states text[];
var_countyfp text[];
var_state text := NULL;
var_countyfp text := NULL;
var_addy NORM_ADDY;
var_strnum varchar;
var_nstrnum numeric(10);
var_primary_line geometry := NULL;
var_primary_dist numeric(10,2) ;
var_pt geometry;
var_stmt text;
var_debug boolean = false;
BEGIN
IF pt IS NULL THEN
@ -33,26 +29,34 @@ BEGIN
IF ST_SRID(pt) = 4269 THEN
var_pt := pt;
ELSIF ST_SRID(pt) > 0 THEN
var_pt := ST_Transform(pt, 4269);
var_pt := ST_Transform(pt, 4269);
ELSE --If srid is unknown, assume its 4269
var_pt := ST_SetSRID(pt, 4269);
END IF;
var_pt := ST_SnapToGrid(var_pt, 0.00005); /** Get rid of floating point junk that would prevent intersections **/
END IF;
-- Determine state tables to check
-- this is needed to take advantage of constraint exclusion
IF var_debug THEN
RAISE NOTICE 'Get matching states start: %', clock_timestamp();
RAISE NOTICE 'Get matching states start: %', clock_timestamp();
END IF;
var_states := ARRAY(SELECT statefp FROM state WHERE ST_Intersects(the_geom, var_pt) );
SELECT statefp INTO var_state FROM state WHERE ST_Intersects(the_geom, var_pt) LIMIT 1;
IF var_debug THEN
RAISE NOTICE 'Get matching states end: %', clock_timestamp();
RAISE NOTICE 'Get matching states end: % - %', var_state, clock_timestamp();
END IF;
IF array_upper(var_states, 1) IS NULL THEN
IF var_state IS NULL THEN
-- We don't have any data for this state
RETURN;
END IF;
var_countyfp := ARRAY(SELECT countyfp FROM county WHERE statefp = ANY(var_states) AND ST_Intersects(the_geom, var_pt) );
IF array_upper(var_countyfp, 1) IS NULL THEN
IF var_debug THEN
RAISE NOTICE 'Get matching counties start: %', clock_timestamp();
END IF;
SELECT countyfp INTO var_countyfp FROM county WHERE statefp = var_state AND ST_Intersects(the_geom, var_pt) LIMIT 1;
IF var_debug THEN
RAISE NOTICE 'Get matching counties end: % - %',var_countyfp, clock_timestamp();
END IF;
IF var_countyfp IS NULL THEN
-- We don't have any data for this county
RETURN;
END IF;
@ -62,25 +66,58 @@ BEGIN
IF var_debug THEN
RAISE NOTICE 'Get matching edges start: %', clock_timestamp();
END IF;
FOR var_redge IN
var_stmt := '
WITH
f AS
( SELECT * FROM faces
WHERE statefp = ' || quote_literal(var_state) || ' AND countyfp = ' || quote_literal(var_countyfp) || '
AND ST_DWithin(faces.the_geom, ' || quote_literal(var_pt::text) || '::geometry, 0.001) ),
e AS
( SELECT * FROM edges
WHERE statefp = ' || quote_literal(var_state) || ' AND countyfp = ' || quote_literal(var_countyfp) || '
AND ST_DWithin(edges.the_geom, ' || quote_literal(var_pt::text) || '::geometry, 0.05)
)
SELECT *
FROM (SELECT DISTINCT ON(fullname) foo.fullname, foo.stusps, foo.zip,
(SELECT z.place FROM zip_state_loc AS z WHERE z.zip = foo.zip and z.statefp = ' || quote_literal(var_state) || ' LIMIT 1) As place, foo.center_pt,
side, to_number(fromhn, ''999999'') As fromhn, to_number(tohn, ''999999'') As tohn, ST_GeometryN(ST_Multi(line),1) As line, foo.dist
FROM
(SELECT e.the_geom As line, e.fullname, a.zip, s.abbrev As stusps, ST_ClosestPoint(e.the_geom,' || quote_literal(var_pt::text) || '::geometry) As center_pt, e.statefp, a.side, a.fromhn, a.tohn,
ST_Distance_Sphere(e.the_geom, ' || quote_literal(var_pt::text) || '::geometry) As dist
FROM e
INNER JOIN (SELECT * FROM state_lookup WHERE statefp = ' || quote_literal(var_state) || ' ) As s ON (e.statefp = s.statefp )
INNER JOIN f As fl ON (e.tfidl = fl.tfid)
INNER JOIN f As fr ON (e.tfidr = fr.tfid)
INNER JOIN (SELECT * FROM addr WHERE statefp = ' || quote_literal(var_state) || ' ) As a ON ( e.tlid = a.tlid AND e.statefp = a.statefp AND
( ( ST_Covers(fl.the_geom, ' || quote_literal(var_pt::text) || '::geometry) AND a.side = ''L'') OR ( ST_Covers(fr.the_geom, ' || quote_literal(var_pt::text) || '::geometry) AND a.side = ''R'' ) ) )
-- INNER JOIN zip_state_loc As z ON (a.statefp = z.statefp AND a.zip = z.zip) /** really slow with this join **/
WHERE ST_DWithin(e.the_geom, ' || quote_literal(var_pt::text) || '::geometry, 0.005)
ORDER BY ST_Distance(e.the_geom, ' || quote_literal(var_pt::text) || '::geometry) LIMIT 4) As foo
WHERE dist < 150 --less than 150 m
ORDER BY foo.fullname, foo.dist) As f ORDER BY f.dist ';
RAISE NOTICE 'Statement 1: %', var_stmt;
/** FOR var_redge IN
SELECT *
FROM (SELECT DISTINCT ON(fullname) foo.fullname, foo.stusps, foo.zip,
(SELECT z.place FROM zip_state_loc AS z WHERE z.zip = foo.zip and z.statefp = foo.statefp LIMIT 1) As place, foo.center_pt,
side, to_number(fromhn, '999999') As fromhn, to_number(tohn, '999999') As tohn, ST_GeometryN(ST_Multi(line),1) As line, foo.dist
FROM
(SELECT e.the_geom As line, e.fullname, a.zip, s.abbrev As stusps, ST_ClosestPoint(e.the_geom, var_pt) As center_pt, e.statefp, a.side, a.fromhn, a.tohn, ST_Distance_Sphere(e.the_geom, var_pt) As dist
FROM (SELECT * FROM edges WHERE statefp = ANY(var_states) AND countyfp = ANY(var_countyfp) ) AS e INNER JOIN (SELECT * FROM state_lookup WHERE statefp = ANY(var_states) ) As s ON (e.statefp = s.statefp )
INNER JOIN (SELECT * FROM faces WHERE statefp = ANY(var_states) AND countyfp = ANY(var_countyfp) ) As fl ON (e.tfidl = fl.tfid AND e.statefp = fl.statefp)
INNER JOIN (SELECT * FROM faces WHERE statefp = ANY(var_states) AND countyfp = ANY(var_countyfp) ) As fr ON (e.tfidr = fr.tfid AND e.statefp = fr.statefp)
INNER JOIN (SELECT * FROM addr WHERE statefp = ANY(var_states) ) As a ON ( e.tlid = a.tlid AND e.statefp = a.statefp AND
FROM (SELECT * FROM edges WHERE statefp = var_state AND countyfp = var_countyfp ) AS e INNER JOIN (SELECT * FROM state_lookup WHERE statefp = var_state ) As s ON (e.statefp = s.statefp )
INNER JOIN (SELECT * FROM faces WHERE statefp = var_state AND countyfp = var_countyfp ) As fl ON (e.tfidl = fl.tfid AND e.statefp = fl.statefp)
INNER JOIN (SELECT * FROM faces WHERE statefp = var_state AND countyfp = var_countyfp ) As fr ON (e.tfidr = fr.tfid AND e.statefp = fr.statefp)
INNER JOIN (SELECT * FROM addr WHERE statefp = var_state ) As a ON ( e.tlid = a.tlid AND e.statefp = a.statefp AND
( ( ST_Covers(fl.the_geom, var_pt) AND a.side = 'L') OR ( ST_Covers(fr.the_geom, var_pt) AND a.side = 'R' ) ) )
-- INNER JOIN zip_state_loc As z ON (a.statefp = z.statefp AND a.zip = z.zip) /** really slow with this join **/
-- INNER JOIN zip_state_loc As z ON (a.statefp = z.statefp AND a.zip = z.zip)
WHERE ST_DWithin(e.the_geom, var_pt, 0.005)
ORDER BY ST_Distance_Sphere(e.the_geom, var_pt) LIMIT 4) As foo
ORDER BY ST_Distance(e.the_geom, var_pt) LIMIT 4) As foo
WHERE dist < 150 --less than 150 m
ORDER BY foo.fullname, foo.dist) As f ORDER BY f.dist LOOP
ORDER BY foo.fullname, foo.dist) As f ORDER BY f.dist LOOP **/
FOR var_redge IN EXECUTE var_stmt LOOP
IF var_debug THEN
RAISE NOTICE 'Get matching edges loop: %', clock_timestamp();
RAISE NOTICE 'Get matching edges loop: %,%', var_primary_line, clock_timestamp();
END IF;
IF var_primary_line IS NULL THEN --this is the first time in the loop and our primary guess
var_primary_line := var_redge.line;
@ -113,14 +150,5 @@ BEGIN
RETURN;
END;
$_$ LANGUAGE plpgsql STABLE COST 1000;
CREATE OR REPLACE FUNCTION reverse_geocode(IN pt geometry, OUT intpt geometry[],
OUT addy NORM_ADDY[],
OUT street varchar[]) RETURNS RECORD
AS
$$
-- default to not include street range in cross streets
SELECT reverse_geocode($1,false);
$$
language sql STABLE;
LANGUAGE plpgsql STABLE COST 1000;

View file

@ -3,7 +3,8 @@
-- PostGIS - Spatial Types for PostgreSQL
-- http://www.postgis.org
--
-- Copyright (C) 2010, 2011 Regina Obe and Leo Hsu
-- Copyright (C) 2010, 2011 Regina Obe and Leo Hsu
-- Paragon Corporation
--
-- This is free software; you can redistribute and/or modify it under
-- the terms of the GNU General Public Licence. See the COPYING file.
@ -99,7 +100,16 @@ CREATE TABLE loader_lookuptables(process_order integer NOT NULL DEFAULT 1000,
load boolean NOT NULL DEFAULT true,
level_county boolean NOT NULL DEFAULT false,
level_state boolean NOT NULL DEFAULT false,
post_load_process text, single_geom_mode boolean DEFAULT false, insert_mode char(1) NOT NULL DEFAULT 'c', pre_load_process text,columns_exclude text[]);
post_load_process text, single_geom_mode boolean DEFAULT false,
insert_mode char(1) NOT NULL DEFAULT 'c',
pre_load_process text,columns_exclude text[]);
-- put in explanatory comments of what each column is for
COMMENT ON COLUMN loader_lookuptables.lookup_name IS 'This is the table name to inherit from and suffix of resulting output table -- how the table will be named -- edges where would mean -- ma_edges , pa_edges etc.';
COMMENT ON COLUMN loader_lookuptables.table_name IS 'suffix of the tables to load e.g. edges would load all tables like *edges.dbf(shp) -- so tl_2010_42129_edges.dbf . ';
COMMENT ON COLUMN loader_lookuptables.load IS 'Whether or not to load the table. For states and zcta5 (you may just want to download states10, zcta510 nationwide file manually) load your own into a single table that inherits from tiger.states, tiger.zcta5. You''ll get improved performance for some geocoding cases.';
COMMENT ON COLUMN loader_lookuptables.columns_exclude IS 'List of columns to exclude as an array. This is excluded from both input table and output table and rest of columns remaining are assumed to be in same order in both tables. gid, geoid10,cpi,suffix1ce are excluded if no columns are specified.';
INSERT INTO loader_lookuptables(process_order, lookup_name, table_name, load, level_county, level_state, single_geom_mode, pre_load_process, post_load_process)
VALUES(2, 'county', 'county10', true, false, true,
false, '${psql} -c "CREATE TABLE ${data_schema}.${state_abbrev}_${lookup_name}(CONSTRAINT pk_${state_abbrev}_${lookup_name} PRIMARY KEY (gid) ) INHERITS(county); " ',

View file

@ -23,6 +23,7 @@ CREATE INDEX idx_tiger_faces_countyfp ON faces USING btree(countyfp);
CREATE INDEX tiger_place_the_geom_gist ON place USING gist(the_geom);
CREATE INDEX tiger_edges_the_geom_gist ON edges USING gist(the_geom);
CREATE INDEX tiger_state_the_geom_gist ON faces USING gist(the_geom);
DROP FUNCTION IF EXISTS reverse_geocode(geometry); /** changed to use default parameters **/
CREATE TABLE zcta5
(