Put the WKT new parser files into repo so others can see them and I can work in synch with trunk.

git-svn-id: http://svn.osgeo.org/postgis/trunk@6060 b70326c6-7e19-0410-871a-916f4a2858ee
This commit is contained in:
Paul Ramsey 2010-10-10 00:08:58 +00:00
parent 17443d862b
commit 37111647d5
8 changed files with 584 additions and 3 deletions

View file

@ -44,6 +44,7 @@ SA_OBJS = \
lwout_wkt.o \
lwout_wkb.o \
lwin_wkb.o \
lwin_wkt.o \
lwutil.o \
lwhomogenize.o \
lwalgorithm.o \
@ -99,6 +100,16 @@ $(SA_OBJS): %.o: %.c
$(NM_OBJS): %.o: %.c
$(CC) $(CFLAGS) $(NUMERICFLAGS) -c -o $@ $<
# Generate the WKT parser C source from the Yacc/Bison grammar.
# -d asks the generator to also write the token-definition header;
# --debug and --verbose request parser tracing support and a state report
# (option meanings assume $(YACC) is GNU Bison -- confirm).
lwin_wkt_parse.c: lwin_wkt_parse.y
$(YACC) --debug --verbose -o $@ -d $^
# Alternative recipe without tracing support or the state report:
# $(YACC) -o $@ -d $^
# Generate the WKT lexer C source from the Flex input. It is rebuilt when
# either the .l file or lwin_wkt.h changes, but only the .l file ($<) is
# handed to $(LEX). --case-insensitive makes the keyword patterns match
# regardless of letter case.
lwin_wkt_lex.c: lwin_wkt_lex.l lwin_wkt.h
$(LEX) --case-insensitive -o $@ $<
# Commands to generate the lexer and parser from input files
wktparse.tab.c: wktparse.y

View file

@ -692,6 +692,16 @@ extern BOX3D *ptarray_compute_box3d(const POINTARRAY *pa);
extern int ptarray_compute_box3d_p(const POINTARRAY *pa, BOX3D *out);
/**
* Create a new POINTARRAY with no points.
* Room for an initial batch of points is allocated up front, so points can
* be appended right away with ptarray_add_point().
* @param hasz non-zero if each point carries a Z ordinate
* @param hasm non-zero if each point carries an M ordinate
* @return the newly allocated, empty POINTARRAY
*/
extern POINTARRAY* ptarray_construct_empty(char hasz, char hasm);
/**
* Add a point to the end of an existing pointarray, growing its storage
* when it is full. Only the ordinates the array's dimensionality calls for
* (x, y, and z and/or m when present) are copied from pt.
* @param pa the array to append to
* @param pt the point to copy in
* @return LW_TRUE on success, LW_FALSE if pa or pt is NULL
*/
extern int ptarray_add_point(POINTARRAY *pa, POINT4D *pt);
/*
* size of point represented in the POINTARRAY

152
liblwgeom/lwin_wkt.c Normal file
View file

@ -0,0 +1,152 @@
#include "lwin_wkt.h"
/**
* Given flags and a string ("Z", "M" or "ZM") determine if the flags and
* string describe the same number of dimensions. If they do, update the flags
* to ensure they are using the correct higher dimension in the 3D case ("M" or "Z").
static int wkt_parser_dimensionality_check(uchar flags, uchar type)
{
if( ! ( FLAGS_GET_M(flags) == TYPE_HASM(type) ) )
return LW_FALSE;
if( ! ( FLAGS_GET_Z(flags) == TYPE_HASZ(type) ) )
return LW_FALSE;
return LW_TRUE;
}
*/
/**
* Work out the Z/M flags for a geometry that is being parsed.
*
* @param pa point array read from the WKT, or NULL for an EMPTY geometry
* @param dimensionality text of the explicit dimensionality token ("Z", "M"
*        or "ZM", in any letter case), or NULL when the WKT had none
* @return flags with the Z and/or M bits set; 0 when neither input is given
*/
static uchar wkt_parser_dimensionality(POINTARRAY *pa, char *dimensionality)
{
	uchar flags = 0;
	const char *c;

	/* If there's an explicit dimensionality, we use that */
	if( dimensionality )
	{
		/*
		* The lexer is generated case-insensitive and passes the token
		* text on exactly as typed, so both letter cases must be accepted.
		*/
		for( c = dimensionality; *c != '\0'; c++ )
		{
			if( *c == 'Z' || *c == 'z' )
				FLAGS_SET_Z(flags,1);
			else if( *c == 'M' || *c == 'm' )
				FLAGS_SET_M(flags,1);
		}
	}
	/* Otherwise we use the implicit dimensionality in the number of coordinate dimensions */
	else if ( pa )
	{
		FLAGS_SET_Z(flags,TYPE_HASZ(pa->dims));
		FLAGS_SET_M(flags,TYPE_HASM(pa->dims));
	}

	return flags;
}
/**
* Build a two-ordinate (XY) parser coordinate.
* z and m are zeroed and neither the Z nor the M flag is set.
*/
POINT wkt_parser_coord_2(double c1, double c2)
{
	POINT p;

	p.flags = 0;
	p.x = c1;
	p.y = c2;
	p.z = p.m = 0.0;
	FLAGS_SET_Z(p.flags, 0);
	FLAGS_SET_M(p.flags, 0);
	return p;
}
/**
* Build a three-ordinate parser coordinate. The third value is stored as z
* and the Z flag is set.
* Note, if this is an XYM coordinate we'll have to fix it later when we build
* the object itself and have access to the dimensionality token.
*/
POINT wkt_parser_coord_3(double c1, double c2, double c3)
{
	POINT p;

	p.flags = 0;
	p.x = c1;
	p.y = c2;
	p.z = c3;
	p.m = 0;
	FLAGS_SET_Z(p.flags, 1);
	FLAGS_SET_M(p.flags, 0);
	return p;
}
/**
* Build a four-ordinate (XYZM) parser coordinate with both the Z and the
* M flag set.
*/
POINT wkt_parser_coord_4(double c1, double c2, double c3, double c4)
{
	POINT p;

	p.flags = 0;
	p.x = c1;
	p.y = c2;
	p.z = c3;
	p.m = c4;
	FLAGS_SET_Z(p.flags, 1);
	FLAGS_SET_M(p.flags, 1);
	return p;
}
/**
* Append a parsed coordinate to a point array.
*
* @param pa destination array; a NULL array is ignored
* @param p coordinate produced by one of the wkt_parser_coord_N() builders
*/
void wkt_parser_ptarray_add_coord(POINTARRAY *pa, POINT p)
{
	POINT4D pt;

	/* Nothing to append to */
	if( ! pa )
		return;

	/*
	* While parsing the point arrays, XYM and XYZ points are both treated
	* as XYZ. Every field is filled in so the struct never carries
	* indeterminate values into ptarray_add_point(), which stores only the
	* ordinates the destination's dimensionality calls for.
	*/
	pt.x = p.x;
	pt.y = p.y;
	pt.z = p.z;
	pt.m = p.m;

	/* If the destination is XYM, we'll write the third coordinate to m */
	if( TYPE_HASM(pa->dims) && ! TYPE_HASZ(pa->dims) )
		pt.m = p.z;

	ptarray_add_point(pa, &pt);
}
/**
* Allocate an empty point array for coordinates with ndims ordinates each.
* More than two ordinates switches Z on; more than three switches M on too.
*/
POINTARRAY* wkt_parser_ptarray_new(int ndims)
{
	const int with_z = (ndims > 2);
	const int with_m = (ndims > 3);

	return ptarray_construct_empty(with_z, with_m);
}
/**
* Create a new linestring.
*
* @param pa points of the line; NULL means the geometry is EMPTY
* @param dimensionality text of the explicit dimensionality token, or NULL
*        when the WKT did not specify one
* @return the new geometry, or NULL when the explicit dimensionality does not
*         agree with the number of ordinates actually parsed
*
* NOTE(review): 0 is passed as the SRID below; lwline_construct() documents
* -1 as the "unknown SRID" value -- confirm which is intended here.
*/
LWGEOM* wkt_parser_linestring_new(POINTARRAY *pa, char *dimensionality)
{
	uchar flags;

	/* TODO apply the parser checks? (not enough points, etc) */

	/* If there's an explicit dimensionality, we use that */
	flags = wkt_parser_dimensionality(pa, dimensionality);

	/* No pointarray means it is empty */
	if( ! pa )
		return lwline_as_lwgeom(lwline_construct_empty(0, FLAGS_GET_Z(flags), FLAGS_GET_M(flags)));

	/* If the number of dimensions is not consistent, we have a problem. */
	if( FLAGS_NDIMS(flags) != TYPE_NDIMS(pa->dims) )
	{
		/* TODO: Error out of the parse. */
		printf("________ ndims of array != ndims of dimensionalty tokens \n\n>>>>>> ERROR <<<<<<<\n\n");
		/*
		* Re-tagging the array with a different ordinate count would make
		* every later read use the wrong stride over its storage, so no
		* geometry is built. pa is not released here.
		*/
		return NULL;
	}

	/* Same ordinate count: apply the explicit Z/M meaning (e.g. XYZ parsed, "M" requested) */
	TYPE_SETZM(pa->dims, FLAGS_GET_Z(flags), FLAGS_GET_M(flags));
	return lwline_as_lwgeom(lwline_construct(0, NULL, pa));
}

/**
* Create a new circular string. Circular strings are just like linestrings:
* the geometry is built as a line and then re-tagged with CIRCSTRINGTYPE.
*
* @return the new geometry, or NULL when the linestring could not be built
*/
LWGEOM* wkt_parser_circularstring_new(POINTARRAY *pa, char *dimensionality)
{
	/* TODO apply the parser checks? */
	LWGEOM *geom = wkt_parser_linestring_new(pa, dimensionality);
	LWCIRCSTRING *circ;

	/* Pass a construction failure straight through instead of dereferencing it */
	if( ! geom )
		return NULL;

	circ = (LWCIRCSTRING*)geom;
	TYPE_SETTYPE(circ->type, CIRCSTRINGTYPE);
	return lwcircstring_as_lwgeom(circ);
}

40
liblwgeom/lwin_wkt.h Normal file
View file

@ -0,0 +1,40 @@
#include "libgeom.h"
/**
* Coordinate object to hold information about last coordinate temporarily.
* We need to know how many dimensions there are at any given time.
* Instances are produced by the wkt_parser_coord_N() functions and consumed
* by wkt_parser_ptarray_add_coord().
*/
typedef struct
{
uchar flags; /* Z/M presence bits, written with FLAGS_SET_Z / FLAGS_SET_M */
double x; /* first ordinate */
double y; /* second ordinate */
double z; /* third ordinate as parsed; 0 when absent */
double m; /* fourth ordinate as parsed; 0 when absent */
}
POINT;
/*
** Global that holds the final output geometry of a parse.
** NOTE(review): this is a definition rather than an extern declaration, so
** every source file that includes this header defines the variable too --
** confirm that is intended.
*/
LWGEOM *globalgeom;
/*
** Functions called from within the bison parser to construct geometries.
*/
/*
** Coordinate builders. Each returns a POINT by value whose flags record
** how many ordinates were supplied (2: none, 3: Z, 4: Z and M).
*/
POINT wkt_parser_coord_2(double c1, double c2);
POINT wkt_parser_coord_3(double c1, double c2, double c3);
POINT wkt_parser_coord_4(double c1, double c2, double c3, double c4);
/* Append coordinate p to the point array pa. */
void wkt_parser_ptarray_add_coord(POINTARRAY *pa, POINT p);
/* Create an empty point array; ndims > 2 enables Z, ndims > 3 enables M as well. */
POINTARRAY* wkt_parser_ptarray_new(int ndims);
/*
** Geometry builders. A NULL pa means the geometry is EMPTY; a NULL
** dimensionality means the WKT carried no explicit dimensionality token.
*/
LWGEOM* wkt_parser_linestring_new(POINTARRAY *pa, char *dimensionality);
LWGEOM* wkt_parser_circularstring_new(POINTARRAY *pa, char *dimensionality);

50
liblwgeom/lwin_wkt_lex.l Normal file
View file

@ -0,0 +1,50 @@
%{
/* The lexer */
#include <stdio.h>
#include <string.h>
#include "../libgeom.h"
#include "g_wkt_parse.h"
/* Scanner buffer created by wkt_lexer_init() and released by wkt_lexer_close(). */
static YY_BUFFER_STATE wkt_yy_buf_state;

/* Set the scanner up to read its input from the string src. */
void wkt_lexer_init(char *src)
{
	wkt_yy_buf_state = wkt_yy_scan_string(src);
}

/* Release the scanner buffer set up by wkt_lexer_init(). */
void wkt_lexer_close()
{
	wkt_yy_delete_buffer(wkt_yy_buf_state);
}
%}
%option noyywrap
%option prefix="wkt_yy"
%%
SRID= { printf("lex: SRID\n"); return SRID_TOK; }
GEOMETRYCOLLECTION { printf("lex: GEOMETRYCOLLECTION\n"); return COLLECTION_TOK; }
MULTISURFACE { printf("lex: MULTISURFACE\n"); return MSURFACE_TOK; }
MULTIPOLYGON { printf("lex: MULTIPOLYGON\n"); return MPOLYGON_TOK; }
MULTICURVE { printf("lex: MULTICURVE\n"); return MCURVE_TOK; }
MULTILINESTRING { printf("lex: MULTILINESTRING\n"); return MLINESTRING_TOK; }
MULTIPOINT { printf("lex: MULTIPOINT\n"); return MPOINT_TOK; }
CURVEPOLYGON { printf("lex: CURVEPOLYGON\n"); return CURVEPOLYGON_TOK; }
POLYGON { printf("lex: POLYGON\n"); return POLYGON_TOK; }
COMPOUNDCURVE { printf("lex: COMPOUNDCURVE\n"); return COMPOUNDCURVE_TOK; }
CIRCULARSTRING { printf("lex: CIRCULARSTRING\n"); return CIRCULARSTRING_TOK; }
LINESTRING { printf("lex: LINESTRING\n"); return LINESTRING_TOK; }
POINT { printf("lex: POINT\n"); return POINT_TOK; }
Z|M|ZM {
	/* The token text is duplicated as typed; whoever consumes the token owns the copy. */
	wkt_yylval.stringvalue = strdup(wkt_yytext); printf("lex: DIMENSIONALITY\n"); return DIMENSIONALITY_TOK; }
EMPTY { printf("lex: EMPTY\n"); return EMPTY_TOK; }
-?(([0-9]+\.?)|([0-9]*\.[0-9]+))([eE][-+]?[0-9]+)? {
	/* LWDEBUGF(5,"Lex got double: %s",wkt_yytext); */
	/* The exponent is grouped so it may follow either mantissa form: "1e5", "1.", "1.5e-3" and ".5" all match. */
	printf("lex: DOUBLE\n"); wkt_yylval.doublevalue = atof(wkt_yytext); return DOUBLE_TOK; }
-?([0-9]+) {
	/* LWDEBUGF(5,"Lex got integer: %s",wkt_yytext); */
	/* NOTE(review): every plain integer also matches the DOUBLE rule above, which is listed first, so this rule is never selected. */
	printf("lex: INTEGER\n"); wkt_yylval.integervalue = atoi(wkt_yytext); return INTEGER_TOK; }
\( { printf("lex: LBRACKET\n"); return LBRACKET_TOK; }
\) { printf("lex: RBRACKET\n"); return RBRACKET_TOK; }
, { printf("lex: COMMA\n"); return COMMA_TOK; }
\; { printf("lex: SEMICOLON\n"); return SEMICOLON_TOK; }
[ \t\n\r]+ /* ignore whitespace, including line breaks inside multi-line WKT */;
%%

250
liblwgeom/lwin_wkt_parse.y Normal file
View file

@ -0,0 +1,250 @@
%{
/* WKT Parser */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "g_wkt_parse_utils.h"
/* Message from the most recent parse error, heap-allocated; NULL when there is none. */
char *wkt_yyerror_str = NULL;
/* Parser trace switch; initialised to 1 here. */
int wkt_yydebug = 1;

/**
* Error callback for the parser: stores "Parse error: <str>" in
* wkt_yyerror_str, replacing any earlier message.
*
* @param str description of the error; NULL is treated as an empty string
*
* If memory for the message cannot be obtained, wkt_yyerror_str is left NULL.
*/
void wkt_yyerror(const char *str)
{
	static const char prefix[] = "Parse error: ";
	size_t len;

	/* Drop the message of any earlier error */
	free(wkt_yyerror_str);
	wkt_yyerror_str = NULL;

	if ( ! str )
		str = "";

	/* sizeof(prefix) already counts the terminating NUL */
	len = sizeof(prefix) + strlen(str);
	wkt_yyerror_str = malloc(len);
	if ( wkt_yyerror_str )
		snprintf(wkt_yyerror_str, len, "%s%s", prefix, str);
}
/*
* End-of-input hook for the scanner. Always answers 1.
*/
int wkt_yywrap()
{
	const int answer = 1;

	return answer;
}
/**
* Parse a WKT string into a geometry.
*
* @param str the WKT text to parse
* @param geom receives the parsed geometry on success, NULL on failure
* @param errstr receives the parse error message on failure, NULL on
*        success; ownership of the message passes to the caller
* @return G_SUCCESS when the parse succeeded, G_FAILURE otherwise
*/
extern int ggeometry_from_wkt_string(char *str, G_GEOMETRY **geom, char **errstr)
{
	int rv = 0;

	*geom = NULL;
	*errstr = NULL;

	wkt_lexer_init(str);
	rv = wkt_yyparse();
	LWDEBUGF(4,"wkt_yyparse returned %d", rv);
	wkt_lexer_close();

	if ( rv )
	{
		/*
		* Hand the message over and forget it here. Otherwise the next
		* call to wkt_yyerror() would free a string the caller now owns.
		*/
		*errstr = wkt_yyerror_str;
		wkt_yyerror_str = NULL;
		return G_FAILURE;
	}

	/* Same hand-over for the result: the global is cleared once the caller has it */
	*geom = globalgeom;
	globalgeom = NULL;
	return G_SUCCESS;
}
%}
%error-verbose
%name-prefix="wkt_yy"
/* Semantic values carried by tokens and non-terminals */
%union {
	int integervalue;         /* INTEGER_TOK; the lexer fills it with atoi() */
	double doublevalue;       /* DOUBLE_TOK; the lexer fills it with atof() */
	char *stringvalue;        /* token text duplicated by the lexer with strdup() */
	LWGEOM *geometryvalue;    /* geometry built by a grammar rule */
	POINT coordinatevalue;    /* one parsed coordinate, held by value */
	POINTARRAY *ptarrayvalue; /* list of coordinates under construction */
}
/* Geometry type keywords: these tokens carry no semantic value */
%token POINT_TOK LINESTRING_TOK POLYGON_TOK
%token MPOINT_TOK MLINESTRING_TOK MPOLYGON_TOK
%token MSURFACE_TOK MCURVE_TOK CURVEPOLYGON_TOK COMPOUNDCURVE_TOK CIRCULARSTRING_TOK
%token COLLECTION_TOK
/* Punctuation and the EMPTY keyword */
%token RBRACKET_TOK LBRACKET_TOK COMMA_TOK EMPTY_TOK
/* The SRID prefix and the semicolon that follows its value */
%token SRID_TOK SEMICOLON_TOK
/* Tokens that carry a value in the %union member named in angle brackets */
%token <doublevalue> DOUBLE_TOK
%token <integervalue> INTEGER_TOK
%token <stringvalue> STRING_TOK
%token <stringvalue> DIMENSIONALITY_TOK
/* Non-terminals whose value is a geometry */
%type <geometryvalue> geometry
%type <geometryvalue> geometry_no_srid
%type <geometryvalue> geometrycollection
%type <geometryvalue> multisurface
%type <geometryvalue> multicurve
%type <geometryvalue> curvepolygon
%type <geometryvalue> compoundcurve
%type <geometryvalue> geometry_list
%type <geometryvalue> surface_list
%type <geometryvalue> polygon_list
%type <geometryvalue> curve_list
%type <geometryvalue> linestring_list
%type <geometryvalue> curvering_list
%type <geometryvalue> ring_list
%type <geometryvalue> point
%type <geometryvalue> circularstring
%type <geometryvalue> linestring
%type <geometryvalue> linestring_untagged
%type <geometryvalue> ring
%type <geometryvalue> polygon
%type <geometryvalue> polygon_untagged
%type <geometryvalue> multipoint
%type <geometryvalue> multilinestring
%type <geometryvalue> multipolygon
/* Non-terminals for coordinate lists and single coordinates */
%type <ptarrayvalue> ptarray
%type <coordinatevalue> coordinate
%%
/*
* Start symbol: a geometry, optionally preceded by "SRID=<integer>;".
* The SRID value ($2) is parsed but not yet applied to the geometry.
* NOTE(review): the prefix needs the lexer to deliver INTEGER_TOK for the
* SRID digits -- confirm the lexer rule order allows that.
*/
geometry:
	geometry_no_srid
		{ $$ = $1; } |
	SRID_TOK INTEGER_TOK SEMICOLON_TOK geometry_no_srid
		{ $$ = $4; } ;
/*
* Any single geometry without an SRID prefix: one alternative per geometry
* type. All actions are still empty placeholders.
*/
geometry_no_srid :
point {} |
linestring {} |
circularstring {} |
compoundcurve {} |
polygon {} |
curvepolygon {} |
multipoint {} |
multilinestring {} |
multipolygon {} |
multisurface {} |
multicurve {} |
geometrycollection {} ;
/*
* GEOMETRYCOLLECTION, with an optional dimensionality token, holding a
* bracketed list of geometries or the EMPTY keyword. Actions are still
* empty placeholders.
*/
geometrycollection :
COLLECTION_TOK LBRACKET_TOK geometry_list RBRACKET_TOK {} |
COLLECTION_TOK DIMENSIONALITY_TOK LBRACKET_TOK geometry_list RBRACKET_TOK {} |
COLLECTION_TOK EMPTY_TOK {} ;
/* Comma-separated list of one or more geometries (left-recursive) */
geometry_list :
geometry_list COMMA_TOK geometry {} |
geometry {} ;
/*
* Surface-type geometries and the lists they are built from. Every action
* in this group is still an empty placeholder.
*/
/* MULTISURFACE: list of polygons (tagged or untagged) and curve polygons */
multisurface :
MSURFACE_TOK LBRACKET_TOK surface_list RBRACKET_TOK {} |
MSURFACE_TOK DIMENSIONALITY_TOK LBRACKET_TOK surface_list RBRACKET_TOK {} |
MSURFACE_TOK EMPTY_TOK {} ;
surface_list :
surface_list COMMA_TOK polygon {} |
surface_list COMMA_TOK curvepolygon {} |
surface_list COMMA_TOK polygon_untagged {} |
polygon {} |
curvepolygon {} |
polygon_untagged {} ;
/* MULTIPOLYGON: list of untagged polygons only */
multipolygon :
MPOLYGON_TOK LBRACKET_TOK polygon_list RBRACKET_TOK {} |
MPOLYGON_TOK DIMENSIONALITY_TOK LBRACKET_TOK polygon_list RBRACKET_TOK {} |
MPOLYGON_TOK EMPTY_TOK {} ;
polygon_list :
polygon_list COMMA_TOK polygon_untagged {} |
polygon_untagged {} ;
/* POLYGON keyword followed by a bracketed ring list, or EMPTY */
polygon :
POLYGON_TOK LBRACKET_TOK ring_list RBRACKET_TOK {} |
POLYGON_TOK DIMENSIONALITY_TOK LBRACKET_TOK ring_list RBRACKET_TOK {} |
POLYGON_TOK EMPTY_TOK {} ;
/* A polygon written without its keyword, as it appears inside a multi-geometry */
polygon_untagged :
LBRACKET_TOK ring_list RBRACKET_TOK {} ;
/* CURVEPOLYGON: rings may be plain rings or circular strings */
curvepolygon :
CURVEPOLYGON_TOK LBRACKET_TOK curvering_list RBRACKET_TOK {} |
CURVEPOLYGON_TOK DIMENSIONALITY_TOK LBRACKET_TOK curvering_list RBRACKET_TOK {} |
CURVEPOLYGON_TOK EMPTY_TOK {} ;
curvering_list :
curvering_list COMMA_TOK ring {} |
curvering_list COMMA_TOK circularstring {} |
ring {} |
circularstring {} ;
ring_list :
ring_list COMMA_TOK ring {} |
ring {} ;
/* A ring is a bracketed coordinate list */
ring :
LBRACKET_TOK ptarray RBRACKET_TOK {} ;
/*
* Curve-type geometries and the lists they are built from. Every action in
* this group is still an empty placeholder.
*/
/* COMPOUNDCURVE and MULTICURVE share the same kind of member list */
compoundcurve :
COMPOUNDCURVE_TOK LBRACKET_TOK curve_list RBRACKET_TOK {} |
COMPOUNDCURVE_TOK DIMENSIONALITY_TOK LBRACKET_TOK curve_list RBRACKET_TOK {} |
COMPOUNDCURVE_TOK EMPTY_TOK {} ;
multicurve :
MCURVE_TOK LBRACKET_TOK curve_list RBRACKET_TOK {} |
MCURVE_TOK DIMENSIONALITY_TOK LBRACKET_TOK curve_list RBRACKET_TOK {} |
MCURVE_TOK EMPTY_TOK {} ;
/* Members may be circular strings, tagged linestrings or untagged linestrings */
curve_list :
curve_list COMMA_TOK circularstring {} |
curve_list COMMA_TOK linestring {} |
curve_list COMMA_TOK linestring_untagged {} |
circularstring {} |
linestring {} |
linestring_untagged {} ;
/* MULTILINESTRING: list of untagged linestrings only */
multilinestring :
MLINESTRING_TOK LBRACKET_TOK linestring_list RBRACKET_TOK {} |
MLINESTRING_TOK DIMENSIONALITY_TOK LBRACKET_TOK linestring_list RBRACKET_TOK {} |
MLINESTRING_TOK EMPTY_TOK {} ;
linestring_list :
linestring_list COMMA_TOK linestring_untagged {} |
linestring_untagged {} ;
/* CIRCULARSTRING keyword followed by a bracketed coordinate list, or EMPTY */
circularstring :
CIRCULARSTRING_TOK LBRACKET_TOK ptarray RBRACKET_TOK {} |
CIRCULARSTRING_TOK DIMENSIONALITY_TOK LBRACKET_TOK ptarray RBRACKET_TOK {} |
CIRCULARSTRING_TOK EMPTY_TOK {} ;
/*
* LINESTRING with its keyword: optional dimensionality token, then either a
* bracketed coordinate list or EMPTY. A NULL point array means EMPTY; a NULL
* dimensionality means the WKT had no explicit dimensionality token.
*/
linestring :
	LINESTRING_TOK LBRACKET_TOK ptarray RBRACKET_TOK
		{ $$ = wkt_parser_linestring_new($3, NULL); } |
	LINESTRING_TOK DIMENSIONALITY_TOK LBRACKET_TOK ptarray RBRACKET_TOK
		{ $$ = wkt_parser_linestring_new($4, $2); } |
	LINESTRING_TOK DIMENSIONALITY_TOK EMPTY_TOK
		{ $$ = wkt_parser_linestring_new(NULL, $2); } |
	LINESTRING_TOK EMPTY_TOK
		{ $$ = wkt_parser_linestring_new(NULL, NULL); } ;
/*
* A linestring written without its keyword, as it appears inside a
* multi-geometry. There is no dimensionality token at this level, so NULL
* is passed for it.
*/
linestring_untagged :
	LBRACKET_TOK ptarray RBRACKET_TOK
		{ $$ = wkt_parser_linestring_new($2, NULL); } ;
/*
* MULTIPOINT: optional dimensionality token, then a bracketed coordinate
* list or EMPTY. Actions are still empty placeholders.
*/
multipoint :
MPOINT_TOK LBRACKET_TOK ptarray RBRACKET_TOK {} |
MPOINT_TOK DIMENSIONALITY_TOK LBRACKET_TOK ptarray RBRACKET_TOK {} |
MPOINT_TOK EMPTY_TOK {} ;
/*
* POINT: optional dimensionality token, then one bracketed coordinate or
* EMPTY. Actions are still empty placeholders.
*/
point :
POINT_TOK LBRACKET_TOK coordinate RBRACKET_TOK {} |
POINT_TOK DIMENSIONALITY_TOK LBRACKET_TOK coordinate RBRACKET_TOK {} |
POINT_TOK EMPTY_TOK {} ;
/*
* Comma-separated list of one or more coordinates (left-recursive). The
* first coordinate creates the point array, sized from its own ordinate
* count; each later coordinate is appended to that same array.
* NOTE(review): later coordinates are not checked against the ordinate
* count of the first one.
*/
ptarray :
	ptarray COMMA_TOK coordinate
		{ $$ = $1; wkt_parser_ptarray_add_coord($$, $3); } |
	coordinate
		{ $$ = wkt_parser_ptarray_new(FLAGS_NDIMS(($1).flags)); wkt_parser_ptarray_add_coord($$, $1); } ;
/*
* A coordinate is two, three or four consecutive numbers; the builder that
* is called matches the number of values found.
*/
coordinate :
DOUBLE_TOK DOUBLE_TOK
{ $$ = wkt_parser_coord_2($1, $2); } |
DOUBLE_TOK DOUBLE_TOK DOUBLE_TOK
{ $$ = wkt_parser_coord_3($1, $2, $3); } |
DOUBLE_TOK DOUBLE_TOK DOUBLE_TOK DOUBLE_TOK
{ $$ = wkt_parser_coord_4($1, $2, $3, $4); } ;
%%

View file

@ -24,7 +24,7 @@
* use SRID=-1 for unknown SRID (will have 8bit type's S = 0)
*/
LWLINE *
lwline_construct(int SRID, BOX2DFLOAT4 *bbox, POINTARRAY *points)
lwline_construct(int srid, BOX2DFLOAT4 *bbox, POINTARRAY *points)
{
LWLINE *result;
result = (LWLINE*) lwalloc(sizeof(LWLINE));
@ -34,12 +34,12 @@ lwline_construct(int SRID, BOX2DFLOAT4 *bbox, POINTARRAY *points)
result->type = lwgeom_makeType_full(
TYPE_HASZ(points->dims),
TYPE_HASM(points->dims),
(SRID!=-1), LINETYPE,
(srid!=-1), LINETYPE,
0);
LWDEBUGF(3, "lwline_construct type=%d", result->type);
result->SRID = SRID;
result->SRID = srid;
result->points = points;
result->bbox = bbox;

View file

@ -42,6 +42,74 @@ ptarray_construct(char hasz, char hasm, uint32 npoints)
}
/**
* Build a POINTARRAY that holds no points yet but already owns storage for
* 32 of them.
*
* @param hasz non-zero if each point carries a Z ordinate
* @param hasm non-zero if each point carries an M ordinate
* @return the newly allocated array
*/
POINTARRAY*
ptarray_construct_empty(char hasz, char hasm)
{
	uchar zm_bits = 0;
	POINTARRAY *result = lwalloc(sizeof(POINTARRAY));

	/* Record in the dims bitmap which optional ordinates are present */
	TYPE_SETZM(zm_bits, hasz?1:0, hasm?1:0);
	result->dims = zm_bits;

	/* Nothing stored so far, room for 32 points */
	result->npoints = 0;
	result->maxpoints = 32;

	/* One double per ordinate per point */
	result->serialized_pointlist =
		(uchar *)lwalloc(TYPE_NDIMS(zm_bits) * result->maxpoints * sizeof(double));

	return result;
}
/**
* Append a point to a POINTARRAY, allocating or growing its storage as
* needed.
*
* @param pa the array to append to
* @param pt the point to copy in; only the ordinates selected by pa->dims
*        are stored
* @return LW_TRUE on success, LW_FALSE if pa or pt is NULL
*/
int
ptarray_add_point(POINTARRAY *pa, POINT4D *pt)
{
	size_t size;
	size_t i;
	double *d;

	/* Check for pathology */
	if( ! pa || ! pt )
		return LW_FALSE;

	/*
	* If we have no storage, let's allocate some.
	* NOTE(review): when maxpoints is 0 but a point list already exists,
	* that list is replaced without being released or copied -- confirm
	* callers never reach this state.
	*/
	if( pa->maxpoints == 0 || ! pa->serialized_pointlist )
	{
		pa->maxpoints = 32;
		size = TYPE_NDIMS(pa->dims) * pa->maxpoints * sizeof(double);
		pa->serialized_pointlist = lwalloc(size);
	}

	/*
	* Check if we have enough storage, add more if necessary. The test is
	* ">=" and the capacity keeps growing until slot npoints really fits,
	* so a count that is already past the capacity can never lead to a
	* write outside the allocation.
	*/
	if( pa->npoints >= pa->maxpoints )
	{
		do
		{
			pa->maxpoints = (pa->maxpoints + 1) * 2;
		}
		while( pa->npoints >= pa->maxpoints );

		size = pa->maxpoints * TYPE_NDIMS(pa->dims) * sizeof(double);
		pa->serialized_pointlist = lwrealloc(pa->serialized_pointlist, size);
	}

	/* Hope this is double-aligned storage... */
	/* Write in the double values, starting at the first free slot */
	d = (double*)pa->serialized_pointlist;
	i = (size_t)pa->npoints * TYPE_NDIMS(pa->dims);
	d[i++] = pt->x;
	d[i++] = pt->y;
	if( TYPE_HASZ(pa->dims) )
		d[i++] = pt->z;
	if( TYPE_HASM(pa->dims) )
		d[i++] = pt->m;

	/* Increment our point count */
	pa->npoints++;

	return LW_TRUE;
}
POINTARRAY*
ptarray_construct_copy_data(char hasz, char hasm, uint32 npoints, const uchar *ptlist)
{