From 44842f599e63054307e3dc073479b419b5a39629 Mon Sep 17 00:00:00 2001 From: John Fieber Date: Tue, 4 Jun 1996 19:09:50 +0000 Subject: [PATCH] Upgrade from 1.1 to 1.1.91. Unknown to me, the latter version was actually available at the time I brought in the former. Lots of assorted bug fixes and much needed support for catalogs. --- usr.bin/sgmls/Makefile.inc | 6 +- usr.bin/sgmls/libsgmls/Makefile | 2 +- usr.bin/sgmls/libsgmls/sgmls.c | 32 +- usr.bin/sgmls/libsgmls/sgmls.h | 2 +- usr.bin/sgmls/rast/Makefile | 2 +- usr.bin/sgmls/rast/rast.c | 80 ++- usr.bin/sgmls/sgmls/Makefile | 13 +- usr.bin/sgmls/sgmls/action.h | 1 + usr.bin/sgmls/sgmls/alloc.h | 8 + usr.bin/sgmls/sgmls/ambig.c | 2 +- usr.bin/sgmls/sgmls/appl.h | 2 - usr.bin/sgmls/sgmls/catalog.c | 925 ++++++++++++++++++++++++++++++ usr.bin/sgmls/sgmls/catalog.h | 45 ++ usr.bin/sgmls/sgmls/config.h | 13 +- usr.bin/sgmls/sgmls/context.c | 41 +- usr.bin/sgmls/sgmls/context.h | 2 + usr.bin/sgmls/sgmls/ebcdic.h | 15 - usr.bin/sgmls/sgmls/entgen.c | 126 +++- usr.bin/sgmls/sgmls/entity.h | 5 +- usr.bin/sgmls/sgmls/etype.h | 4 +- usr.bin/sgmls/sgmls/genlex.c | 46 +- usr.bin/sgmls/sgmls/getopt.c | 2 +- usr.bin/sgmls/sgmls/latin1.h | 14 - usr.bin/sgmls/sgmls/lexcode.h | 1 + usr.bin/sgmls/sgmls/lexrf.c | 3 +- usr.bin/sgmls/sgmls/lextaba.c | 211 ++++++- usr.bin/sgmls/sgmls/lextabe.c | 175 +++++- usr.bin/sgmls/sgmls/lineout.c | 19 +- usr.bin/sgmls/sgmls/main.c | 62 +- usr.bin/sgmls/sgmls/md1.c | 10 +- usr.bin/sgmls/sgmls/md2.c | 47 +- usr.bin/sgmls/sgmls/msg.h | 28 +- usr.bin/sgmls/sgmls/msgcat.c | 33 +- usr.bin/sgmls/sgmls/pars1.c | 78 ++- usr.bin/sgmls/sgmls/pars2.c | 63 +- usr.bin/sgmls/sgmls/pcbrf.c | 129 +++-- usr.bin/sgmls/sgmls/portproc.c | 1 + usr.bin/sgmls/sgmls/serv.c | 2 +- usr.bin/sgmls/sgmls/sgml1.c | 28 +- usr.bin/sgmls/sgmls/sgml2.c | 43 +- usr.bin/sgmls/sgmls/sgmlaux.h | 2 + usr.bin/sgmls/sgmls/sgmldecl.c | 168 ++++-- usr.bin/sgmls/sgmls/sgmldecl.h | 48 +- usr.bin/sgmls/sgmls/sgmlfnsm.h | 1 + 
usr.bin/sgmls/sgmls/sgmlio.c | 2 +- usr.bin/sgmls/sgmls/sgmlmsg.c | 32 +- usr.bin/sgmls/sgmls/sgmls.1 | 184 ++++-- usr.bin/sgmls/sgmls/sgmlxtrn.c | 4 +- usr.bin/sgmls/sgmls/sgmlxtrn.h | 4 +- usr.bin/sgmls/sgmls/std.h | 6 - usr.bin/sgmls/sgmls/synxtrn.h | 2 + usr.bin/sgmls/sgmls/trace.h | 16 +- usr.bin/sgmls/sgmls/traceset.c | 41 +- usr.bin/sgmls/sgmls/version.c | 2 +- usr.bin/sgmls/sgmls/xfprintf.c | 20 +- usr.bin/sgmls/sgmlsasp/Makefile | 2 +- usr.bin/sgmls/sgmlsasp/replace.c | 15 +- usr.bin/sgmls/sgmlsasp/replace.h | 4 +- usr.bin/sgmls/sgmlsasp/sgmlsasp.1 | 2 +- usr.bin/sgmls/sgmlsasp/sgmlsasp.c | 6 +- usr.bin/sgmls/unix.cfg | 18 + 61 files changed, 2385 insertions(+), 515 deletions(-) create mode 100644 usr.bin/sgmls/sgmls/alloc.h create mode 100644 usr.bin/sgmls/sgmls/catalog.c create mode 100644 usr.bin/sgmls/sgmls/catalog.h diff --git a/usr.bin/sgmls/Makefile.inc b/usr.bin/sgmls/Makefile.inc index 1e4fc2b44b08..0faf5112c73d 100644 --- a/usr.bin/sgmls/Makefile.inc +++ b/usr.bin/sgmls/Makefile.inc @@ -1,13 +1,15 @@ # # Bmakefile for rast # -# $id$ +# $Id$ # +.if exists(${.CURDIR}/../../Makefile.inc) .include "${.CURDIR}/../../Makefile.inc" +.endif .if exists(${.CURDIR}/../libsgmls/obj) LIBSGMLS= ${.CURDIR}/../libsgmls/obj/libsgmls.a .else LIBSGMLS= ${.CURDIR}/../libsgmls/libsgmls.a -.endif \ No newline at end of file +.endif diff --git a/usr.bin/sgmls/libsgmls/Makefile b/usr.bin/sgmls/libsgmls/Makefile index e94fcc44f4ba..0d058f36546a 100644 --- a/usr.bin/sgmls/libsgmls/Makefile +++ b/usr.bin/sgmls/libsgmls/Makefile @@ -1,7 +1,7 @@ # # Bmakefile for libsgmls # -# $id$ +# $Id$ # LIB= sgmls diff --git a/usr.bin/sgmls/libsgmls/sgmls.c b/usr.bin/sgmls/libsgmls/sgmls.c index cbb03f123f8b..4e2595745ef4 100644 --- a/usr.bin/sgmls/libsgmls/sgmls.c +++ b/usr.bin/sgmls/libsgmls/sgmls.c @@ -8,22 +8,12 @@ #include "sgmls.h" #include "lineout.h" -#ifdef __GNUC__ -#define NO_RETURN volatile -#else -#define NO_RETURN /* as nothing */ -#endif - #ifdef USE_PROTOTYPES #define 
P(parms) parms #else #define P(parms) () #endif -#ifndef __STDC__ -#define const /* as nothing */ -#endif - typedef struct sgmls_data data_s; typedef struct sgmls_notation notation_s; typedef struct sgmls_internal_entity internal_entity_s; @@ -112,7 +102,7 @@ static char *errlist[] = { "Input line too long" }; -static void NO_RETURN error P((enum error_code)); +static void error P((enum error_code)); static int parse_data P((char *, unsigned long *)); static void parse_location P((char *, struct sgmls *)); static void parse_notation P((char *, notation_s *)); @@ -303,7 +293,7 @@ int sgmls_next(sp, e) char *name; attribute_s *a; external_entity_s *ext; - + name = scan_token(&p); a = parse_attribute(sp, p); ext = lookup_external_entity(sp, name); @@ -449,7 +439,7 @@ int parse_data(p, linenop) else *q++ = *p++; } - + if (q > start || is_sdata) { if (n >= datav_size) grow_datav(); @@ -656,7 +646,7 @@ data_s *copy_data(v, n) unsigned total; char *p; data_s *result; - + result = (data_s *)xmalloc(n*sizeof(data_s)); total = 0; for (i = 0; i < n; i++) @@ -683,7 +673,11 @@ char *unescape(s) char *s; { int len = unescape1(s); - if (memchr(s, '\0', len)) + if ( +#ifdef __BORLANDC__ + len > 0 && +#endif + memchr(s, '\0', len)) error(E_NULESCAPE); s[len] = '\0'; return s; @@ -810,7 +804,7 @@ int read_line(sp) error(E_SYSTEM); return 0; } - + sp->input_lineno++; input_lineno = sp->input_lineno; for (;;) { @@ -973,13 +967,15 @@ static void add_attribute(pp, a) attribute_s **pp, *a; { +#if 0 for (; *pp && strcmp((*pp)->name, a->name) < 0; pp = &(*pp)->next) ; +#endif a->next = *pp; *pp = a; } - + static char *strsave(s) char *s; @@ -1017,7 +1013,7 @@ UNIV xrealloc(p, n) return p; } -static NO_RETURN +static void error(num) enum error_code num; { diff --git a/usr.bin/sgmls/libsgmls/sgmls.h b/usr.bin/sgmls/libsgmls/sgmls.h index c327f15353ce..79b26588c03a 100644 --- a/usr.bin/sgmls/libsgmls/sgmls.h +++ b/usr.bin/sgmls/libsgmls/sgmls.h @@ -35,7 +35,7 @@ struct sgmls_external_entity { 
struct sgmls_attribute *attributes; struct sgmls_notation *notation; }; - + struct sgmls_entity { union { struct sgmls_internal_entity internal; diff --git a/usr.bin/sgmls/rast/Makefile b/usr.bin/sgmls/rast/Makefile index 4c8a7c26ba03..214286ac1fba 100644 --- a/usr.bin/sgmls/rast/Makefile +++ b/usr.bin/sgmls/rast/Makefile @@ -1,7 +1,7 @@ # # Bmakefile for rast # -# $id$ +# $Id$ # PROG= rast diff --git a/usr.bin/sgmls/rast/rast.c b/usr.bin/sgmls/rast/rast.c index 2634679efae0..f9571874896f 100644 --- a/usr.bin/sgmls/rast/rast.c +++ b/usr.bin/sgmls/rast/rast.c @@ -37,6 +37,7 @@ NO_RETURN void error VP((char *,...)); static void input_error P((int, char *, unsigned long)); static int do_file P((FILE *)); static void usage P((void)); +static void init_sort_code P((void)); static void output_processing_instruction P((char *, unsigned)); static void output_data P((struct sgmls_data *, int)); @@ -47,6 +48,7 @@ static void output_external_entity_info P((struct sgmls_external_entity *)); static void output_element_start P((char *, struct sgmls_attribute *)); static void output_element_end P((char *)); static void output_attribute P((struct sgmls_attribute *)); +static void output_attribute_list P((struct sgmls_attribute *)); static void output_tokens P((char **, int)); static void output_markup_chars P((char *, unsigned)); static void output_markup_string P((char *)); @@ -56,6 +58,8 @@ static void output_external_id P((char *, char *)); static void output_entity P((struct sgmls_entity *)); static void output_external_entity_info P((struct sgmls_external_entity *)); static void output_internal_entity P((struct sgmls_internal_entity *)); +/* Don't use a prototype here to avoid problems with qsort. 
*/ +static int compare_attributes(); #define output_flush_markup() output_flush('!') #define output_flush_data() output_flush('|') @@ -64,6 +68,10 @@ static FILE *outfp; static int char_count = 0; static char *program_name; +static short sort_code[256]; +static struct sgmls_attribute **attribute_vector = 0; +static int attribute_vector_length = 0; + int main(argc, argv) int argc; char **argv; @@ -107,6 +115,8 @@ int main(argc, argv) (void)sgmls_set_errhandler(input_error); + init_sort_code(); + if (!do_file(stdin)) { fclose(outfp); if (output_file) { @@ -140,6 +150,18 @@ void usage() exit(EXIT_FAILURE); } +static +void init_sort_code() +{ + int i; + static char print[] = "!\"#$%&'()*+,-./0123456789:;<=>?\ +@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"; + for (i = 0; i < 256; i++) + sort_code[i] = i + 128; + for (i = 0; print[i]; i++) + sort_code[(unsigned char)print[i]] = i; +} + static int do_file(fp) FILE *fp; @@ -269,10 +291,8 @@ void output_element_start(gi, att) { fprintf(outfp, "[%s", gi); if (att) { - struct sgmls_attribute *p; putc('\n', outfp); - for (p = att; p; p = p->next) - output_attribute(p); + output_attribute_list(att); } fputs("]\n", outfp); } @@ -284,6 +304,54 @@ void output_element_end(gi) fprintf(outfp, "[/%s]\n", gi); } +static +void output_attribute_list(att) + struct sgmls_attribute *att; +{ + struct sgmls_attribute *p; + int n = 0; + int i; + + for (p = att; p; p = p->next) + n++; + if (attribute_vector_length < n) { + if (attribute_vector_length == 0) + attribute_vector + = (struct sgmls_attribute **)malloc(n*sizeof(*attribute_vector)); + else + attribute_vector + = (struct sgmls_attribute **)realloc((UNIV)attribute_vector, + n*sizeof(*attribute_vector)); + attribute_vector_length = n; + if (!attribute_vector) + error("Out of memory"); + } + i = 0; + for (p = att; p; p = p->next) + attribute_vector[i++] = p; + qsort(attribute_vector, n, sizeof(attribute_vector[0]), compare_attributes); + for (i = 0; i < n; i++) + 
output_attribute(attribute_vector[i]); +} + +static +int compare_attributes(p1, p2) + UNIV p1, p2; +{ + char *s1 = (*(struct sgmls_attribute **)p1)->name; + char *s2 = (*(struct sgmls_attribute **)p2)->name; + + for (; *s1 && *s2; s1++, s2++) + if (*s1 != *s2) + return sort_code[(unsigned char)*s1] - sort_code[(unsigned char)*s2]; + if (*s1) + return 1; + else if (*s2) + return -1; + else + return 0; +} + static void output_attribute(p) struct sgmls_attribute *p; @@ -477,11 +545,9 @@ void output_external_entity_info(e) putc('\n', outfp); output_external_id(e->pubid, e->sysid); if (e->type != SGMLS_ENTITY_SUBDOC) { - struct sgmls_attribute *p; fprintf(outfp, "#NOTATION=%s\n", e->notation->name); output_external_id(e->notation->pubid, e->notation->sysid); - for (p = e->attributes; p; p = p->next) - output_attribute(p); + output_attribute_list(e->attributes); } } @@ -518,7 +584,7 @@ void error(char *message,...) char *message; #endif va_list ap; - + fprintf(stderr, "%s: ", program_name); #ifdef VARARGS va_start(ap); diff --git a/usr.bin/sgmls/sgmls/Makefile b/usr.bin/sgmls/sgmls/Makefile index 3a0a0cf805fc..b46e9f6f56e6 100644 --- a/usr.bin/sgmls/sgmls/Makefile +++ b/usr.bin/sgmls/sgmls/Makefile @@ -1,18 +1,19 @@ # # Bmakefile for sgmls # -# $id$ +# $Id$ # -PROG= sgmls +PROG= sgmls -SRCS+= lexrf.c pcbrf.c synrf.c context.c md1.c md2.c pars1.c pars2.c serv.c -SRCS+= sgml1.c sgml2.c sgmlmsg.c sgmlxtrn.c traceset.c entgen.c sgmlio.c -SRCS+= xfprintf.c main.c unixproc.c sgmldecl.c version.c strerror.c getopt.c -SRCS+= msgcat.c lineout.c ambig.c exclude.c lextaba.c +SRCS= lexrf.c pcbrf.c synrf.c context.c md1.c md2.c pars1.c pars2.c serv.c +SRCS+= sgml1.c sgml2.c sgmlmsg.c sgmlxtrn.c traceset.c entgen.c sgmlio.c +SRCS+= xfprintf.c main.c unixproc.c sgmldecl.c version.c strerror.c getopt.c +SRCS+= lineout.c ambig.c lextaba.c catalog.c CFLAGS+= -I${.CURDIR}/../libsgmls .include "../Makefile.inc" .include + diff --git a/usr.bin/sgmls/sgmls/action.h 
b/usr.bin/sgmls/sgmls/action.h index 08475bf8f5f1..03bf47840d43 100644 --- a/usr.bin/sgmls/sgmls/action.h +++ b/usr.bin/sgmls/sgmls/action.h @@ -48,6 +48,7 @@ #define MSP_ 75 /* Marked section start in prolog outside DTD */ #define APP_ 76 /* APPINFO (other than NONE) */ #define STE_ 77 /* Start tag ended prolog */ +#define ETE_ 78 /* End tag ended prolog */ /* GRPACT.H: Symbols for group tokenization action names (all alpha). There must be no conflict with PARSEACT.H, which diff --git a/usr.bin/sgmls/sgmls/alloc.h b/usr.bin/sgmls/sgmls/alloc.h new file mode 100644 index 000000000000..d732178e735d --- /dev/null +++ b/usr.bin/sgmls/sgmls/alloc.h @@ -0,0 +1,8 @@ +/* alloc.h */ + +typedef unsigned SIZE_T; + +/* Like malloc and realloc, but don't return if no memory is available. */ + +extern UNIV xmalloc P((SIZE_T)); +extern UNIV xrealloc P((UNIV, SIZE_T)); diff --git a/usr.bin/sgmls/sgmls/ambig.c b/usr.bin/sgmls/sgmls/ambig.c index 942aa5dab87b..9da02eb5a721 100644 --- a/usr.bin/sgmls/sgmls/ambig.c +++ b/usr.bin/sgmls/sgmls/ambig.c @@ -102,7 +102,7 @@ VOID ambig() { struct contoken *s; int i; - + if (!follow) { /* We can't allocate everything in one chunk, because that would overflow a 16-bit unsigned if GRPGTCNT was 253. */ diff --git a/usr.bin/sgmls/sgmls/appl.h b/usr.bin/sgmls/sgmls/appl.h index 404d74994a64..2513c98c9d73 100644 --- a/usr.bin/sgmls/sgmls/appl.h +++ b/usr.bin/sgmls/sgmls/appl.h @@ -15,8 +15,6 @@ enum { VOID process_document P((int)); VOID output_conforming P((void)); -UNIV xmalloc P((UNS)); -UNIV xrealloc P((UNIV, UNS)); VOID appl_error VP((int, ...)); #ifdef SUPPORT_SUBDOC diff --git a/usr.bin/sgmls/sgmls/catalog.c b/usr.bin/sgmls/sgmls/catalog.c new file mode 100644 index 000000000000..164b97d04957 --- /dev/null +++ b/usr.bin/sgmls/sgmls/catalog.c @@ -0,0 +1,925 @@ +/* Normalize public identifiers to handle ISO 8879[-:]1986 problem. +What should happen if there's a duplicate in a single catalog entry file? 
*/ + +#include "config.h" +#include "std.h" +#include "catalog.h" + +#ifdef USE_PROTOTYPES +#define P(parms) parms +#else +#define P(parms) () +#endif + +#include "alloc.h" + +#define MINIMUM_DATA_CHARS \ +"abcdefghijklmnopqrstuvwxyz\ +ABCDEFGHIJKLMNOPQRSTUVWXYZ\ +0123456789-.'()+,/:=?" + +#define N_DECL_TYPE 3 +#define PUBLIC_ID_MAP N_DECL_TYPE +#define N_TABLES (N_DECL_TYPE + 1) + +enum literal_type { + NORMAL_LITERAL, + MINIMUM_LITERAL +}; + +typedef enum { + EOF_PARAM, + NAME_PARAM, + LITERAL_PARAM +} PARAM_TYPE; + +enum catalog_error { + E_NAME_EXPECTED, + E_LITERAL_EXPECTED, + E_ARG_EXPECTED, + E_MINIMUM_DATA, + E_EOF_COMMENT, + E_EOF_LITERAL, + E_NUL_CHAR, + E_CANNOT_OPEN, + E_GETC, + E_FCLOSE +}; + +#define FIRST_SYSTEM_ERROR E_CANNOT_OPEN + +#define HASH_TABLE_INITIAL_SIZE 8 +#define HASH_TABLE_MAX_SIZE (((SIZE_T)-1)/sizeof(struct hash_table_entry *)) + +struct hash_table_entry { + int file_index; + const char *key; + const char *system_id; +}; + +/* Number of bytes per string block. */ +#define BLOCK_SIZE 1000 + +/* Bytes follow the struct. 
*/ + +struct string_block { + struct string_block *next; +}; + +struct hash_table { + struct hash_table_entry **v; + SIZE_T size; /* must be power of 2 */ + SIZE_T used; + SIZE_T used_limit; +}; + +struct catalog { + struct hash_table tables[N_TABLES]; + char **files; + int n_files; + struct string_block *blocks; + char *block_ptr; + SIZE_T block_spare; + CATALOG_ERROR_HANDLER error_handler; + int loaded; +}; + +struct parser { + FILE *fp; + struct catalog *cat; + char *param; + SIZE_T param_length; + SIZE_T param_alloc; + int file_index; + const char *filename; + unsigned long newline_count; + char minimum_data[256]; +}; + +static +VOID add_catalog_file P((struct catalog *cat, const char *filename, + SIZE_T length)); +static +VOID load P((struct catalog *cat)); +static +VOID parse_file P((struct parser *parser)); +static +VOID parse_public P((struct parser *parser)); +static +VOID parse_name_map P((struct parser *parser, + int decl_type)); +static +int parse_arg P((struct parser *parser)); +static +PARAM_TYPE parse_param P((struct parser *parser, enum literal_type)); +static +VOID skip_comment P((struct parser *parser)); +static +PARAM_TYPE parse_literal P((struct parser *parser, int lit, + enum literal_type)); +static +PARAM_TYPE parse_name P((struct parser *parser, int first_char)); +static +VOID param_grow P((struct parser *parser)); +static +const char *param_save P((struct parser *parser)); +static +char *alloc_bytes P((struct catalog *catalog, SIZE_T n)); +static +int param_equal P((struct parser *parser, const char *key)); +static +int hash_table_add P((struct hash_table *table, const char *s, + const char *system_id, int file_index)); +static +struct hash_table_entry *hash_table_lookup P((struct hash_table *table, + const char *s)); +static +struct hash_table_entry *hash_table_lookup_subst P((struct hash_table *table, + const char *subst_table, + const char *s)); +static +VOID hash_table_init P((struct hash_table *p)); +static +VOID hash_table_delete 
P((struct hash_table *p)); +static +SIZE_T hash_table_start_index P((struct hash_table *p, const char *s)); +static +int subst_equal P((const char *subst_table, const char *s1, const char *s2)); +static +VOID error P((struct parser *parser, enum catalog_error err)); + +#define param_char(parser, c) \ + ((((parser)->param_length < (parser)->param_alloc) \ + || (param_grow(parser), 1)), \ + ((parser)->param[(parser)->param_length] = (c)), \ + ((parser)->param_length += 1)) + +#define param_init(parser) ((parser)->param_length = 0) +#define param_chop(parser) \ + ((parser)->param_length = (parser)->param_length - 1) + +const char *catalog_error_text(error_number) + int error_number; +{ + static const char *text[] = { + "Name expected", + "Literal expected", + "Missing argument", + "Only minimum data characters allowed in a public identifier", + "End of file in comment", + "End of file in literal", + "Nul character is not allowed", + "Cannot open `%s': %s", + "Error reading `%s': %s", + "Error closing `%s': %s" + }; + if (error_number >= 0 && error_number < sizeof(text)/sizeof(text[0])) + return text[error_number]; + else + return "(invalid error number)"; +} + + +CATALOG catalog_create(error_handler) + CATALOG_ERROR_HANDLER error_handler; +{ + int i; + struct catalog *p = (struct catalog *)xmalloc(sizeof(struct catalog)); + p->loaded = 0; + p->n_files = 0; + p->files = 0; + p->error_handler = error_handler; + p->blocks = 0; + p->block_spare = 0; + p->block_ptr = 0; + for (i = 0; i < N_TABLES; i++) + hash_table_init(p->tables + i); + return (CATALOG)p; +} + +VOID catalog_delete(cat) + CATALOG cat; +{ + int i; + struct string_block *block; + struct catalog *catalog = (struct catalog *)cat; + for (i = 0; i < 4; i++) + hash_table_delete(catalog->tables + i); + if (catalog->files) + free(catalog->files); + block = catalog->blocks; + while (block) { + struct string_block *tem = block; + block = block->next; + free((UNIV)tem); + } + catalog->blocks = 0; + 
free((UNIV)catalog); +} + +VOID catalog_load_file(p, filename) + CATALOG p; + const char *filename; +{ + add_catalog_file((struct catalog *)p, filename, strlen(filename)); +} + +int catalog_lookup_entity(cat, public_id, name, decl_type, subst_table, + system_id, catalog_file) + CATALOG cat; + const char *public_id; + const char *name; + enum catalog_decl_type decl_type; + const char *subst_table; + const char **system_id; + const char **catalog_file; +{ + struct catalog *catalog = (struct catalog *)cat; + const struct hash_table_entry *entry = 0; + if (!catalog->loaded) + load(catalog); + if (public_id) + entry = hash_table_lookup(catalog->tables + PUBLIC_ID_MAP, public_id); + if (name + && decl_type >= 0 + && decl_type < N_DECL_TYPE + && (!entry || entry->file_index > 0)) { + const struct hash_table_entry *entity_entry = 0; + if (!subst_table) + entity_entry = hash_table_lookup(catalog->tables + decl_type, name); + else + entity_entry = hash_table_lookup_subst(catalog->tables + decl_type, + subst_table, name); + if (!entry + || (entity_entry + && entity_entry->file_index < entry->file_index)) + entry = entity_entry; + } + if (!entry) + return 0; + *system_id = entry->system_id; + *catalog_file = catalog->files[entry->file_index]; + return 1; +} + +static +VOID add_catalog_file(cat, filename, length) + struct catalog *cat; + const char *filename; + SIZE_T length; +{ + char *s; + if (!cat->files) + cat->files = (char **)xmalloc(sizeof(char *)); + else + cat->files + = (char **)xrealloc(cat->files, (cat->n_files + 1)*sizeof(char *)); + s = alloc_bytes(cat, length + 1); + memcpy(s, filename, length); + s[length] = '\0'; + cat->files[cat->n_files] = s; + cat->n_files += 1; +} + +static +VOID load(cat) + struct catalog *cat; +{ + int i; + const char *p; + struct parser parser; + const char *env_var; + int optional_file_index = cat->n_files; + + cat->loaded = 1; + parser.param = 0; + parser.param_alloc = 0; + parser.cat = cat; + for (i = 0; i < 256; i++) + 
parser.minimum_data[i] = 0; + for (p = MINIMUM_DATA_CHARS; *p; p++) + parser.minimum_data[(unsigned char)*p] = 1; + env_var = getenv(CATALOG_FILES_ENV_VAR); + if (!env_var || *env_var == '\0') + env_var = DEFAULT_CATALOG_FILES; + for (;;) { + for (p = env_var; *p && *p != PATH_FILE_SEP; p++) + ; + if (p > env_var) + add_catalog_file(cat, env_var, p - env_var); + if (!*p) + break; + env_var = p + 1; + } + for (i = 0; i < cat->n_files; i++) { + parser.filename = cat->files[i]; + parser.newline_count = 0; + parser.fp = fopen(cat->files[i], "r"); + if (!parser.fp) { + if (i < optional_file_index) + error(&parser, E_CANNOT_OPEN); + } + else { + parser.file_index = i; + parse_file(&parser); + errno = 0; + if (fclose(parser.fp) < 0) + error(&parser, E_FCLOSE); + } + } + if (parser.param) + free(parser.param); +} + +static +VOID parse_file(parser) + struct parser *parser; +{ + int skipping = 0; + for (;;) { + PARAM_TYPE type = parse_param(parser, NORMAL_LITERAL); + if (type == NAME_PARAM) { + if (param_equal(parser, "PUBLIC")) + parse_public(parser); + else if (param_equal(parser, "ENTITY")) + parse_name_map(parser, CATALOG_ENTITY_DECL); + else if (param_equal(parser, "DOCTYPE")) + parse_name_map(parser, CATALOG_DOCTYPE_DECL); + else if (param_equal(parser, "LINKTYPE")) + parse_name_map(parser, CATALOG_LINKTYPE_DECL); + else + skipping = 1; + } + else if (type == EOF_PARAM) + break; + else if (!skipping) { + skipping = 1; + error(parser, E_NAME_EXPECTED); + } + } +} + +static +VOID parse_public(parser) + struct parser *parser; +{ + const char *public_id; + + if (parse_param(parser, MINIMUM_LITERAL) != LITERAL_PARAM) + error(parser, E_LITERAL_EXPECTED); + public_id = param_save(parser); + if (!parse_arg(parser)) + return; + hash_table_add(parser->cat->tables + PUBLIC_ID_MAP, + public_id, param_save(parser), parser->file_index); +} + +static +VOID parse_name_map(parser, decl_type) + struct parser *parser; + int decl_type; +{ + const char *name; + + if (!parse_arg(parser)) + 
return; + name = param_save(parser); + if (!parse_arg(parser)) + return; + hash_table_add(parser->cat->tables + decl_type, + name, param_save(parser), parser->file_index); +} + +static +int parse_arg(parser) + struct parser *parser; +{ + PARAM_TYPE parm = parse_param(parser, NORMAL_LITERAL); + if (parm != NAME_PARAM && parm != LITERAL_PARAM) { + error(parser, E_ARG_EXPECTED); + return 0; + } + return 1; +} + +static +PARAM_TYPE parse_param(parser, lit_type) + struct parser *parser; + enum literal_type lit_type; +{ + for (;;) { + int c = getc(parser->fp); + switch (c) { + case EOF: + if (ferror(parser->fp)) + error(parser, E_GETC); + return EOF_PARAM; + case '"': + case '\'': + return parse_literal(parser, c, lit_type); + case '\n': + parser->newline_count += 1; + break; + case '\t': + case ' ': + break; + case '\0': + error(parser, E_NUL_CHAR); + break; + case '-': + c = getc(parser->fp); + if (c == '-') { + skip_comment(parser); + break; + } + ungetc(c, parser->fp); + c = '-'; + /* fall through */ + default: + return parse_name(parser, c); + } + } +} + +static +VOID skip_comment(parser) + struct parser *parser; +{ + FILE *fp = parser->fp; + for (;;) { + int c = getc(fp); + if (c == '-') { + c = getc(fp); + if (c == '-') + return; + } + if (c == EOF) { + if (ferror(fp)) + error(parser, E_GETC); + error(parser, E_EOF_COMMENT); + return; + } + if (c == '\n') + parser->newline_count += 1; + } +} + +static +PARAM_TYPE parse_literal(parser, lit, lit_type) + struct parser *parser; + int lit; + enum literal_type lit_type; +{ + enum { no, yes_begin, yes_middle } skipping = yes_begin; + FILE *fp = parser->fp; + param_init(parser); + for (;;) { + int c = getc(fp); + if (c == lit) + break; + switch (c) { + case '\0': + error(parser, E_NUL_CHAR); + break; + case EOF: + if (ferror(fp)) + error(parser, E_GETC); + error(parser, E_EOF_LITERAL); + return LITERAL_PARAM; + case '\n': + parser->newline_count += 1; + /* fall through */ + case ' ': + if (lit_type == MINIMUM_LITERAL) { + 
if (skipping == no) { + param_char(parser, ' '); + skipping = yes_middle; + } + } + else + param_char(parser, c); + break; + default: + if (lit_type == MINIMUM_LITERAL) { + if (!parser->minimum_data[c]) + error(parser, E_MINIMUM_DATA); + else { + skipping = no; + param_char(parser, c); + } + } + else + param_char(parser, c); + break; + } + } + if (skipping == yes_middle) + param_chop(parser); + return LITERAL_PARAM; +} + +static +PARAM_TYPE parse_name(parser, first_char) + struct parser *parser; + int first_char; +{ + FILE *fp = parser->fp; + param_init(parser); + param_char(parser, first_char); + for (;;) { + int c = getc(fp); + switch (c) { + case '\0': + error(parser, E_NUL_CHAR); + break; + case EOF: + if (ferror(fp)) + error(parser, E_GETC); + goto done; + case '\n': + parser->newline_count += 1; + goto done; + case ' ': + case '\t': + goto done; + case '"': + case '\'': + ungetc(c, fp); + goto done; + default: + param_char(parser, c); + } + } + done: + return NAME_PARAM; +} + +static +VOID param_grow(parser) + struct parser *parser; +{ + if (parser->param_alloc == 0) { + parser->param_alloc = 256; + parser->param = xmalloc(parser->param_alloc); + } + else { + parser->param_alloc *= 2; + parser->param = xrealloc(parser->param, parser->param_alloc); + } +} + +static +const char *param_save(parser) + struct parser *parser; +{ + char *s = alloc_bytes(parser->cat, parser->param_length + 1); + memcpy(s, parser->param, parser->param_length); + s[parser->param_length] = '\0'; + return s; +} + +static +char *alloc_bytes(catalog, n) + struct catalog *catalog; + SIZE_T n; +{ + char *tem; + if (n > catalog->block_spare) { + struct string_block *block; + SIZE_T block_size = n > BLOCK_SIZE ? 
n : BLOCK_SIZE; + block + = (struct string_block *)xmalloc(sizeof(struct string_block) + + block_size); + block->next = catalog->blocks; + catalog->blocks = block; + catalog->block_ptr = (char *)(block + 1); + catalog->block_spare = block_size; + } + tem = catalog->block_ptr; + catalog->block_ptr += n; + catalog->block_spare -= n; + return tem; +} + + +/* Return 1 if the current parameter is equal to key. */ + +static +int param_equal(parser, key) + struct parser *parser; + const char *key; +{ + const char *param = parser->param; + SIZE_T param_length = parser->param_length; + for (; param_length > 0; param++, param_length--, key++) { + unsigned char c; + if (*key == '\0') + return 0; + c = *param; + if (islower(c)) + c = toupper(c); + if (c != (unsigned char)*key) + return 0; + } + return *key == '\0'; +} + +/* Return 0 if it was a duplicate. */ + +static +int hash_table_add(table, s, system_id, file_index) + struct hash_table *table; + const char *s; + const char *system_id; + int file_index; +{ + SIZE_T i; + struct hash_table_entry *p; + + if (table->size > 0) { + i = hash_table_start_index(table, s); + while (table->v[i] != 0) { + if (strcmp(table->v[i]->key, s) == 0) + return 0; + if (i == 0) + i = table->size; + i--; + } + } + if (table->used >= table->used_limit) { + SIZE_T j; + struct hash_table_entry **old_table = table->v; + SIZE_T old_size = table->size; + if (old_size == 0) { + table->size = HASH_TABLE_INITIAL_SIZE; + table->used_limit = table->size/2; + } + else { + if (old_size > HASH_TABLE_MAX_SIZE/2) { + if (old_size == HASH_TABLE_MAX_SIZE) + return 0; /* FIXME: give an error? 
*/ + table->size = HASH_TABLE_MAX_SIZE; + table->used_limit = HASH_TABLE_MAX_SIZE - 1; + } + else { + table->size = (old_size << 1); + table->used_limit = table->size/2; + } + } + table->v + = (struct hash_table_entry **)xmalloc(sizeof(struct hash_table_entry *) + * table->size); + for (j = 0; j < table->size; j++) + table->v[j] = 0; + for (j = 0; j < old_size; j++) + if (old_table[j]) { + SIZE_T k = hash_table_start_index(table, old_table[j]->key); + while (table->v[k] != 0) { + if (k == 0) + k = table->size; + k--; + } + table->v[k] = old_table[j]; + } + if (old_table) + free((UNIV)old_table); + i = hash_table_start_index(table, s); + while (table->v[i] != 0) { + if (i == 0) + i = table->size; + i--; + } + } + p = (struct hash_table_entry *)xmalloc(sizeof(struct hash_table_entry)); + p->key = s; + p->system_id = system_id; + p->file_index = file_index; + table->v[i] = p; + table->used += 1; + return 1; +} + +static +struct hash_table_entry *hash_table_lookup(table, s) + struct hash_table *table; + const char *s; +{ + if (table->size > 0) { + SIZE_T i; + i = hash_table_start_index(table, s); + while (table->v[i] != 0) { + if (strcmp(table->v[i]->key, s) == 0) + return table->v[i]; + if (i == 0) + i = table->size; + i--; + } + } + return 0; +} + +static +struct hash_table_entry *hash_table_lookup_subst(table, subst_table, s) + struct hash_table *table; + const char *subst_table; + const char *s; +{ + SIZE_T i; + for (i = 0; i < table->size; i++) { + struct hash_table_entry *p = table->v[i]; + if (p && subst_equal(subst_table, s, p->key)) + return p; + } + return 0; +} + +static +VOID hash_table_init(p) + struct hash_table *p; +{ + p->v = 0; + p->size = 0; + p->used = 0; + p->used_limit = 0; +} + +static +VOID hash_table_delete(p) + struct hash_table *p; +{ + if (p->v) { + SIZE_T i; + for (i = 0; i < p->size; i++) + if (p->v[i]) + free(p->v[i]); + free(p->v); + } +} + +static +SIZE_T hash_table_start_index(p, s) + struct hash_table *p; + const char *s; +{ + unsigned 
long h = 0; + while (*s) + h = (h << 5) + h + (unsigned char)*s++; + return (h & (p->size - 1)); +} + +/* s1 has already been substituted; s2 has not */ + +static +int subst_equal(subst_table, s1, s2) + const char *subst_table; + const char *s1; + const char *s2; +{ + for (; *s1 == subst_table[(unsigned char)*s2]; s1++, s2++) + if (*s1 == '\0') + return 1; + return 0; +} + +static +VOID error(parser, err) + struct parser *parser; + enum catalog_error err; +{ + (*parser->cat->error_handler)(parser->filename, + parser->newline_count + 1, + err, + (err >= FIRST_SYSTEM_ERROR + ? CATALOG_SYSTEM_ERROR + : 0), + (err >= FIRST_SYSTEM_ERROR + ? errno + : 0)); +} + +#ifdef MAIN + +static const char *program_name; + +#include "getopt.h" + +static VOID usage P((void)); +static VOID out_of_memory P((void)); +static VOID handle_catalog_error P((const char *filename, + unsigned long lineno, + int error_number, + unsigned flags, + int sys_errno)); + +int main(argc, argv) + int argc; + char **argv; +{ + int entity_flag = 0; + enum catalog_decl_type entity_type = CATALOG_NO_DECL; + char *public_id = 0; + char *name = 0; + int exit_status; + int opt; + CATALOG catalog; + int i; + const char *file; + const char *system_id; + + program_name = argv[0]; + + while ((opt = getopt(argc, argv, "edl")) != EOF) + switch (opt) { + case 'e': + entity_flag = 1; + entity_type = CATALOG_ENTITY_DECL; + break; + case 'd': + entity_flag = 1; + entity_type = CATALOG_DOCTYPE_DECL; + break; + case 'l': + entity_flag = 1; + entity_type = CATALOG_LINKTYPE_DECL; + break; + case '?': + usage(); + } + if (argc - optind < 2) + usage(); + if (entity_flag) + name = argv[optind]; + else + public_id = argv[optind]; + + catalog = catalog_create(handle_catalog_error); + for (i = optind + 1; i < argc; i++) + catalog_load_file(catalog, argv[i]); + if (catalog_lookup_entity(catalog, public_id, name, entity_type, (char *)0, + &system_id, &file)) { + exit_status = 0; + fprintf(stderr, "%s (%s)\n", system_id, file); + } + 
else { + fprintf(stderr, "not found\n"); + exit_status = 1; + } + catalog_delete(catalog); + return exit_status; +} + +static +VOID usage() +{ + fprintf(stderr, "usage: %s [-e] [-d] [-l] id file ...\n", + program_name); + exit(1); +} + +static +VOID handle_catalog_error(filename, lineno, error_number, flags, sys_errno) + const char *filename; + unsigned long lineno; + int error_number; + unsigned flags; + int sys_errno; +{ + fprintf(stderr, "%s:", program_name); + if (flags & CATALOG_SYSTEM_ERROR) { + putc(' ', stderr); + fprintf(stderr, catalog_error_text(error_number), filename); + putc('\n', stderr); + } + else + fprintf(stderr, "%s:%lu: %s\n", filename, lineno, + catalog_error_text(error_number)); + fflush(stderr); +} + +UNIV xmalloc(n) + SIZE_T n; +{ + UNIV p = malloc(n); + if (!p) + out_of_memory(); + return p; +} + +UNIV xrealloc(p, n) + UNIV p; + SIZE_T n; +{ + p = realloc(p, n); + if (!p) + out_of_memory(); + return p; +} + +static +VOID out_of_memory() +{ + fprintf(stderr, "%s: out of memory\n", program_name); + exit(1); +} + +#endif /* MAIN */ diff --git a/usr.bin/sgmls/sgmls/catalog.h b/usr.bin/sgmls/sgmls/catalog.h new file mode 100644 index 000000000000..b9509a5c9af4 --- /dev/null +++ b/usr.bin/sgmls/sgmls/catalog.h @@ -0,0 +1,45 @@ +#ifndef CATALOG_H +#define CATALOG_H 1 + +enum catalog_decl_type { + CATALOG_NO_DECL = -1, + CATALOG_ENTITY_DECL, + CATALOG_DOCTYPE_DECL, + CATALOG_LINKTYPE_DECL +}; + +#define CATALOG_SYSTEM_ERROR 1 + +#ifdef __STDC__ + +typedef void *CATALOG; +typedef void (*CATALOG_ERROR_HANDLER)(const char *filename, + unsigned long lineno, + int error_number, + unsigned flags, + int sys_errno); +CATALOG catalog_create(CATALOG_ERROR_HANDLER); +void catalog_load_file(CATALOG, const char *); +void catalog_delete(CATALOG); +int catalog_lookup_entity(CATALOG, + const char *public_id, + const char *name, + enum catalog_decl_type, + const char *subst_table, + const char **system_id, + const char **catalog_file); +const char 
*catalog_error_text(int error_number); + +#else /* not __STDC__ */ + +typedef char *CATALOG; +typedef void (*CATALOG_ERROR_HANDLER)(); +CATALOG catalog_create(); +void catalog_load_file(); +void catalog_delete(); +int catalog_lookup_entity(); +char *catalog_error_text(); + +#endif /* not __STDC__ */ + +#endif /* not CATALOG_H */ diff --git a/usr.bin/sgmls/sgmls/config.h b/usr.bin/sgmls/sgmls/config.h index 562cdcf1f4c5..a7fa92c5062e 100644 --- a/usr.bin/sgmls/sgmls/config.h +++ b/usr.bin/sgmls/sgmls/config.h @@ -11,6 +11,17 @@ Usually the same as PATH_FILE_SEP. */ #define SYSID_FILE_SEP ':' /* The environment variable that contains the list of filename templates. */ #define PATH_ENV_VAR "SGML_PATH" +/* A macro that returns non-zero if the filename is relative to the + current directory. */ +#define FILE_IS_RELATIVE(p) ((p)[0] != '/') +/* A string containing the characters that can separate the directory + part of a filename from the basename. */ +#define DIR_BASE_SEP "/" +/* The environment variable that contains the list of catalog entry files. + Filenames are separated by PATH_FILE_SEP. */ +#define CATALOG_FILES_ENV_VAR "SGML_CATALOG_FILES" +/* Default list of catalog entry files. */ +#define DEFAULT_CATALOG_FILES "CATALOG:/usr/share/sgml/CATALOG" /* MIN_DAT_SUBS_FROM and MIN_DATS_SUBS_TO tell sgmls how to transform a name or system identifier into a legal filename. A character in @@ -46,7 +57,7 @@ An implementations of these functions is included and will be used if you don't define this. On SunOS 4.1.1, if you do define this you should set CC=/usr/xpg2bin/cc in the makefile. */ -/* #define HAVE_CAT 1 */ +#define HAVE_CAT 1 #ifdef __STDC__ /* Define this if your compiler supports prototypes. 
*/ diff --git a/usr.bin/sgmls/sgmls/context.c b/usr.bin/sgmls/sgmls/context.c index 1eb5a5c8674b..10a123a47206 100644 --- a/usr.bin/sgmls/sgmls/context.c +++ b/usr.bin/sgmls/sgmls/context.c @@ -44,27 +44,34 @@ int mexts; /* >0=stack level of minus grp; -1=plus; 0=none.*/ { UNCH toccsv, gtypesv; /* Save token's TOCC and GTYPE in case grp ends.*/ - if (mexts == -1) { - if (STATUS == RCEND) + if (mexts != 0) { + if (mexts == -1 && STATUS == RCEND) return RCPEX; copypos(savedpos, pos); } Tstart = T; /* Save starting token for AND group testing. */ while (STATUS!=RCMISS && STATUS!=RCEND) { - TRACEGI("CONTEXT", gi, mod, pos, Tstart); + TRACEGI("CONTEXT", gi, mod, pos); while (TTYPE==TTOR || TTYPE==TTSEQ || TTYPE==TTAND) { pos[P+1].g = M++; pos[++P].t = 1; HITCLEAR(H); Tstart = T; /* Save starting token for AND group testing. */ - TRACEGI("OPENGRP", gi, mod, pos, Tstart); + TRACEGI("OPENGRP", gi, mod, pos); } STATUS = (UNCH)tokenreq(gi, mod, pos); - TRACEGI("STATUS", gi, mod, pos, Tstart); + TRACEGI("STATUS", gi, mod, pos); if (gi==TOKEN.tu.thetd) { /* Hit in model. */ STATUS = (UNCH)RCHIT; gtypesv = GTYPE; toccsv = TOCC; newtoken(mod, pos, statuspt); - return(mexts<=0 ? RCHIT : (gtypesv==TTOR || BITON(toccsv, TOPT)) - ? RCMEX : RCHITMEX); + if (mexts <= 0) + return RCHIT; + else if (gtypesv==TTOR || BITON(toccsv, TOPT)) { + /* restore position */ + copypos(pos, savedpos); + return RCMEX; + } + else + return RCHITMEX; } if (STATUS==RCREQ) { if (mexts == -1) @@ -100,12 +107,12 @@ UNCH *statuspt; /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/ unsigned next; /* Position in AND group of next testable token.*/ Tstart = T; - TRACEEND("ECONT", mod, pos, 0, 0, Tstart); + TRACEEND("ECONT", mod, pos, 0, 0); if (P<=1) {nextetd = 0; return(TOKENHIT || BITON(TOCC, TOPT));} nextetd = TTYPE == TTETD ? 
TOKEN.tu.thetd : 0; while (STATUS!=RCMISS && STATUS!=RCEND) { STATUS = (UNCH)testend(mod, pos, 0, 0); - TRACEEND("ECONTEND", mod, pos, 0, 0, Tstart); + TRACEEND("ECONTEND", mod, pos, 0, 0); nextetd = P<=1 || TTYPE != TTETD ? 0 : TOKEN.tu.thetd; if (STATUS==RCEND) return(1); if (P<=1) return(TOKENHIT || BITON(TOCC, TOPT)); @@ -121,7 +128,7 @@ UNCH *statuspt; /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/ next : offbit(H, 0, GNUM)); M = G + grpsz(&GHDR, (int)T-1) + 1; - TRACEEND("ECONTNEW", mod, pos, 0, 0, Tstart); + TRACEEND("ECONTNEW", mod, pos, 0, 0); } if (STATUS==RCMISS) { if (BITON(TOCC, TOPT)) nextetd = 0; @@ -182,7 +189,7 @@ UNCH *statuspt; /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/ In either case, set M to correspond to the new T. */ retest: - TRACEEND("RETEST", mod, pos, (int)nextand, 1, Tstart); + TRACEEND("RETEST", mod, pos, (int)nextand, 1); if (GTYPE==TTAND) { nextand = offbit(H, (int)T, GNUM); if (!nextand) @@ -212,7 +219,7 @@ UNCH *statuspt; /* Token status: RCHIT RCMISS RCEND RCREQ RCNREQ*/ } } else STATUS = RCMISS; - TRACEEND("NEWTOKEN", mod, pos, (int)nextand, 1, Tstart); + TRACEEND("NEWTOKEN", mod, pos, (int)nextand, 1); } /* TESTEND: End the current group, if possible, and any that it is nested in. The current token will either be a group header, or some token @@ -228,7 +235,7 @@ int newtknsw; /* 1=new token test; 0=end element test. */ int rc = 0; /* Return code: RCNREQ RCHIT RCMISS RCEND */ while (!rc) { - TRACEEND("TRACEEND", mod, pos, rc, andoptsw, Tstart); + TRACEEND("TRACEEND", mod, pos, rc, andoptsw); /* TESTMISS: If we've hit no tokens yet in the current group, and the current token is the last unhit one in the group we can test, @@ -244,7 +251,7 @@ int newtknsw; /* 1=new token test; 0=end element test. 
*/ */ if (!ANYHIT(H) && (T==GNUM || (GTYPE==TTSEQ && BITOFF(TOCC, TOPT)))) { - M = G; --P; Tstart = T; + M = G; --P; if (P<=1) { if (BITON(TOCC, TOPT) || TOKENHIT) rc = RCEND; else rc = RCMISS; @@ -280,7 +287,7 @@ int newtknsw; /* 1=new token test; 0=end element test. */ } else rc = RCNREQ; /* No group ended this time, so return. */ } - TRACEEND("ENDFOUND", mod, pos, rc, andoptsw, Tstart); + TRACEEND("ENDFOUND", mod, pos, rc, andoptsw); return(rc); } /* TOKENOPT: Return 1 if current token is contextually optional; @@ -290,7 +297,7 @@ int tokenopt(mod, pos) struct thdr mod[]; /* Model of current open element. */ struct mpos pos[]; /* Position in open element's model. */ { - TRACEEND("TOKENOPT", mod, pos, 0, 0, Tstart); + TRACEEND("TOKENOPT", mod, pos, 0, 0); return (BITON(TOCC, TOPT) /* Inherently optional. */ || TOKENHIT /* Was hit (handles "plus" suffix case). */ || (!ANYHIT(H) && groupopt(mod, pos))); @@ -330,7 +337,7 @@ struct etd *gi; /* ETD of new GI. */ struct thdr mod[]; /* Model of current open element. */ struct mpos pos[]; /* Position in open element's model. */ { - TRACEGI("TOKENREQ", gi, mod, pos, Tstart); + TRACEGI("TOKENREQ", gi, mod, pos); return( tokenopt(mod, pos) ? RCNREQ : ( GTYPE==TTSEQ && (ANYHIT(H) || groupreq(gi, mod, pos)==RCREQ) #if 0 diff --git a/usr.bin/sgmls/sgmls/context.h b/usr.bin/sgmls/sgmls/context.h index 04350c758f55..01f4383b4e1b 100644 --- a/usr.bin/sgmls/sgmls/context.h +++ b/usr.bin/sgmls/sgmls/context.h @@ -7,6 +7,8 @@ #define P pos[0].t /* Index of current group in pos. */ #define G pos[P].g /* Index of current group in model. */ #define T pos[P].t /* Index of current token in its group. */ +#define Tstart pos[P].tstart /* Index of starting token in its group + for AND group testing. */ #define H pos[P].h /* Pointer to hit bits for current group. */ #define GHDR mod[G] /* Current group header. */ #define TOKEN mod[M] /* Current token. 
*/ diff --git a/usr.bin/sgmls/sgmls/ebcdic.h b/usr.bin/sgmls/sgmls/ebcdic.h index 1c35bcbb4ce7..3e0f3bdfea7e 100644 --- a/usr.bin/sgmls/sgmls/ebcdic.h +++ b/usr.bin/sgmls/sgmls/ebcdic.h @@ -23,18 +23,3 @@ /* See comment in latin1.h. */ #define CANON_DATACHAR 254 - -/* Components for a formal public identifier for the whole of the -system character set. Protect with ifndef so that it can be overriden -in config.h. */ - -/* Use a private escape sequence. */ -#ifndef SYSTEM_CHARSET_DESIGNATING_SEQUENCE -#define SYSTEM_CHARSET_DESIGNATING_SEQUENCE "ESC 2/5 2/15 3/0" -#endif -#ifndef SYSTEM_CHARSET_OWNER -#define SYSTEM_CHARSET_OWNER "-//IBM" -#endif -#ifndef SYSTEM_CHARSET_DESCRIPTION -#define SYSTEM_CHARSET_DESCRIPTION "Code Page 1047" -#endif diff --git a/usr.bin/sgmls/sgmls/entgen.c b/usr.bin/sgmls/sgmls/entgen.c index e08e9f0285f5..214682991f8f 100644 --- a/usr.bin/sgmls/sgmls/entgen.c +++ b/usr.bin/sgmls/sgmls/entgen.c @@ -1,7 +1,7 @@ /* entgen.c - Implement entgen() which generates a list of filenames from a struct fpi. - + Written by James Clark (jjc@jclark.com). */ @@ -51,6 +51,8 @@ static int field P((struct fpi *, int, char *)); static int mindatcpy P((char *, char *, int, int)); static int testopen P((char *)); static UNIV sysidgen P((char *)); +static UNIV catsysidgen P((const char *, const char *)); +static const char *basename P((const char *)); static char *path = 0; @@ -105,6 +107,14 @@ static char *ext[] = { "lpd", /* Link process definition */ }; +static CATALOG catalog; + +VOID entginit(swp) +struct switches *swp; +{ + catalog = swp->catalog; +} + /* Like memcpy, but substitute, fold to lower case (if fold is non-zero) and null terminate. This is used both for minimum data and for names. If p is NULL, do nothing. Return len. */ @@ -207,7 +217,7 @@ char *buf; /* return -1 if the formal public identifier was invalid or missing. */ if (f->fpiversw < 0 || !f->fpipubis) return -1; - + switch (c) { case 'A': /* Is it available? */ return f->fpitt == '+' ? 
0 : -1; @@ -278,7 +288,12 @@ char *pathname; UNIV entgen(f) struct fpi *f; { + char *qname; char *file; + enum catalog_decl_type dtype; + char *subst = 0; + const char *sysid; + const char *catfile; assert(f->fpistore != 6); /* Musn't call entgen for a notation. */ if (!path) { @@ -300,14 +315,46 @@ struct fpi *f; p++; } } + + if (f->fpisysis && !sysidsrch) + return sysidgen((char *)f->fpisysis); + + qname = (char *)f->fpinm; + + switch (f->fpistore) { + case 3: + /* fall through */ + qname--; /* hack */ + case 1: + case 2: + dtype = CATALOG_ENTITY_DECL; + if (ENTCASE) + subst = getsubst(); + break; + case 4: + dtype = CATALOG_DOCTYPE_DECL; + if (NAMECASE) + subst = getsubst(); + break; + default: + dtype = CATALOG_NO_DECL; + } + + if (catalog_lookup_entity(catalog, + (char *)f->fpipubis, + qname, + dtype, + (char *)subst, + &sysid, + &catfile)) + return catsysidgen(sysid, catfile); if (f->fpisysis - && (!sysidsrch - || strchr((char *)f->fpisysis, SYSID_FILE_SEP) + && (strchr((char *)f->fpisysis, SYSID_FILE_SEP) || strcmp((char *)f->fpisysis, STDINNAME) == 0)) return sysidgen((char *)f->fpisysis); file = path; - + for (;;) { char *p; int len = 0; @@ -334,7 +381,7 @@ struct fpi *f; } else len++; - + if (len > 0) { /* We've got a valid non-empty filename. */ char *s; @@ -368,7 +415,7 @@ UNIV sysidgen(s) char *s; { char *buf, *p; - + buf = (char *)rmalloc(strlen(s) + 2); for (p = buf; *s; s++) { @@ -394,6 +441,71 @@ char *s; return buf; } +/* Handle a system id in a catalog entry file. 
*/ +static +UNIV catsysidgen(s, catfile) +const char *s; +const char *catfile; +{ + const char *p; + char *bufp; + char *buf; + int nrelative = 0; + int catdirlen = 0; + if (FILE_IS_RELATIVE(s)) + nrelative++; + for (p = s; *p; p++) + if (*p == SYSID_FILE_SEP + && FILE_IS_RELATIVE(p + 1)) + nrelative++; + if (nrelative) { + const char *base = basename(catfile); + catdirlen = base - catfile; + } + buf = (char *)rmalloc(p - s + 2 + nrelative*catdirlen); + bufp = buf; + for (;;) { + if (!*s) + break; + if (*s != SYSID_FILE_SEP && FILE_IS_RELATIVE(s)) { + memcpy(bufp, catfile, catdirlen); + bufp += catdirlen; + } + for (;;) { + if (*s == SYSID_FILE_SEP) { + s++; + break; + } + *bufp++ = *s++; + if (*s == '\0') + break; + } + if (bufp > buf && bufp[-1] != '\0') + *bufp++ = '\0'; + } + if (bufp == buf) { + frem((UNIV)buf); + return 0; + } + *bufp++ = '\0'; + return buf; +} + +static +const char *basename(s) /* Return a pointer just past the last DIR_BASE_SEP character in s, or s itself if there is none. */ +const char *s; +{ + const char *p = s; + while (*p) + p++; + if (p > s) { + while (p > s) /* scan back to and including s[0], so that "/CATALOG" has directory "/" */ + if (strchr(DIR_BASE_SEP, *--p)) + return p + 1; + } + return s; +} + /* Local Variables: c-indent-level: 5 diff --git a/usr.bin/sgmls/sgmls/entity.h b/usr.bin/sgmls/sgmls/entity.h index d7d30962819d..84a3515d5a47 100644 --- a/usr.bin/sgmls/sgmls/entity.h +++ b/usr.bin/sgmls/sgmls/entity.h @@ -5,6 +5,7 @@ */ #include "tools.h" /* Definitions for type declarations, etc. */ #include "msgcat.h" +#include "catalog.h" #define STDINNAME "-" /* File name that refers to standard input. */ @@ -151,7 +152,8 @@ struct switches { /* Parser control switches (1=non-standard). */ int swenttr; /* 1=trace entity stack in error messages; 0=no.*/ int sweltr; /* 1=trace element stack in error messages; 0=no. */ int swambig; /* 1=check content model ambiguity */ - int swundef; /* 1=warn about undefined elements and notations. */ + int swundef; /* 1=warn about undefined elements. */ + int swcap; /* 1=report capacity errors */ char *prog; /* Program name for error messages.
*/ #ifdef TRACE char *trace; /* What to trace in the body. */ @@ -163,6 +165,7 @@ struct switches { /* Parser control switches (1=non-standard). */ char **includes; /* List of parameter entities to be defined as "INCLUDE"; NULL terminated.*/ VOID (*die) P((void)); /* Function to call on fatal error. */ + CATALOG catalog; /* Catalog for generating system identifiers. */ }; struct markup { /* Delimiter strings for text processor. */ UNCH *cro; /* LEXCON markup string: CRO */ diff --git a/usr.bin/sgmls/sgmls/etype.h b/usr.bin/sgmls/sgmls/etype.h index 707f60214968..8ec64c133bb7 100644 --- a/usr.bin/sgmls/sgmls/etype.h +++ b/usr.bin/sgmls/sgmls/etype.h @@ -25,7 +25,7 @@ struct thdr { /* Token header or model header. */ UNCH ttype; /* Token type attributes or model content. */ union { - int tnum; /* Group token: tokens in group. + int tnum; /* Group token: tokens in group. Model header: content tokens at any level. */ struct etd *thetd; /* GI token: ptr to etd. */ } tu; @@ -64,6 +64,8 @@ extern struct etd dumetd[]; struct mpos { /* Position of current element in model. */ UNCH g; /* Index of this group in the model. */ UNCH t; /* Index of the current token in this group. */ + UNCH tstart; /* Index of starting token for AND group + testing. */ unsigned long *h; /* Hit bits of this group's tokens. 
*/ }; diff --git a/usr.bin/sgmls/sgmls/genlex.c b/usr.bin/sgmls/sgmls/genlex.c index 2a0d3a6e1d57..b653d148b4a5 100644 --- a/usr.bin/sgmls/sgmls/genlex.c +++ b/usr.bin/sgmls/sgmls/genlex.c @@ -12,7 +12,27 @@ extern UNCH *lextabs[]; extern UNCH lextran[]; static char *lextabnames[] = { - "lexcnm", "lexcon", "lexgrp", "lexlms", "lexmark", "lexsd", "lextoke" + "lexcnm", "lexcon", "lexgrp", "lexlms", "lexmark", "lexsd", "lextoke", + "lexmin" +}; + +#define UNUSED -1 + +extern int iso646charset[]; +extern int iso646G0charset[]; +extern int iso646C0charset[]; +extern int iso8859_1charset[]; +extern int iso6429C1charset[]; + +static struct { + char *name; + int *map; +} charsets[] = { /* one entry per generated table; each name must appear only once */ + { "iso646charset", iso646charset }, + { "iso646G0charset", iso646G0charset }, + { "iso8859_1charset", iso8859_1charset }, + { "iso646C0charset", iso646C0charset }, + { "iso6429C1charset", iso6429C1charset }, }; static VOID print_tab(s, t) @@ -34,7 +54,7 @@ int main(argc, argv) UNCH tab[256]; char special[256]; /* Shunned character numbers in the reference concrete syntax. */ - static UNCH refshun[] = { + static UNCH refshun[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 127, 255 }; @@ -89,7 +109,7 @@ int main(argc, argv) for (j = 0; j < 256; j++) if (!special[j]) { - if (shunned[j]) + if (shunned[j]) tab[j] = lextabs[i][CANON_ASCII_NONSGML]; else tab[j] = lextabs[i][CANON_ASCII_DATACHAR]; @@ -102,13 +122,18 @@ int main(argc, argv) tab[charset[i]] = charset[lextran[i]]; print_tab("lextran", tab); - /* Generate asciicharset. */ - fputs("int asciicharset[] = {\n", stdout); - for (i = 0; i < 128; i++) - printf("%3d,%c", charset[i], (i + 1) % 16 == 0 ? '\n' : ' '); - for (i = 128; i < 256; i++) - printf("UNUSED,%c", (i + 1) % 8 == 0 ? '\n' : ' '); - fputs("};\n", stdout); + /* Generate charsets.
*/ + for (i = 0; i < sizeof(charsets)/sizeof(charsets[0]); i++) { + int j; + int *map = charsets[i].map; + printf("\nint %s[] = {\n", charsets[i].name); + for (j = 0; j < 256; j++) + if (map[j] == UNUSED) + printf("UNUSED,%c", (j + 1) % 8 == 0 ? '\n' : ' '); + else + printf("%3d,%c", charset[map[j]], (j + 1) % 16 == 0 ? '\n' : ' '); + fputs("};\n", stdout); + } exit(EXIT_SUCCESS); } diff --git a/usr.bin/sgmls/sgmls/getopt.c b/usr.bin/sgmls/sgmls/getopt.c index bc8edebc83d3..9a218b395873 100644 --- a/usr.bin/sgmls/sgmls/getopt.c +++ b/usr.bin/sgmls/sgmls/getopt.c @@ -132,7 +132,7 @@ char *opts; else optarg = argv[optind++]; sp = 1; - } + } else { if (argv[optind][++sp] == '\0') { sp = 1; diff --git a/usr.bin/sgmls/sgmls/latin1.h b/usr.bin/sgmls/sgmls/latin1.h index 44f43f363caa..c6df696b3060 100644 --- a/usr.bin/sgmls/sgmls/latin1.h +++ b/usr.bin/sgmls/sgmls/latin1.h @@ -35,17 +35,3 @@ shunned in the reference concrete syntax and is not the number of a significant (in the reference concrete syntax) SGML character nor one of the above characters. */ #define CANON_DATACHAR 254 - -/* Components for a formal public identifier for the whole of the -system character set. Protect with ifndef so that it can be overriden -in config.h. */ - -#ifndef SYSTEM_CHARSET_DESIGNATING_SEQUENCE -#define SYSTEM_CHARSET_DESIGNATING_SEQUENCE "ESC 2/13 4/1" -#endif -#ifndef SYSTEM_CHARSET_OWNER -#define SYSTEM_CHARSET_OWNER "ISO Registration Number 100" -#endif -#ifndef SYSTEM_CHARSET_DESCRIPTION -#define SYSTEM_CHARSET_DESCRIPTION "ECMA-94 Right Part of Latin Alphabet Nr. 1" -#endif diff --git a/usr.bin/sgmls/sgmls/lexcode.h b/usr.bin/sgmls/sgmls/lexcode.h index e4047ba4d7ca..d34e3e63cd9c 100644 --- a/usr.bin/sgmls/sgmls/lexcode.h +++ b/usr.bin/sgmls/sgmls/lexcode.h @@ -3,6 +3,7 @@ #define FCE 27 /* FRE Free character in use as an entity reference */ #define FRE 0 /* FREECHAR that is not in a CON delimiter-in-context. 
*/ #define LITC 21 /* LIT LITA PIC or EE in use as a literal terminator */ +#define MINLITC 13 /* LIT LITA as literal terminator in minimum data */ #define MSC3 15 /* ] Also MSC[2]. */ #define NET 17 /* / When enabled. */ #define ETI 16 /* / Actually ETAGO[2] */ diff --git a/usr.bin/sgmls/sgmls/lexrf.c b/usr.bin/sgmls/sgmls/lexrf.c index ec3db83fe389..643b33640be9 100644 --- a/usr.bin/sgmls/sgmls/lexrf.c +++ b/usr.bin/sgmls/sgmls/lexrf.c @@ -110,6 +110,7 @@ struct lexical lex = { /* Delimiter set constants for parser use. */ FCE, /* LEXCNM: FRE char as entity reference.*/ FRE, /* LEXLMS: Free character not an entity ref.*/ LITC, /* LEXLMS: Literal close delimiter enabled. */ + MINLITC, /* LEXMIN: Literal close delimiter enabled. */ MSC3, /* LEXLMS: Marked section close delim enabled. */ NET, /* LEXCON: Null end-tag delimiter enabled. */ ETI, /* LEXCON: NET disabled; still used as ETI. */ @@ -120,5 +121,5 @@ struct lexical lex = { /* Delimiter set constants for parser use. */ }; UNCH *lextabs[] = { - lexcnm, lexcon, lexgrp, lexlms, lexmark, lexsd, lextoke, 0 + lexcnm, lexcon, lexgrp, lexlms, lexmark, lexsd, lextoke, lexmin, 0 }; diff --git a/usr.bin/sgmls/sgmls/lextaba.c b/usr.bin/sgmls/sgmls/lextaba.c index 38a2fd1d5daa..a851d8596694 100644 --- a/usr.bin/sgmls/sgmls/lextaba.c +++ b/usr.bin/sgmls/sgmls/lextaba.c @@ -331,6 +331,69 @@ FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, NON #undef TGC3 #undef TGO3 /* def LITC*/ +/* LEXMIN: Lexical table for minimum data literals. +*/ +/* Symbols for SGML character set divisions and function characters. +*/ +#define FRE 0 /* Free char: not in a delimiter or minimum literal. 
*/ +#define NU 1 /* Numeral Numerals */ +#undef MIN +#define MIN 2 /* Minimum literal '()+,-./:?= */ +#define NMS 3 /* LC/UCNMSTRT Lower and uppercase letters */ +#define SPC 4 /* SPACE 32 Space */ +#define NON 5 /* NONSGML 0-31 127 255 Unused, except for: */ +#define EE 6 /* NONSGML 00 26 Entity end (end of file) */ +#define EOB 7 /* NONSGML 28 End disk buffer */ +#define RS 8 /* Function 10 Line feed */ +#define RE 9 /* Function 13 Carrier return */ +#define SEP 10 /* SEPCHAR 09 TAB: horizontal tab */ +/*#define CDE 11 NONSGML delcdata CDATA/SDATA delimiter */ +#define NSC 12 /* NONSGML delnonch Non-SGML character prefix */ +/* Either LIT or LITA changed to LITC when a literal is begun. + It is changed back when the LITC occurs (i.e., when the literal ends). +*/ +UNCH lexmin[256] = { /* +000 001       bs tab lf home ff cr so si */ +EE, NON, NON, NON, NON, NON, NON, NON, NON ,SEP, RS, NON, NON, RE, NON, NON, /* +          eof esc rt left up down */ +NON, NON, NON, NON, NON, NON, NON, NON, NON, NON, EE, NON, EOB, NON, NON, NSC, /* +032 ! " # $ % & ' ( ) * + , - . / */ +SPC, FRE, FRE, FRE, FRE, FRE, FRE, MIN, MIN, MIN, FRE, MIN, MIN, MIN, MIN, MIN, /* +0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
*/ +NU , NU , NU , NU , NU , NU , NU , NU , NU , NU , MIN, FRE, FRE, MIN, FRE, MIN, /* +@ A B C D E F G H I J K L M N O */ +FRE, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /* +P Q R S T U V W X Y Z [ \ ] ^ _ */ +NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, FRE, FRE, FRE, FRE, FRE, /* +` a b c d e f g h i j k l m n o */ +FRE, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, /* +p q r s t u v w x y z { | } ~ 127 */ +NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, NMS, FRE, FRE, FRE, FRE, NON, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, +FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, FRE, NON +}; +/* free nu min nms spc non ee eob rs re sep cde nsc ero + mdo msc mso pero rni tago tagc litc */ +/* def FRE*/ +#undef NU +#undef MIN +#undef NMS +#undef SPC +#undef NON +#undef EE +#undef EOB +#undef RS +#undef RE +#undef SEP +/* def CDE*/ +/* def NSC*/ +/* def LITC*/ /* LEXMARK: Lexical scan table for markup: PCBMD? and PCB?TAG. */ /* Symbols for SGML character set divisions. 
*/ @@ -457,15 +520,15 @@ DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, DAT, NON }; -#undef SIG -#undef DAT -#undef NON -#undef NU -#undef NMS -#undef SPC -#undef EE -#undef EOB -#undef RS +#undef SIG +#undef DAT +#undef NON +#undef NU +#undef NMS +#undef SPC +#undef EE +#undef EOB +#undef RS #undef COM1 #undef LIT3 #undef LITA @@ -531,7 +594,7 @@ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV }; /* This table maps ASCII to the system character set. */ -int asciicharset[] = { +int iso646charset[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, @@ -557,3 +620,131 @@ UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, }; + +/* This table maps the C0 part of ISO646 to the system character set. */ +/* We throw in 32 and 127 for free, since ISO 2022 maps them in +automatically.
*/ +int iso646C0charset[] = { +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +32, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, 127, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +}; + +/* This table maps the G0 
part of ISO646 to the system character set. */ +int iso646G0charset[] = { +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, +48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, +64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, +80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, +96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, +112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +}; + +int iso8859_1charset[] = { +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, 
UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, +176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, +192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, +208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, +224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, +240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +}; + +int iso6429C1charset[] = { +128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, +144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, 
UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +}; diff --git a/usr.bin/sgmls/sgmls/lextabe.c b/usr.bin/sgmls/sgmls/lextabe.c index f93af89ec4b5..5cfe0de5ffb6 100644 --- a/usr.bin/sgmls/sgmls/lextabe.c +++ b/usr.bin/sgmls/sgmls/lextabe.c @@ -137,6 +137,25 @@ UNCH lextoke[] = { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 0, 0, 0, 0, 0, 0, }; +UNCH lexmin[] = { + 6, 5, 5, 5, 5, 10, 5, 5, 5, 5, 5, 5, 5, 9, 5, 5, + 5, 5, 5, 5, 
5, 5, 5, 5, 5, 5, 5, 5, 7, 5, 5, 12, + 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, + 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 2, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, + 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, + 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 5, +}; + UNCH lextran[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, @@ -156,7 +175,8 @@ UNCH lextran[] = { 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, }; -int asciicharset[] = { + +int iso646charset[] = { 0, 1, 2, 3, 55, 45, 46, 47, 22, 5, 37, 11, 12, 13, 14, 15, 16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31, 64, 90, 127, 123, 91, 108, 80, 125, 77, 93, 92, 78, 107, 96, 75, 97, @@ -182,3 +202,156 @@ UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, }; + +int iso646G0charset[] = { +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, + 64, 90, 127, 123, 91, 108, 80, 125, 77, 93, 92, 78, 107, 96, 75, 97, +240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 122, 94, 76, 126, 110, 111, +124, 193, 194, 195, 196, 197, 198, 199, 200, 201, 209, 210, 211, 212, 213, 214, +215, 216, 217, 
226, 227, 228, 229, 230, 231, 232, 233, 173, 224, 189, 176, 109, +121, 129, 130, 131, 132, 133, 134, 135, 136, 137, 145, 146, 147, 148, 149, 150, +151, 152, 153, 162, 163, 164, 165, 166, 167, 168, 169, 192, 79, 208, 161, 7, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +}; + +int iso646G0charset[] = { +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, + 64, 90, 127, 123, 91, 108, 80, 125, 77, 93, 92, 78, 107, 96, 75, 97, +240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 122, 94, 76, 126, 110, 111, +124, 193, 194, 195, 196, 197, 198, 199, 200, 201, 209, 210, 211, 212, 213, 214, +215, 216, 217, 226, 227, 228, 229, 230, 231, 232, 233, 173, 224, 189, 176, 109, +121, 129, 130, 131, 132, 133, 134, 135, 136, 137, 145, 146, 147, 148, 149, 150, +151, 152, 153, 162, 163, 164, 165, 166, 167, 
168, 169, 192, 79, 208, 161, 7, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +}; + +int iso8859_1charset[] = { +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, + 65, 170, 74, 177, 159, 178, 106, 181, 187, 180, 154, 138, 95, 202, 175, 188, +144, 143, 234, 250, 190, 160, 182, 179, 157, 218, 155, 139, 183, 184, 185, 171, +100, 101, 98, 102, 99, 103, 158, 104, 116, 113, 114, 115, 120, 117, 118, 119, +172, 105, 237, 238, 235, 239, 236, 191, 128, 253, 254, 251, 252, 186, 174, 89, + 68, 69, 66, 70, 67, 71, 156, 72, 84, 81, 82, 83, 88, 85, 86, 87, +140, 73, 205, 206, 203, 207, 204, 225, 112, 221, 222, 219, 220, 141, 142, 223, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, 
UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +}; + +int iso646C0charset[] = { + 0, 1, 2, 3, 55, 45, 46, 47, 22, 5, 37, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31, + 64, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, 7, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, 
UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +}; + +int iso6429C1charset[] = { + 4, 6, 8, 9, 10, 20, 21, 23, 26, 27, 32, 33, 34, 35, 36, 40, + 41, 42, 43, 44, 48, 49, 51, 52, 53, 54, 56, 57, 58, 59, 62, 255, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, 
UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, UNUSED, +}; diff --git a/usr.bin/sgmls/sgmls/lineout.c b/usr.bin/sgmls/sgmls/lineout.c index 553c835acc8e..794eff8cb6c3 100644 --- a/usr.bin/sgmls/sgmls/lineout.c +++ b/usr.bin/sgmls/sgmls/lineout.c @@ -150,7 +150,7 @@ UNCH *ename; int rc; PNE np; UNCH *tp; - + if (sgmlment(ename)) /* already defined it */ return; rc = sgmlgent(ename, &np, &tp); @@ -234,8 +234,10 @@ int aln; else if (ADTYPE(al, aln) == AENTITY) define_entity(ADVAL(al, aln)); output_begin_attribute(ent, ADNAME(al, aln), ADTYPE(al, aln)); - if (ADTYPE(al, aln) == ACHARS) - output_attribute_token(ustrlen(ADVAL(al, aln)), ADVAL(al, aln)); + if (ADTYPE(al, aln) == ACHARS) { + putchar(' '); + print_string(ustrlen(ADVAL(al, aln)), ADVAL(al, aln), 0); + } else output_attribute_token(*ADVAL(al, aln) - 2, ADVAL(al, aln) + 1); output_end_attribute(); @@ -294,7 +296,7 @@ UNIV id; ret = run_process(argv); if (ret != 0) suberr++; - + current_filename = 0; free(argv); if (ret == 0) @@ -410,7 +412,7 @@ UNCH *s; print_string(n, s, 0); putchar('\n'); } - + static VOID output_implied_attribute(ent, aname) UNCH *ent, *aname; @@ -451,7 +453,7 @@ int type; fatal("invalid attribute type %d", type); #endif 
return "INVALID"; -} +} static VOID output_begin_attribute(ent, aname, type) UNCH *ent, *aname; @@ -472,7 +474,8 @@ UNS vallen; UNCH *val; { putchar(' '); - print_string(vallen, val, 0); + for (; vallen > 0; --vallen, ++val) + putchar(*val); } static VOID output_end_attribute() @@ -576,7 +579,7 @@ int is_sdata; if (is_sdata) fputs("\\|", stdout); } - + static VOID print_id(id, pubid, sysid) UNIV id; diff --git a/usr.bin/sgmls/sgmls/main.c b/usr.bin/sgmls/sgmls/main.c index 4c8bbb3a1c30..fb2d30366c0c 100644 --- a/usr.bin/sgmls/sgmls/main.c +++ b/usr.bin/sgmls/sgmls/main.c @@ -11,6 +11,7 @@ #include "adl.h" /* Definitions for attribute list processing. */ #include "sgmlmain.h" /* Main interface to SGML services. */ #include "appl.h" +#include "alloc.h" #define READCNT 512 @@ -25,6 +26,10 @@ including the last character in prog that occurs in PROG_PREFIX. */ #define CAT_NAME "sgmls" /* Message set to use for application error messages. */ #define APP_SET 4 +/* Message set to use for error messages from catalog.c. */ +#define CAT_SET 5 +#define CATALOG_ERROR_HEADER_MSGNO 20 +#define CATALOG_ERROR_HEADER_TEXT "Catalog error at %s, line %lu" #ifdef HAVE_EXTENDED_PRINTF #define xvfprintf vfprintf @@ -37,6 +42,7 @@ static VOID fatal VP((int, ...)); static VOID do_error P((int, va_list)); static VOID swinit P((struct switches *)); static VOID write_caps P((char *, struct sgmlcap *)); +static VOID do_catalog_error(); static UNIV make_docent P((int, char **)); static char *munge_program_name P((char *, char *)); @@ -66,9 +72,11 @@ static char *prog; /* Program name (for error messages). */ static nl_catd catd; /* Message catalogue descriptor. */ static char *capfile = 0; /* File for capacity report. */ extern char *version_string; +static CATALOG catalog; /* Entity catalog. 
*/ char options[] = { 'c', ':', 'd', 'e', 'g', 'i', ':', 'l', 'o', ':', 'p', 'r', 's', 'u', 'v', + 'm', ':', #ifdef CANT_REDIRECT_STDERR 'f', ':', #endif /* CANT_REDIRECT_STDERR */ @@ -112,14 +120,19 @@ char **argv; prog = argv[0] = munge_program_name(argv[0], "sgmls"); catd = catopen(CAT_NAME, 0); + catalog = catalog_create(do_catalog_error); swinit(&sw); while ((opt = getopt(argc, argv, options)) != EOF) { switch (opt) { + case 'm': + catalog_load_file(catalog, optarg); + break; case 'l': /* Generate location information. */ locsw = 1; break; case 'c': /* Print capacity usage. */ + sw.swcap = 1; capfile = optarg; break; case 's': /* Suppress output. */ @@ -178,7 +191,7 @@ char **argv; abort(); } } - + #ifdef CANT_REDIRECT_STDERR if (errfile) { FILE *fp; @@ -285,7 +298,7 @@ char **argv; for (i = 0; i < argc; i++) len += strlen(argv[i]) + 1; - + res = xmalloc(len); ptr = (char *)res; for (i = 0; i < argc; i++) { @@ -300,7 +313,7 @@ char **argv; static VOID usage() { /* Don't mention -o since this are for internal use only. */ - fprintf(stderr, "Usage: %s [-deglprsuv]%s [-c file] [-i entity]%s [filename ...]\n", + fprintf(stderr, "Usage: %s [-deglprsuv]%s [-c file] [-i entity] [-m file]%s [filename ...]\n", prog, #ifdef CANT_REDIRECT_STDERR " [-f file]", @@ -338,8 +351,10 @@ struct switches *swp; swp->ptrace = 0; #endif /* TRACE */ swp->catd = catd; + swp->catalog = catalog; swp->swambig = 1; /* Always check for ambiguity. */ swp->swundef = 0; + swp->swcap = 0; /* Don't check capacities. */ swp->nopen = 0; swp->onlypro = 0; swp->includes = 0; @@ -455,10 +470,10 @@ UNIV id; for (p = (char *)id, nfiles = 0; *p; p = strchr(p, '\0') + 1) nfiles++; - + argv = (char **)xmalloc((subargc + 2 + 1 + nfiles + 1)*sizeof(char *)); memcpy((UNIV)argv, (UNIV)subargv, subargc*sizeof(char *)); - + i = subargc; argv[i++] = "-c"; @@ -541,7 +556,7 @@ VOID fatal(int errnum,...) 
int errnum; #endif va_list ap; - + #ifdef VARARGS va_start(ap); errnum = va_arg(ap, int); @@ -563,7 +578,7 @@ VOID appl_error(int errnum,...) int errnum; #endif va_list ap; - + #ifdef VARARGS va_start(ap); errnum = va_arg(ap, int); @@ -590,6 +605,39 @@ va_list ap; fflush(stderr); } +static +VOID do_catalog_error(filename, lineno, error_number, flags, sys_errno) +char *filename; +unsigned long lineno; +int error_number; +unsigned flags; +int sys_errno; +{ + char *text; + unsigned indent; + text = catgets(catd, CAT_SET, error_number, + (char *)catalog_error_text(error_number)); /* XXX */ + assert(text != 0); + fprintf(stderr, "%s: ", prog); + indent = strlen(prog) + 2; + if (flags & CATALOG_SYSTEM_ERROR) + fprintf(stderr, text, filename, strerror(sys_errno)); + else { + unsigned i; + fprintf(stderr, + catgets(catd, APP_SET, + CATALOG_ERROR_HEADER_MSGNO, + CATALOG_ERROR_HEADER_TEXT), + filename, lineno); + fputs(":\n", stderr); + for (i = 0; i < indent; i++) + putc(' ', stderr); + fputs(text, stderr); + } + putc('\n', stderr); + fflush(stderr); +} + /* Local Variables: c-indent-level: 5 diff --git a/usr.bin/sgmls/sgmls/md1.c b/usr.bin/sgmls/sgmls/md1.c index 9a294e3237bc..66c476dc79d2 100644 --- a/usr.bin/sgmls/sgmls/md1.c +++ b/usr.bin/sgmls/sgmls/md1.c @@ -129,10 +129,12 @@ struct etd *p; /* Pointer to element type definition. */ if (GET(p->adl[0].adflags, ADLCONR)) mderr(85, (UNCH *)0, (UNCH *)0); } +#if 0 /* "-" should not be specified for the end-tag minimization if the element has a content reference attribute. */ if (GET(p->adl[0].adflags, ADLCONR) && BITON(p->etdmin, EMM)) mderr(153, (UNCH *)0, (UNCH *)0); +#endif } /* MDNADL: Process ATTLIST declaration for notation. TO DO: Pass deftab and dvtab as parameters so @@ -583,7 +585,6 @@ UNCH *tbuf; /* Work area for tokenization[LITLEN+2]. */ /* PARAMETER 2: External identifier keyword or MDS. 
*/ - pcbmd.newstate = 0; parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN); TRACEMD("2: extid or MDS"); switch (pcbmd.action) { @@ -645,7 +646,7 @@ UNCH *tbuf; /* Work area for tokenization. */ parmno = 0; /* No parameters as yet. */ /* PARAMETER 4: End of declaration. */ - pcbmd.newstate = 0; + pcbmd.newstate = pcbmdtk; parsemd(tbuf, NAMECASE, &pcblitp, LITLEN); TRACEMD(emd); if (pcbmd.action!=EMD) mderr(126, (UNCH *)0, (UNCH *)0); @@ -721,7 +722,7 @@ UNCH *tbuf; /* Work area for tokenization (tbuf). */ mderr(129, tbuf+1, (UNCH *)0); return; } - /* Must omit omitted end-tag minimization, if omitted + /* Must omit omitted end-tag minimization, if omitted start-tag minimization was omitted (because OMITTAG == NO). */ if (!minomitted) { /* PARAMETER 2B: End-tag minimization. @@ -733,6 +734,9 @@ UNCH *tbuf; /* Work area for tokenization (tbuf). */ if (ustrcmp(tbuf+1, key[KO])) {mderr(129, tbuf+1, (UNCH *)0); return;} if (OMITTAG==YES) SET(fmin, EMO); break; + case MGRP: + REPEATCC; + /* fall through */ case CDR: SET(fmin, EMM); break; diff --git a/usr.bin/sgmls/sgmls/md2.c b/usr.bin/sgmls/sgmls/md2.c index 94dc4d30dcdb..df7e57e9f3c2 100644 --- a/usr.bin/sgmls/sgmls/md2.c +++ b/usr.bin/sgmls/sgmls/md2.c @@ -54,7 +54,6 @@ UNCH *tbuf; /* Work area for tokenization[LITLEN+2]. */ subdcl = nmbuf+1; /* Subject name for error messages. */ /* PARAMETER 2: Entity text keyword (optional). */ - pcbmd.newstate = 0; parsemd(tbuf, NAMECASE, &pcblitp, LITLEN); TRACEMD("2: keyword"); switch (pcbmd.action) { @@ -77,7 +76,6 @@ UNCH *tbuf; /* Work area for tokenization[LITLEN+2]. */ mderr(38, tbuf+1, (UNCH *)0); estore = ESM; } - pcbmd.newstate = 0; parsemd(tbuf, NAMECASE, &pcblitp, LITLEN); break; default: @@ -98,7 +96,7 @@ UNCH *tbuf; /* Work area for tokenization[LITLEN+2]. */ etx.c = savestr(tbuf); break; case ESMD: /* MD: parameter literal required. 
*/ - etx.c = sandwich(tbuf, lex.m.mdo, lex.m.mdc); + etx.c = sandwich(tbuf, lex.m.mdo, lex.m.mdc); goto bcheck; case ESMS: /* MS: parameter literal required. */ etx.c = sandwich(tbuf, lex.m.mss, lex.m.mse); @@ -122,7 +120,6 @@ UNCH *tbuf; /* Work area for tokenization[LITLEN+2]. */ } /* PARAMETER 4: End of declaration. */ - pcbmd.newstate = 0; parsemd(tbuf, NAMECASE, &pcblitp, LITLEN); parm4: TRACEMD(emd); @@ -230,7 +227,6 @@ PNE pne; /* Caller's external entity ptr. */ /* PARAMETER 2: Public ID literal. */ - pcbmd.newstate = 0; /* The length of a minimum literal cannot exceed the value of LITLEN in the reference quantity set. */ parsemd(pubibuf, NAMECASE, &pcblitv, REFLITLEN); @@ -248,13 +244,11 @@ PNE pne; /* Caller's external entity ptr. */ /* PARAMETER 3: System ID literal. */ parm3: - pcbmd.newstate = 0; parsemd(sysibuf, NAMECASE, &pcblitc, LITLEN); TRACEMD("3: sys ID literal"); if (pcbmd.action==LIT || pcbmd.action==LITE) { entlen += ustrlen(sysibuf); fpis->fpisysis = sysibuf; - pcbmd.newstate = 0; parsemd(tbuf, NAMECASE, &pcblitp, LITLEN); } else memcpy(tbuf, sysibuf, *sysibuf); @@ -277,13 +271,11 @@ PNE pne; /* Caller's external entity ptr. */ if (exetype==ESNSUB) { pne->nedcn = 0; - pcbmd.newstate = 0; /* Parse next token for caller. */ parsemd(tbuf, NAMECASE, &pcblitp, LITLEN); goto genfpi; } /* PARAMETER 5: Notation name. */ - pcbmd.newstate = 0; parsemd(lbuf, NAMECASE, &pcblitp, NAMELEN); TRACEMD("5: notation"); if (pcbmd.action!=NAS) {mderr(119, tbuf+1, (UNCH *)0); return (struct fpi *)0;} @@ -296,7 +288,6 @@ PNE pne; /* Caller's external entity ptr. */ /* PARAMETER 6: Data attribute specification. */ - pcbmd.newstate = 0; parsemd(lbuf, NAMECASE, &pcblitp, NAMELEN); TRACEMD("6: [att list]"); if (pcbmd.action!=MDS) { /* No attributes specified. */ @@ -321,7 +312,6 @@ PNE pne; /* Caller's external entity ptr. */ storedatt(pne); } parse(&pcbeal); /* Parse the list ending. */ - pcbmd.newstate = 0; /* Parse next token for caller. 
*/ parsemd(tbuf, NAMECASE, &pcblitp, LITLEN); /* GENFPI: Builds a formal public identifier structure, including the @@ -339,10 +329,9 @@ PNE pne; /* Caller's external entity ptr. */ } /* Analyze public ID and make structure entries. */ if (exidtype==EDPUBLIC) { - if (FORMAL==NO) - fpis->fpiversw = -1; - else if (parsefpi(fpis)>0) { - mderr(88, fpis->fpipubis, (UNCH *)0); + if (parsefpi(fpis)>0) { + if (FORMAL==YES) + mderr(88, fpis->fpipubis, (UNCH *)0); fpis->fpiversw = -1; /* Signal bad formal public ID. */ } } @@ -355,7 +344,7 @@ VOID storedatt(pne) PNE pne; { int i; - + NEAL(pne) = (struct ad *)rmalloc((1+ADN(al))*ADSZ); memcpy((UNIV)NEAL(pne), (UNIV)al, (1+ADN(al))*ADSZ); for (i = 1; i <= (int)ADN(al); i++) { @@ -387,9 +376,11 @@ PFPI f; /* Ptr to formal public identifier structure. */ p = f->fpipubis; /* Point to start of identifier. */ l = p + ustrlen(p); /* Point to EOS of identifier. */ - if (*p=='+' || *p=='-') { /* If owner registered, unregistered. */ + if ((*p=='+' || *p=='-') + && p[1] == '/' && p[2] == '/') { /* If owner registered, + unregistered. */ f->fpiot = *p; /* Save owner type. */ - if ((p += 3)>=l) return 1; /* Get to owner ID field. */ + p += 3; } else f->fpiot = '!'; /* Indicate ISO owner identifier. */ if ((q = pubfield(p, l, '/', &len))==0) /* Find end of owner ID field. */ @@ -407,9 +398,10 @@ PFPI f; /* Ptr to formal public identifier structure. */ /* The public text class in a notation identifier must be NOTATION. */ if (f->fpistore == ESK - ESFM + 1 && f->fpic != FPINOT) return 10; - if (*p=='-') { /* If text is unavailable public text.*/ + if (*p=='-' && p[1] == '/' && p[2] == '/') { /* If text is unavailable + public text.*/ f->fpitt = *p; /* Save text type. */ - if ((p += 3)>=l) return 5; /* Get to text description field. */ + p += 3; } else f->fpitt = '+'; /* Indicate available public text. */ if ((q = pubfield(p, l, '/', &len))==0) /* Find end of text description. 
*/ @@ -423,8 +415,11 @@ PFPI f; /* Ptr to formal public identifier structure. */ /* Language must be all upper-case letters. */ /* The standard only says that it *should* be two letters, so don't enforce that. */ + /* Language must be a name, which means it can't be empty. */ + if (len == 0) + return 7; for (i = 0; i < len; i++) { - /* Don't assume ASCII. */ + /* Don't assume ASCII. */ if (!strchr("ABCDEFGHIJKLMNOPQRSTUVWXYZ", q[i])) return 7; } @@ -552,7 +547,6 @@ UNCH *tbuf; /* Work area for tokenization[LITLEN+2]. */ /* PARAMETER 2: External identifier keyword. */ - pcbmd.newstate = 0; parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN); TRACEMD("2: extid"); if (pcbmd.action!=NAS) {mderr(29, (UNCH *)0, (UNCH *)0); return;} @@ -630,9 +624,8 @@ UNCH *tbuf; /* Work area for tokenization[LITLEN+2]. */ SRM(0) = (PECB)srhptr; /* Indicate map was actually declared.*/ subdcl = srhptr->ename+1; /* Save map name for error msgs. */ - while ( pcbmd.newstate = 0, - parsemd(tbuf, NAMECASE, &pcblitp, SRMAXLEN)==LIT - || pcbmd.action==LITE ) { + while (parsemd(tbuf, NAMECASE, &pcblitp, SRMAXLEN) == LIT + || pcbmd.action==LITE ) { /* PARAMETER 2: Delimiter string. */ TRACEMD("2: SR string"); @@ -642,7 +635,6 @@ UNCH *tbuf; /* Work area for tokenization[LITLEN+2]. */ } /* PARAMETER 3: Entity name. */ - pcbmd.newstate = 0; parsemd(tbuf, ENTCASE, &pcblitp, NAMELEN); TRACEMD("3: entity"); if (pcbmd.action!=NAS) {mderr(120, (UNCH *)0, (UNCH *)0); goto cleanup;} @@ -725,7 +717,6 @@ UNCH *tbuf; /* Work area for tokenization[LITLEN+2]. */ } /* PARAMETER 2: Element name or a group of them. (In DTD only.) */ - pcbmd.newstate = 0; parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN); TRACEMD("2: GI or grp"); switch (pcbmd.action) { @@ -740,6 +731,7 @@ UNCH *tbuf; /* Work area for tokenization[LITLEN+2]. 
*/ break; case EMD: if (indtdsw) {mderr(28, (UNCH *)0, (UNCH *)0); return;} + if (docelsw) {mderr(233, (UNCH *)0, (UNCH *)0); return;} tags[ts].tsrm = srmptr; TRACESRM("USEMAP", tags[ts].tsrm, tags[ts].tetd->etdgi+1); goto realemd; @@ -749,7 +741,6 @@ UNCH *tbuf; /* Work area for tokenization[LITLEN+2]. */ } /* PARAMETER 3: End of declaration. */ - pcbmd.newstate = 0; parsemd(tbuf, NAMECASE, &pcblitp, NAMELEN); TRACEMD(emd); if (pcbmd.action!=EMD) mderr(126, (UNCH *)0, (UNCH *)0); diff --git a/usr.bin/sgmls/sgmls/msg.h b/usr.bin/sgmls/sgmls/msg.h index fa97a4cd9288..5526337e5af5 100644 --- a/usr.bin/sgmls/sgmls/msg.h +++ b/usr.bin/sgmls/sgmls/msg.h @@ -112,7 +112,7 @@ struct { /* 90 */ {"\"%s\" keyword is for unsupported feature; declaration terminated", 'E', 'D'}, /* 91 */ {"Attribute specification list in prolog cannot be empty", 'E', 'D'}, /* 92 */ {"Document ended invalidly within a literal; parsing ended", 'C', 'S'}, -/* 93 */ {"Short ref in map \"%2$s\" to undeclared entity \"%1$s\" treated as data", 'E', 'C'}, +/* 93 */ {"General entity \"%s\" in short reference map \"%s\" undeclared", 'E', 'D'}, /* 94 */ {"Could not reopen file to continue entity \"%s\"; entity terminated", 'E', 'R'}, /* 95 */ {"Out-of-context data ended %s document element (and parse)", 'E', 'C'}, /* 96 */ {"Short start-tag (no GI) ended %s document element (and parse)", 'E', 'C'}, @@ -169,16 +169,16 @@ struct { /* 147 */ {"Could not find external general entity \"%s\"", 'I', 'R'}, /* 148 */ {"Could not find external parameter entity \"%s\"", 'I', 'R'}, /* 149 */ {"Reference to non-existent general entity \"%s\" ignored", 'E', 'R'}, -/* 150 */ {"Could not find entity \"%s\" using default declaration", 'E', 'R'}, +/* 150 */ {"Could not find entity \"%s\" using default declaration", 'I', 'R'}, /* 151 */ {"Could not find entity \"%2$s\" in attribute %1$s using default declaration", 'E', 'R'}, -/* 152 */ {"Short reference map \"%s\" used in DTD but not defined", 'I', 'D'}, +/* 152 */ {"Short 
reference map \"%s\" used in USEMAP declaration but not defined; declaration will be ignored", 'E', 'D'}, /* 153 */ {"End-tag minimization should be \"O\" for element with CONREF attribute", 'I', 'D'}, /* 154 */ {"Declared value of data attribute cannot be ENTITY or ENTITIES", 'E', 'D' }, /* 155 */ {"Declared value of data attribute cannot be IDREF or IDREFS", 'E', 'D' }, /* 156 */ {"Declared value of data attribute cannot be NOTATION", 'E', 'D' }, /* 157 */ {"CURRENT cannot be specified for a data attribute", 'E', 'D' }, /* 158 */ {"CONREF cannot be specified for a data attribute", 'E', 'D' }, -/* 159 */ {"Short reference map for element \"%s\" not defined; ignored", 'E', 'C'}, +/* 159 */ {"Parameter must be a number or CONTROLS or NONE", 'E', 'D'}, /* 160 */ {"Cannot create temporary file", 'C', 'R'}, /* 161 */ {"Document ended invalidly within SGML declaration", 'C', 'D'}, /* 162 */ {"Capacity limit %s exceeded by %s points", 'W', 'Q'}, @@ -208,9 +208,9 @@ struct { /* 186 */ {"\"%s\" invalid; must be \"PUBLIC\" or \"SGMLREF\"", 'E', 'D'}, /* 187 */ {"Feature \"%s\" is not supported", 'E', 'U'}, /* 188 */ {"Too many open subdocument entities", 'E', 'Q'}, -/* 189 */ {"Invalid formal public identifier", 'I', 'D'}, -/* 190 */ {"Public text class should have been \"%s\"", 'I', 'D'}, -/* 191 */ {"Character number %s must be non-SGML", 'W', 'D'}, +/* 189 */ {"Invalid formal public identifier", 'W', 'D'}, +/* 190 */ {"Public text class must be \"%s\"", 'W', 'D'}, +/* 191 */ {"Use of character number %s as an SGML character is not supported", 'W', 'U'}, /* 192 */ {"Notation \"%s\" not defined in DTD", 'W', 'D'}, /* 193 */ {"Unclosed start or end tag requires \"SHORTTAG YES\"", 'W', 'M'}, /* 194 */ {"Net-enabling start tag requires \"SHORTTAG YES\"", 'W', 'M'}, @@ -218,12 +218,12 @@ struct { /* 196 */ {"Undelimited attribute value requires \"SHORTTAG YES\"", 'W', 'M'}, /* 197 */ {"Attribute specification omitted for \"%s\": requires markup minimization", 'W', 'M'}, /* 198 
*/ {"Concrete syntax does not have any short reference delimiters", 'E', 'D'}, -/* 199 */ {"Character number %s does not exist in the base character set", 'E', 'D'}, +/* 199 */ {"Character number %s not in the base character set; assuming UNUSED", 'E', 'D'}, /* 200 */ {"Character number %s is UNUSED in the syntax reference character set", 'E', 'D'}, /* 201 */ {"Character number %s was not described in the syntax reference character set", 'E', 'D'}, /* 202 */ {"Character number %s in the syntax reference character set has no corresponding character in the system character set", 'E', 'D'}, /* 203 */ {"Character number %s was described using an unknown base set", 'E', 'D'}, -/* 204 */ {"Duplication specification for added funtion \"%s\"", 'E', 'D'}, +/* 204 */ {"Duplication specification for added function \"%s\"", 'E', 'D'}, /* 205 */ {"Added function character cannot be \"%s\"", 'E', 'D'}, /* 206 */ {"Only reference concrete syntax function characters supported", 'E', 'U'}, /* 207 */ {"Only reference concrete syntax general delimiters supported", 'E', 'U'}, @@ -235,8 +235,8 @@ struct { /* 213 */ {"Duplicate replacement reserved name \"%s\"", 'E', 'D'}, /* 214 */ {"Quantity \"%s\" must not be less than %s", 'E', 'D'}, /* 215 */ {"Only values up to %2$s are supported for quantity \"%1$s\"", 'E', 'U'}, -/* 216 */ {"Exclusions attempt to change required status of group in \"%s\"", 'E', 'C'}, -/* 217 */ {"Exclusion cannot apply to token \"%s\" in content model for \"%s\"", 'E', 'C'}, +/* 216 */ {"%s element cannot be excluded from %s element because it is neither inherently optional nor a member of an or group", 'E', 'C'}, +/* 217 */ {"Marked section not allowed in other prolog", 'E', 'C'}, /* 218 */ {"Required %s attribute was not specified for entity %s", 'E', 'C'}, /* 219 */ {"UCNMSTRT must have the same number of characters as LCNMSTRT", 'E', 'D'}, /* 220 */ {"UCNMCHAR must have the same number of characters as LCNMCHAR", 'E', 'D'}, @@ -249,4 +249,10 @@ struct { /* 
227 */ {"Unrecognized designating escape sequence \"%s\"", 'I', 'U'}, /* 228 */ {"Earlier reference to entity \"%s\" used default entity", 'I', 'D'}, /* 229 */ {"Reference to non-existent parameter entity \"%s\" ignored", 'E', 'R'}, +/* 230 */ {"DSC within marked section; marked section terminated", 'E', 'C'}, +/* 231 */ {"Document element end tag can only occur in document element because entity end not allowed in other prolog", 'E', 'C'}, +/* 232 */ {"Character reference not allowed in other prolog", 'E', 'C'}, +/* 233 */ {"USEMAP declaration not allowed in other prolog", 'E', 'D'}, +/* 234 */ {"Entity reference not allowed in other prolog", 'E', 'C'}, +/* 235 */ {"Value assigned to capacity %s exceeds value assigned to TOTALCAP", 'W', 'D'}, }; diff --git a/usr.bin/sgmls/sgmls/msgcat.c b/usr.bin/sgmls/sgmls/msgcat.c index ec6a8b56023f..5c7ee9f1418d 100644 --- a/usr.bin/sgmls/sgmls/msgcat.c +++ b/usr.bin/sgmls/sgmls/msgcat.c @@ -25,6 +25,12 @@ merging catalogues. */ #define P(parms) () #endif +#ifdef USE_ISASCII +#define ISASCII(c) isascii(c) +#else +#define ISASCII(c) (1) +#endif + /* Default message set. 
*/ #define NL_SETD 1 @@ -48,7 +54,7 @@ struct message { unsigned setnum; char *text; }; - + struct cat { char *name; int loaded; @@ -117,7 +123,7 @@ int oflag; if (!name) return 0; - + catp = (struct cat *)malloc(sizeof *catp); if (!catp) return 0; @@ -476,7 +482,7 @@ int quote; p[i] = '\0'; return p; } - + /* 0 success, -1 error */ static @@ -559,7 +565,7 @@ struct message **table; unsigned setnum, msgnum; { struct message **pp; - + for (pp = &table[hash(setnum, msgnum)]; *pp; pp = &(*pp)->next) if ((*pp)->setnum == setnum && (*pp)->msgnum == msgnum) { struct message *p = *pp; @@ -638,9 +644,9 @@ char **argv; struct message **list; unsigned setnum; struct message *table[HASH_TAB_SIZE]; - + program_name = argv[0]; - + if (argc < 3) usage(); @@ -666,7 +672,7 @@ char **argv; fclose(fp); } } - + errno = 0; fp = fopen(argv[1], "w"); if (!fp) @@ -687,7 +693,7 @@ char **argv; list[j++] = p; } assert(j == nmessages); - + qsort((UNIV)list, nmessages, sizeof(struct message *), message_compare); setnum = NL_SETD; @@ -728,8 +734,8 @@ VOID fatal(char *message,...) message = va_arg(ap, char *); #else /* not VARARGS */ va_start(ap, message); -#endif /* not VARARGS */ - +#endif /* not VARARGS */ + fprintf(stderr, "%s: ", program_name); vfprintf(stderr, message, ap); putc('\n', stderr); @@ -739,7 +745,8 @@ VOID fatal(char *message,...) 
static int message_compare(p1, p2) -UNIV p1, UNIV p2; +UNIV p1; +UNIV p2; { struct message *m1 = *(struct message **)p1; struct message *m2 = *(struct message **)p2; @@ -763,7 +770,7 @@ FILE *fp; for (; *s; s++) { if (*s == '\\') fputs("\\\\", fp); - else if (ISASCII(*s) && isprint((UNCH)*s)) + else if (ISASCII(*s) && isprint((unsigned char)*s)) putc(*s, fp); else { switch (*s) { @@ -803,7 +810,7 @@ char **argv; { nl_catd catd; int msgnum, setnum; - + if (argc != 2) { fprintf(stderr, "usage: %s catalogue\n", argv[0]); exit(1); diff --git a/usr.bin/sgmls/sgmls/pars1.c b/usr.bin/sgmls/sgmls/pars1.c index 86161077e032..0a67cbc5b875 100644 --- a/usr.bin/sgmls/sgmls/pars1.c +++ b/usr.bin/sgmls/sgmls/pars1.c @@ -90,6 +90,7 @@ struct parse *pcb; /* Parse control block for this parse. */ case STG_: /* Process non-null start-tag. */ CTRSET(tagctr); /* Start counting tag length. */ + tages = es; parsenm(tbuf, NAMECASE); /* Get the GI. */ newetd = etdref(tbuf); if (newetd && newetd->adl) { @@ -264,6 +265,10 @@ struct parse *pcb; /* Parse control block for this parse. */ case RSR_: /* Record start: ccnt=0; ++rcnt.*/ ++RCNT; CTRSET(RSCC); + return RSR_; + case MSS_: + if (ts == 0) synerr(217, pcb); + return MSS_; default: return (int)pcb->action; /* Default (MD_ MDC_ MSS_ MSE_ PIS_). */ } @@ -288,8 +293,12 @@ struct parse *pcb; /* Parse control block for this parse. */ */ int nstetd() { - newetd = ts>0 ? tags[ts].tetd - : tags[0].tetd->etdmod[2].tu.thetd; + if (sd.omittag && ts > 0) + newetd = tags[ts].tetd; + else if (!sd.omittag && lastetd != 0) + newetd = lastetd; + else + newetd = tags[0].tetd->etdmod[2].tu.thetd; stagmin = MINNULL; stagreal = ETDNULL; etisw = 0; return stag(0); @@ -332,11 +341,6 @@ struct parse *pcb; /* Parse control block for this parse. */ int rc; /* Return code from entopen. 
*/ if (tags[ts].tsrm==SRMNULL || !tags[ts].tsrm[srn]) return ENTUNDEF; - if (!tags[ts].tsrm[srn]->estore) { - sgmlerr(93, pcb, tags[ts].tsrm[srn]->ename+1, - tags[ts].tsrm[0]->ename+1); - return(ENTUNDEF); - } rc = entopen(tags[ts].tsrm[srn]); if (rc==ENTDATA) return DEF_; if (rc==ENTPI) return PIS_; @@ -395,7 +399,14 @@ int parsepro() REPEATCC; /* Put back MSC so it follows referenced DTD. */ entref(indtdent); } - else mddtde(tbuf); + else { + if (mslevel > 0) { + sgmlerr(230, propcb, (UNCH *)0, (UNCH *)0); + mslevel = 0; + msplevel = 0; + } + mddtde(tbuf); + } continue; case MD_: @@ -442,12 +453,13 @@ int parsepro() return(PIS_); case EOD_: /* Return end of primary entity. */ - if (!sw.onlypro || propcb != &pcbpro || !dtdsw) - sgmlerr(127, propcb, (UNCH *)0, (UNCH *)0); - else { + if (dtdsw && propcb == &pcbpro) { + /* We've had a DTD, so check it. */ setdtype(); checkdtd(); } + if (!sw.onlypro || propcb != &pcbpro || !dtdsw) + sgmlerr(127, propcb, (UNCH *)0, (UNCH *)0); return propcb->action; case PIS_: /* Return processing instruction (string). 
*/ sgmlsw++; /* SGML declaration not allowed after PI */ @@ -457,6 +469,9 @@ int parsepro() synerr(E_RESTART, propcb); REPEATCC; continue; + case ETE_: /* End tag ended prolog */ + REPEATCC; + /* fall through */ case STE_: /* Start tag ended prolog */ REPEATCC; REPEATCC; @@ -506,22 +521,33 @@ static VOID checkdtd() { struct dcncb *np; + struct srh *sp; if (sw.swundef) { int i; struct etd *ep; - struct srh *sp; for (i = 0; i < ETDHASH; i++) for (ep = etdtab[i]; ep; ep = ep->etdnext) if (!ep->etdmod) sgmlerr(140, (struct parse *)0, ep->etdgi + 1, (UNCH *)0); - for (sp = srhtab[0]; sp; sp = sp->enext) - if (sp->srhsrm[0] == 0) - sgmlerr(152, (struct parse *)0, sp->ename + 1, - (UNCH *)0); } + for (sp = srhtab[0]; sp; sp = sp->enext) + if (sp->srhsrm[0] == 0) + sgmlerr(152, (struct parse *)0, sp->ename + 1, (UNCH *)0); + else { + int i; + for (i = 1; i < lex.s.dtb[0].mapdata + 1; i++) { + struct entity *ecb = sp->srhsrm[i]; + if (ecb && !ecb->estore) { + sgmlerr(93, (struct parse *)0, + ecb->ename + 1, + sp->srhsrm[0]->ename + 1); + sp->srhsrm[i] = 0; + } + } + } for (np = dcntab[0]; np; np = np->enext) if (!np->defined) sgmlerr(192, (struct parse *)0, np->ename + 1, (UNCH *)0); @@ -604,7 +630,7 @@ struct mpos *newmpos() VOID endprolog() { int i; - + ambigfree(); if (dtdsw) { frem((UNIV)nmgrp); @@ -739,9 +765,8 @@ int dataret; /* Data pending: DAF_ REF_ 0=not #PCDATA. */ realrc = RCEND; break; case RCHITMEX: /* Invalid minus exclusion for required element. */ -#if 0 /* This will have been detected by exclude.c. */ - sgmlerr(E_MEXERR, &pcbstag, NEWGI, tags[mexts].tetd->etdgi+1); -#endif + sgmlerr(216, &pcbstag, NEWGI, tags[mexts].tetd->etdgi+1); + /* fall through */ case RCHIT: /* Start-tag was valid. */ realrc = RCHIT; break; @@ -764,11 +789,9 @@ int dataret; /* Data pending: DAF_ REF_ 0=not #PCDATA. */ return ETG_; case RCREQ: /* Stack compulsory GI, then retry start-tag. */ if (!BADPTR(nextetd)) { -#if 0 /* This will have been detected in exclude.c. 
*/ if ((mexts = pexmex(nextetd))>0) sgmlerr(E_MEXERR, &pcbstag, nextetd->etdgi+1, tags[mexts].tetd->etdgi+1); -#endif if (!nextetd->etdmod) { sgmlerr(53, &pcbstag, nextetd->etdgi+1, (UNCH *)0); etdset(nextetd, (UNCH)SMO+EMO+ETDOCC, &undechdr, @@ -847,8 +870,8 @@ struct etd *curetd; /* The etd for this entry. */ /* If etd has ALT table, use it; otherwise, use last element's ALT. */ if (curetd->etdsrm) { if (curetd->etdsrm != SRMNULL && curetd->etdsrm[0] == NULL) { - /* Map hasn't been defined. Ignore it. */ - sgmlerr(159, &pcbstag, curetd->etdgi + 1, (UNCH *)0); + /* Map hasn't been defined. Ignore it. + We already gave an error. */ curetd->etdsrm = 0; tags[ts].tsrm = tags[ts-1].tsrm; } @@ -867,8 +890,6 @@ struct etd *curetd; /* The etd for this entry. */ tags[ts].tpos[1].t = 1; /* 1st token is next in grp to be tested. */ HITCLEAR(tags[ts].tpos[1].h); /* No hits yet as yet. */ TRACESTK(&tags[ts], ts, etictr); - - exclude(); return; } /* ETAG: Check validity of an end-tag by seeing if it matches any tag @@ -908,6 +929,7 @@ VOID destack() are required tags left, and no CONREF attribute was specified, issue an error message. */ + lastetd = tags[ts].tetd; if (!GET(tags[ts].tetd->etdmod->ttype, MKEYWORD) && !conrefsw && !econtext(tags[ts].tetd->etdmod, tags[ts].tpos, &tags[ts].status)) { @@ -945,6 +967,10 @@ VOID destack() /* TEMP: See if parser bug caused stack to go below zero. */ else if (ts<0) {sgmlerr(64, conpcb, (UNCH *)0, (UNCH *)0); ts = 0;} TRACEDSK(&tags[ts], &tags[ts+1], ts, etictr); + if (ts == 0) { + docelsw = 1; /* Finished document element. */ + if (es > 0) sgmlerr(231, conpcb, (UNCH *)0, (UNCH *)0); + } } /* Local Variables: diff --git a/usr.bin/sgmls/sgmls/pars2.c b/usr.bin/sgmls/sgmls/pars2.c index cc4c4ec9cfcd..4249797586fd 100644 --- a/usr.bin/sgmls/sgmls/pars2.c +++ b/usr.bin/sgmls/sgmls/pars2.c @@ -104,6 +104,7 @@ struct parse *pcb; /* Current parse control block. */ parsenm(entbuf, NAMECASE); parse(&pcbref); /* Handle reference terminator. 
*/ charrefa(entbuf); + if (docelsw) synerr(232, pcb); continue; case SYS_: /* Invalid NONCHAR: send msg and ignore. */ @@ -172,10 +173,10 @@ int ch; change the entity, since the entity might be referenced again. So in this case we copy the entity. This is inefficient, but it will only happen in a case like this: - + - + Usually character references will have been processed while the entity was being defined. */ if (*FPOS != ch) { @@ -754,12 +755,19 @@ UNCH del; /* Literal delimiter: LIT LITA PIC EOS */ #endif { UNCH *pt = tbuf; /* Current pointer into tbuf. */ - UNCH lexsv = lexlms[del];/* Saved lexlms value of delimiter. */ + UNCH lexsv = pcb->plex[del];/* Saved value of delimiter in lexical table. */ int essv = es; /* Entity stack level when literal started. */ UNCH datadel; /* Delimiter for CDATA/SDATA entity. */ - int parmlen = (int)maxlen; /* Working limit (to be decremented). */ + int parmlen = (int)maxlen + 1; /* Working limit (to be decremented). */ + int overflow = 0; /* Did the buffer overflow? */ + + pcb->plex[del] = pcb->plex == lexlms ? lex.l.litc : lex.l.minlitc; + + /* The RPR_ action may cause the length of the literal to decrease by + 1 (this discards a final space in a minimum literal); so while + building the literal, the length must be allowed to grow to + maxlen + 1. */ - lexlms[del] = lex.l.litc; /* Set delimiter to act as literal close. */ do { switch (parse(pcb)) { case LP2_: /* Move 2nd char back to buffer; redo prev.*/ @@ -767,15 +775,19 @@ UNCH del; /* Literal delimiter: LIT LITA PIC EOS */ case LPR_: /* Move previous char to buffer; REPEATCC; */ REPEATCC; case MLA_: /* Move character to buffer. 
*/ + if (parmlen <= 0) { overflow = 1; break; } *pt++ = *FPOS; --parmlen; continue; case FUN_: /* Function char found; replace with space.*/ + if (parmlen <= 0) { overflow = 1; break; } *pt++ = ' '; --parmlen; continue; case RSM_: /* Record start: ccnt=0; ++rcnt.*/ - ++RCNT; CTRSET(RSCC); *pt++ = *FPOS; --parmlen; + ++RCNT; CTRSET(RSCC); + if (parmlen <= 0) { overflow = 1; break; } + *pt++ = *FPOS; --parmlen; continue; case ERX_: /* Entity reference: cancel LITC delim. */ @@ -806,7 +818,12 @@ UNCH del; /* Literal delimiter: LIT LITA PIC EOS */ pt += parmlensv - parmlen; continue; } - if ((parmlen -= (int)datalen+2)<0) {entdatsw = 0; break;} + if (parmlen < datalen + 2) { + entdatsw = 0; + overflow = 1; + break; + } + parmlen -= datalen + 2; *pt++ = datadel = BITON(entdatsw, CDECONT) ? DELCDATA : DELSDATA; entdatsw = 0; @@ -816,7 +833,8 @@ UNCH del; /* Literal delimiter: LIT LITA PIC EOS */ continue; case NON_: /* Non-SGML char (delimited and shifted). */ - if ((parmlen -= 2)<0) break; + if (parmlen < 2) { overflow = 1; break; } + parmlen -= 2; memcpy( pt , nonchbuf, 2 ); pt += 2; continue; @@ -832,19 +850,25 @@ UNCH del; /* Literal delimiter: LIT LITA PIC EOS */ break; } break; - } while (parmlen>=0 && pcb->action!=TER_); + } while (!overflow && pcb->action!=TER_); + + if (parmlen <= 0) { + --pt; + overflow = 1; + } + if (overflow) + sgmlerr(134, pcb, ntoa((int)maxlen),(UNCH *)0); - if (parmlen<0) {--pt; sgmlerr(134, pcb, ntoa((int)maxlen),(UNCH *)0); REPEATCC;} datalen = (UNS)(pt-tbuf);/* To return PI string to text processor. */ *pt++ = EOS; - lexlms[del] = lexsv; /* Restore normal delimiter handling. */ + pcb->plex[del] = lexsv; /* Restore normal delimiter handling. */ if (es!=essv) synerr(37, pcb); - return; } /* Handle a data entity in a tokenized attribute value literal. Parmlen is amount of space left. Return new parmlen. If there's not -enough space return -1, and copy up to parmlen + 1 characters. 
*/ +enough space return -1, and copy up to parmlen + 1 characters. Only +tokenization should be done, not attribute value interpretation. */ int tokdata(pt, parmlen) UNCH *pt; @@ -852,14 +876,9 @@ int parmlen; { int skip = (pcblitt.newstate == 0); int i; - + for (i = 0; parmlen >= 0 && i < datalen; i++) { switch (data[i]) { - case RSCHAR: - /* ignore it */ - break; - case RECHAR: - case TABCHAR: case SPCCHAR: if (!skip) { *pt++ = data[i]; @@ -935,6 +954,7 @@ UNS tokenlen; /* Max length of expected token: NAMELEN LITLEN */ return (int)pcb->action; case NUM: /* Number or number token string. */ parsetkn(pt, (UNCH)((int)tokenlen<=NAMELEN ? NU:NMC), (int)tokenlen); + if (tokenlen > NAMELEN) pcb->newstate = 0; return (int)pcb->action; case PENR: REPEATCC; @@ -976,6 +996,11 @@ int dctype; /* Content type (0=model). */ case OREP: /* OREP occurrence indicator for model. */ SET(gbuf[1].ttype, TOREP|TXOREP); break; + case EE_: + if (es < mdessv) { + synerr(37, &pcbmd); + mdessv = es; + } default: /* RCR_: Repeat char and return. 
*/ break; } diff --git a/usr.bin/sgmls/sgmls/pcbrf.c b/usr.bin/sgmls/sgmls/pcbrf.c index a18617e9e96e..554fdfb859ca 100644 --- a/usr.bin/sgmls/sgmls/pcbrf.c +++ b/usr.bin/sgmls/sgmls/pcbrf.c @@ -39,14 +39,14 @@ et0a[]={DAS_,DAS_,DAS_,DAS_,DAS_,NON_,GET_,GET_,RSR_,SR2_,DAS_,DAS_,NSC_,LAS_, REF_,NOP_,DAS_,NED_,SR10,DAS_,DAS_,NOP_,SR25,DAS_,SR11,DAS_,LAS_,FCE_}, da0 []={DA0 ,DA0 ,DA0 ,DA0 ,DA1 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,DA0 ,ET0 ,ET0 , - ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,DA0 ,DA0 ,DA0 ,ET0 ,ET0 },/*da0*/ + ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,DA0 ,ET0 ,DA0 ,ET0 ,ET0 },/*da0*/ da0a[]={NOP_,NOP_,NOP_,NOP_,NOP_,DAF_,DAF_,DAF_,DAF_,DAF_,DAF_,NOP_,DAF_,DAF_, - DAF_,DAF_,NOP_,DAF_,DAF_,DAF_,NOP_,DAF_,DAF_,NOP_,NOP_,NOP_,DAF_,DAF_}, + DAF_,DAF_,NOP_,DAF_,DAF_,DAF_,NOP_,DAF_,DAF_,NOP_,DAF_,NOP_,DAF_,DAF_}, da1 []={DA0 ,DA0 ,DA0 ,DA0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,DA0 ,ET0 ,ET0 , - ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,DA0 ,DA0 ,DA0 ,ET0 ,ET0 },/*da1*/ + ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,ET0 ,DA0 ,ET0 ,ET0 ,DA0 ,ET0 ,DA0 ,ET0 ,ET0 },/*da1*/ da1a[]={NOP_,NOP_,NOP_,NOP_,DAR_,DAF_,DAF_,DAR_,DAF_,DAR_,DAR_,NOP_,DAF_,DAF_, - DAF_,DAF_,NOP_,DAF_,DAF_,DAR_,NOP_,DAF_,DAF_,NOP_,NOP_,NOP_,DAF_,DAF_}, + DAF_,DAF_,NOP_,DAF_,DAF_,DAR_,NOP_,DAF_,DAF_,NOP_,DAF_,NOP_,DAF_,DAF_}, er0 []={ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ER0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 , ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,ET0 ,CR0 ,ET0 ,ET0 ,ET0 },/*er0*/ @@ -176,7 +176,7 @@ static UNCH et2 []={ET2 ,ET2 ,ET2 ,ET2 ,SP2 ,ET2 ,ET2 ,ET2 ,RS2 ,ET2 ,TB2 ,ET2 ,ET2 ,ER2 , ET2 ,SC2 ,ET2 ,ET2 ,ET2 ,SR2 ,ET2 ,ME2 ,ET2 ,ET2 ,ET2 ,ET2 ,ES2 ,ET2 },/*et2*/ et2a[]={DCE_,DCE_,DCE_,DCE_,NOP_,DCE_,GET_,GET_,RS_ ,SR2_,NOP_,DCE_,DCE_,LAS_, - NOP_,NOP_,DCE_,NED_,SR10,NOP_,DCE_,NOP_,DCE_,DCE_,SR11,DCE_,LAS_,DCE_}, + NOP_,NOP_,DCE_,NED_,SR10,NOP_,DCE_,NOP_,SR25,DCE_,SR11,DCE_,LAS_,FCE_}, er2 []={ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ER2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 , ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 ,ET2 
,ET2 ,CR2 ,ET2 ,ET2 ,ET2 },/*er2*/ @@ -391,6 +391,7 @@ struct parse pcbconc = {"CONC", lexcon, conctab, 0, 0, 0, 0}; #define ES7 2 /* TAGO found; start lookahead buffer. */ #define MD7 4 /* MDO found (TAGO, MDO[2]). */ #define MC7 6 /* MDO, COM found. */ +#define EE7 8 /* TAGO, ETI found */ static UNCH /* free nu nmc nms spc non ee eob rs re sep cde nsc ero @@ -401,9 +402,9 @@ et7a[]={DCE_,DCE_,DCE_,DCE_,NOP_,DCE_,EE_ ,GET_,RS_ ,NOP_,NOP_,DCE_,DCE_,DCE_, DCE_,DCE_,DCE_,DCE_,DCE_,DCE_,DCE_,DCE_,DCE_,DCE_,DCE_,LAS_}, es7 []={ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ES7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 , - ET7 ,ET7 ,ET7 ,ET7 ,MD7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 },/*es7*/ + ET7 ,ET7 ,EE7 ,ET7 ,MD7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 },/*es7*/ es7a[]={PEP_,PEP_,PEP_,STE_,PEP_,PEP_,PEP_,GET_,PEP_,PEP_,PEP_,PEP_,PEP_,PEP_, - PEP_,PEP_,PEP_,PEP_,LAM_,PEP_,PEP_,PEP_,PIS_,PEP_,STE_,PEP_}, + PEP_,PEP_,LAM_,PEP_,LAM_,PEP_,PEP_,PEP_,PIS_,PEP_,STE_,PEP_}, md7 []={ET7, ET7, ET7, ET7, ET7 ,ET7, ET7, MD7, ET7 ,ET7 ,ET7 ,ET7, ET7, ET7, ET7, MC7, ET7, ET7, ET7, ET7 ,ET7, ET7, ET7, ET7 ,ET7, ET7 },/*md7*/ @@ -415,12 +416,18 @@ mc7 []={ET7, ET7, ET7, ET7, ET7, ET7 ,ET7, MC7, ET7 ,ET7, ET7 ,ET7, ET7, ET7, mc7a[]={LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_, LAF_,MDC_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_}, -*protab[] = {et7, et7a, es7, es7a, md7, md7a, mc7, mc7a}; +ee7 []={ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,EE7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 , + ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 ,ET7 },/*ee7*/ +ee7a[]={LAF_,LAF_,LAF_,ETE_,LAF_,LAF_,LAF_,GET_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_, + LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,LAF_,ETE_,LAF_}, + +*protab[] = {et7, et7a, es7, es7a, md7, md7a, mc7, mc7a, ee7, ee7a}; struct parse pcbpro = {"PRO", lexcon, protab, 0, 0, 0, 0}; #undef ET7 #undef ES7 #undef MD7 #undef MC7 +#undef EE7 /* PCBMDS: State and action table for parse of markup declaration subset. 
Initial state assumes subset just began (MSO found). */ @@ -550,7 +557,7 @@ dn01a[]={INV_,INV_,INV_,NOP_,NOP_,SYS_,EE_ ,GET_,RS_ ,INV_,INV_,INV_,INV_,INV_, INV_,INV_,INV_,INV_,INV_,INV_,INV_,INV_,DTAG,INV_}, dt01 []={TK1 ,TK1 ,TK1 ,DT1 ,DT1 ,DT1 ,DT1 ,DT1 ,DT1 ,TK1 ,DT1 ,DT1 ,LI1 ,LA1 , - CO1 ,TK1 ,TK1 ,DT1 ,DT1 ,TK1 ,TK1 ,TK1 ,DT1 ,TK1 },/*dt1*/ + SP1 ,TK1 ,TK1 ,DT1 ,DT1 ,TK1 ,TK1 ,TK1 ,DT1 ,TK1 },/*dt1*/ dt01a[]={INV_,INV_,INV_,NOP_,NOP_,SYS_,EE_ ,GET_,RS_ ,INV_,NOP_,NOP_,NOP_,NOP_, GRPE,INV_,INV_,NOP_,NOP_,INV_,INV_,INV_,NOP_,INV_}, @@ -798,23 +805,23 @@ struct parse pcblitr = {"LITR", lexlms, litrtab, 0, 0, 0, 0}; #define SP0 4 /* SPACE/RE sequence begun. */ static UNCH -/* free num min nms spc non ee eob rs re sep cde nsc ero - mdo msc mso pero rni tagc tago litc */ -ls10 []={VA0 ,VA0 ,VA0 ,VA0 ,LS0 ,VA0 ,LS0 ,LS0 ,LS0 ,LS0 ,LS0 ,VA0 ,VA0 ,VA0 , - VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,LS0 },/*ls0*/ -ls10a[]={MLE_,MLA_,MLA_,MLA_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,MLE_,SYS_,SYS_,MLE_, - MLE_,MLE_,MLE_,MLE_,MLE_,MLE_,MLE_,TER_}, -va10 []={VA0 ,VA0 ,VA0 ,VA0 ,SP0 ,VA0 ,VA0 ,VA0 ,VA0 ,SP0 ,SP0 ,VA0 ,VA0 ,VA0 , - VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,LS0 },/*va0*/ -da10a[]={MLE_,MLA_,MLA_,MLA_,MLA_,SYS_,EOF_,GET_,RS_ ,FUN_,MLE_,SYS_,SYS_,MLE_, - MLE_,MLE_,MLE_,MLE_,MLE_,MLE_,MLE_,TER_}, -sp10 []={VA0 ,VA0 ,VA0 ,VA0 ,SP0 ,VA0 ,VA0 ,SP0 ,SP0 ,SP0 ,SP0 ,VA0 ,VA0 ,VA0 , - VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,VA0 ,LS0 },/*sp0*/ -sp10a[]={MLE_,MLA_,MLA_,MLA_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,MLE_,SYS_,SYS_,MLE_, - MLE_,MLE_,MLE_,MLE_,MLE_,MLE_,MLE_,RPR_}, +/* free num min nms spc non ee eob rs re sep cde nsc + litc */ +ls10 []={VA0 ,VA0 ,VA0 ,VA0 ,LS0 ,VA0 ,LS0 ,LS0 ,LS0 ,LS0 ,LS0 ,VA0 ,VA0 , + LS0 },/*ls0*/ +ls10a[]={MLE_,MLA_,MLA_,MLA_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,MLE_,SYS_,SYS_, + TER_}, +va10 []={VA0 ,VA0 ,VA0 ,VA0 ,SP0 ,VA0 ,VA0 ,VA0 ,VA0 ,SP0 ,SP0 ,VA0 ,VA0 , + LS0 },/*va0*/ +da10a[]={MLE_,MLA_,MLA_,MLA_,MLA_,SYS_,EOF_,GET_,RS_ ,FUN_,MLE_,SYS_,SYS_, + TER_}, +sp10 []={VA0 ,VA0 ,VA0 ,VA0 
,SP0 ,VA0 ,VA0 ,SP0 ,SP0 ,SP0 ,SP0 ,VA0 ,VA0 , + LS0 },/*sp0*/ +sp10a[]={MLE_,MLA_,MLA_,MLA_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,MLE_,SYS_,SYS_, + RPR_}, *litvtab[] = {ls10, ls10a, va10, da10a, sp10, sp10a}; -struct parse pcblitv = {"LITV", lexlms, litvtab, 0, 0, 0, 0}; +struct parse pcblitv = {"LITV", lexmin, litvtab, 0, 0, 0, 0}; #undef LS0 #undef VA0 #undef SP0 @@ -881,9 +888,9 @@ struct parse pcblitt = {"LITT", lexlms, litttab, 0, 0, 0, 0}; Columns are based on LEXMARK.C. */ /* Symbols for state names (end with a number). */ -#define SP1 0 /* Separator before token expected. */ -#define TK1 2 /* Token expected. */ -#define CM0 4 /* COM[1] found when sep expected: possible comment, MGRP.*/ +#define SP1 0 /* Separator before token expected (but not -). */ +#define SP2 2 /* Separator before token expected. */ +#define TK1 4 /* Token expected. */ #define CM1 6 /* COM[1] found: possible comment, MGRP, or minus.*/ #define CM2 8 /* COM[2] found; in comment. */ #define CM3 10 /* Ending COM[1] found; end comment or continue it. */ @@ -896,22 +903,23 @@ int pcbmdtk = TK1; /* PCBMD: token expected. 
*/ static UNCH /* bit nmc num nms spc non ee eob rs com eti grpo lit lita dso dsc pero plus refc rni tagc tago vi */ -sp21 []={SP1 ,SP1 ,SP1 ,SP1 ,TK1 ,SP1 ,TK1 ,SP1 ,TK1 ,CM0 ,SP1 ,TK1 ,TK1 ,TK1 , +sp21 []={SP1 ,SP1 ,SP1 ,SP1 ,TK1 ,SP1 ,TK1 ,SP1 ,TK1 ,SP1 ,SP1 ,TK1 ,TK1 ,TK1 , TK1 ,SP1 ,PR1 ,PX1 ,SP1 ,RN1 ,SP1 ,SP1 ,SP1 }, -sp21a[]={INV_,LEN_,LEN_,LEN_,NOP_,SYS_,EE_ ,GET_,RS_ ,NOP_,INV_,GRPS,LIT ,LITE, +sp21a[]={INV_,LEN_,LEN_,LEN_,NOP_,SYS_,EE_ ,GET_,RS_ ,LEN_,INV_,GRPS,LIT ,LITE, MDS ,INV_,NOP_,NOP_,INV_,NOP_,EMD ,INV_,INV_}, -tk21 []={SP1 ,SP1 ,SP1 ,SP1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,CM1 ,SP1 ,TK1 ,TK1 ,TK1 , +sp22 []={SP2 ,SP2 ,SP2 ,SP2 ,TK1 ,SP2 ,TK1 ,SP2 ,TK1 ,CM1 ,SP2 ,TK1 ,TK1 ,TK1 , + TK1 ,SP2 ,PR1 ,PX1 ,SP2 ,RN1 ,SP2 ,SP2 ,SP2 }, +sp22a[]={INV_,LEN_,LEN_,LEN_,NOP_,SYS_,EE_ ,GET_,RS_ ,NOP_,INV_,GRPS,LIT ,LITE, + MDS ,INV_,NOP_,NOP_,INV_,NOP_,EMD ,INV_,INV_}, + +tk21 []={SP1 ,SP1 ,SP2 ,SP1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,CM1 ,SP1 ,TK1 ,TK1 ,TK1 , TK1 ,SP1 ,PR1 ,PX1 ,SP1 ,RN1 ,SP1 ,SP1 ,SP1 }, tk21a[]={INV_,NMT ,NUM ,NAS ,NOP_,SYS_,EE_ ,GET_,RS_ ,NOP_,INV_,GRPS,LIT ,LITE, MDS ,INV_,NOP_,NOP_,INV_,NOP_,EMD ,INV_,INV_}, /* bit nmc num nms spc non ee eob rs com eti grpo lit lita dso dsc pero plus refc rni tagc tago vi */ -cm20 []={SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,CM0 ,SP1 ,CM0 ,SP1 ,CM2 ,SP1 ,SP1 ,SP1 ,SP1 , - SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 }, -cm20a[]={LNR_,LNR_,LNR_,LNR_,LNR_,SYS_,LNR_,GET_,LNR_,NOP_,LNR_,LNR_,LNR_,LNR_, - LNR_,LNR_,LNR_,LNR_,LNR_,LNR_,LNR_,LNR_,LNR_}, cm21 []={TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,CM1 ,TK1 ,CM1 ,TK1 ,CM2 ,TK1 ,TK1 ,TK1 ,TK1 , TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 }, @@ -930,8 +938,8 @@ cm23 []={CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,CM3 ,TK1 ,CM3 ,CM2 ,TK1 ,CM2 ,CM2 ,CM2 ,CM2 , cm23a[]={NOP_,NOP_,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_,NOP_, NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_,NOP_}, -pr21 []={SP1 ,SP1 ,SP1 ,TK1 ,TK1 ,PR1 ,SP1 ,PR1 ,TK1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 , - SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,TK1 ,SP1 ,SP1 ,SP1 }, +pr21 []={SP1 ,SP1 
,SP1 ,TK1 ,TK1 ,PR1 ,SP2 ,PR1 ,TK1 ,SP2 ,SP1 ,SP1 ,SP1 ,SP1 , + SP1 ,SP1 ,SP2 ,SP1 ,SP1 ,TK1 ,SP1 ,SP1 ,SP1 }, pr21a[]={PCI_,PCI_,PCI_,PER_,PEN ,SYS_,PENR,GET_,PEN ,PENR,PCI_,PCI_,PCI_,PCI_, PCI_,PCI_,PENR,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_}, @@ -945,12 +953,12 @@ rn21 []={TK1 ,TK1 ,TK1 ,SP1 ,TK1 ,RN1 ,TK1 ,RN1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 , rn21a[]={PCI_,PCI_,PCI_,RNS ,PCI_,SYS_,PCI_,GET_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_, PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_,PCI_}, -*mdtab[] = {sp21, sp21a, tk21, tk21a, cm20, cm20a, cm21, cm21a, cm22, cm22a, +*mdtab[] = {sp21, sp21a, sp22, sp22a, tk21, tk21a, cm21, cm21a, cm22, cm22a, cm23, cm23a, pr21, pr21a, px21, px21a, rn21, rn21a}; struct parse pcbmd = {"MD", lexmark, mdtab, 0, 0, 0, 0}; #undef SP1 +#undef SP2 #undef TK1 -#undef CM0 #undef CM1 #undef CM2 #undef CM3 @@ -1214,29 +1222,29 @@ static UNCH dso dsc pero plus refc rni tagc tago vi */ sp41 []={SP1 ,SP1 ,SP1 ,SP1 ,AN1 ,SP1 ,SP1 ,SP1 ,AN1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 , SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 }, -sp41a[]={INV_,LEN_,LEN_,LEN_,NOP_,SYS_,EOF_,GET_,RS_ ,INV_,ETIC,INV_,INV_,INV_, +sp41a[]={INV_,LEN_,LEN_,LEN_,NOP_,SYS_,EOF_,GET_,RS_ ,LEN_,ETIC,INV_,INV_,INV_, INV_,DSC ,INV_,INV_,INV_,INV_,TAGC,TAGO,INV_}, an41 []={SP1 ,SP1 ,SP1 ,SP2 ,AN1 ,AN1 ,AN1 ,AN1 ,AN1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 , SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 }, -an41a[]={INV_,NTV ,NTV ,NVS ,NOP_,SYS_,EOF_,GET_,RS_ ,INV_,ETIC,INV_,INV_,INV_, +an41a[]={INV_,NTV ,NTV ,NVS ,NOP_,SYS_,EOF_,GET_,RS_ ,NTV ,ETIC,INV_,INV_,INV_, INV_,DSC ,INV_,INV_,INV_,INV_,TAGC,TAGO,INV_}, sp42 []={SP1 ,SP1 ,SP1 ,SP1 ,VI1 ,SP2 ,SP2 ,SP2 ,VI1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 , SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,AV1 }, -sp42a[]={INV_,LEN_,LEN_,LEN_,NOP_,SYS_,EOF_,GET_,RS_ ,INV_,NASV,INV_,INV_,INV_, +sp42a[]={INV_,LEN_,LEN_,LEN_,NOP_,SYS_,EOF_,GET_,RS_ ,LEN_,NASV,INV_,INV_,INV_, INV_,NASV,INV_,INV_,INV_,INV_,NASV,NASV,NOP_}, /* bit nmc num nms spc non ee eob rs com eti grpo lit lita dso dsc pero plus refc rni tagc tago vi 
*/ -vi41 []={SP1 ,AN1 ,AN1 ,AN1 ,VI1 ,VI1 ,VI1 ,VI1 ,VI1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 , +vi41 []={SP1 ,AN1 ,AN1 ,AN1 ,VI1 ,VI1 ,VI1 ,VI1 ,VI1 ,AN1 ,SP1 ,SP1 ,SP1 ,SP1 , SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,AV1 }, -vi41a[]={INV_,NASV,NASV,NASV,NOP_,SYS_,EOF_,GET_,RS_ ,INV_,NASV,INV_,INV_,INV_, +vi41a[]={INV_,NASV,NASV,NASV,NOP_,SYS_,EOF_,GET_,RS_ ,NASV,NASV,INV_,INV_,INV_, INV_,NASV,INV_,INV_,INV_,INV_,NASV,NASV,NOP_}, av41 []={SP1 ,SP1 ,SP1 ,SP1 ,AV1 ,AV1 ,AV1 ,AV1 ,AV1 ,SP1 ,SP1 ,SP1 ,AN1 ,AN1 , SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 ,SP1 }, -av41a[]={INV_,AVU ,AVU ,AVU ,NOP_,SYS_,EOF_,GET_,RS_ ,INV_,INV_,INV_,AVD ,AVDA, +av41a[]={INV_,AVU ,AVU ,AVU ,NOP_,SYS_,EOF_,GET_,RS_ ,AVU ,INV_,INV_,AVD ,AVDA, INV_,INV_,INV_,INV_,INV_,INV_,INV_,INV_,INV_}, *stagtab[] = {sp41, sp41a, an41, an41a, sp42, sp42a, vi41, vi41a, av41, av41a}; @@ -1303,42 +1311,41 @@ struct parse pcbeal = {"EAL", lexgrp, ealtab, 0, 0, 0, 0}; /* Symbols for state names. */ -#define SP1 0 /* Separator before token expected. */ -#define TK1 2 /* Token expected. */ -#define CM0 4 /* COM[1] found when sep expected: possible comment.*/ +#define SP1 0 /* Separator before token expected (but not -) */ +#define SP2 2 /* Separator before token expected. */ +#define TK1 4 /* Token expected. */ #define CM1 6 /* COM[1] found: possible comment.*/ #define CM2 8 /* COM[2] found; in comment. */ #define CM3 10 /* Ending COM[1] found; end comment or continue it. 
*/ - static UNCH /* sig dat num nms spc non ee eob rs com lit lita tagc */ - -sp31 []={SP1 ,SP1 ,SP1 ,SP1 ,TK1 ,SP1 ,SP1 ,SP1 ,TK1 ,CM0 ,TK1 ,TK1 ,SP1 }, -sp31a[]={INV_,ISIG,LEN_,LEN_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,LIT1,LIT2,ESGD}, - -tk31 []={TK1 ,TK1 ,SP1 ,SP1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,CM1 ,TK1 ,TK1 ,SP1 }, + +sp31 []={SP1 ,SP1 ,SP1 ,SP1 ,TK1 ,SP1 ,SP1 ,SP1 ,TK1 ,SP1 ,TK1 ,TK1 ,SP1 }, +sp31a[]={INV_,ISIG,LEN_,LEN_,NOP_,SYS_,EOF_,GET_,RS_ ,LEN_,LIT1,LIT2,ESGD}, + +sp32 []={SP2 ,SP2 ,SP2 ,SP2 ,TK1 ,SP2 ,SP2 ,SP2 ,TK1 ,CM1 ,TK1 ,TK1 ,SP2 }, +sp32a[]={INV_,ISIG,LEN_,LEN_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,LIT1,LIT2,ESGD}, + +tk31 []={TK1 ,TK1 ,SP2 ,SP1 ,TK1 ,TK1 ,TK1 ,TK1 ,TK1 ,CM1 ,TK1 ,TK1 ,SP1 }, tk31a[]={INV_,ISIG,NUM1,NAS1,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,LIT1,LIT2,ESGD}, - -cm30 []={SP1 ,CM0 ,SP1 ,SP1 ,SP1 ,CM0 ,SP1 ,CM0 ,SP1 ,CM2 ,SP1 ,SP1 ,SP1 }, -cm30a[]={PCI_,ISIG,PCI_,PCI_,PCI_,SYS_,PCI_,GET_,PCI_,NOP_,PCI_,PCI_,PCI_}, - + cm31 []={TK1 ,CM1 ,TK1 ,TK1 ,TK1 ,CM1 ,TK1 ,CM1 ,TK1 ,CM2 ,TK1 ,TK1 ,TK1 }, cm31a[]={PCI_,ISIG,PCI_,PCI_,PCI_,SYS_,PCI_,GET_,PCI_,NOP_,PCI_,PCI_,PCI_}, - + cm32 []={CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,CM2 ,TK1 ,CM2 ,CM2 ,CM3 ,CM2 ,CM2 ,CM2 }, cm32a[]={NOP_,ISIG,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_}, - + cm33 []={CM2 ,CM3 ,CM2 ,CM2 ,CM2 ,CM3 ,TK1 ,CM3 ,CM2 ,TK1 ,CM2 ,CM2 ,CM2 }, cm33a[]={NOP_,ISIG,NOP_,NOP_,NOP_,SYS_,EOF_,GET_,RS_ ,NOP_,NOP_,NOP_,NOP_}, -*sdtab[]={sp31, sp31a, tk31, tk31a, cm30, cm30a, cm31, cm31a, cm32, cm32a, +*sdtab[]={sp31, sp31a, sp32, sp32a, tk31, tk31a, cm31, cm31a, cm32, cm32a, cm33, cm33a}; struct parse pcbsd = {"SD", lexsd, sdtab, 0, 0, 0, 0}; #undef SP1 +#undef SP2 #undef TK1 -#undef CM0 #undef CM1 #undef CM2 #undef CM3 diff --git a/usr.bin/sgmls/sgmls/portproc.c b/usr.bin/sgmls/sgmls/portproc.c index 0bb24316fe0e..a057d248e2fd 100644 --- a/usr.bin/sgmls/sgmls/portproc.c +++ b/usr.bin/sgmls/sgmls/portproc.c @@ -12,6 +12,7 @@ #include "std.h" #include "entity.h" #include "appl.h" +#include "alloc.h" /* This code 
shows how you might use system() to implement run_process(). ANSI C says very little about the behaviour of system(), and so this diff --git a/usr.bin/sgmls/sgmls/serv.c b/usr.bin/sgmls/sgmls/serv.c index 68b5fe12e966..b9699d236b91 100644 --- a/usr.bin/sgmls/sgmls/serv.c +++ b/usr.bin/sgmls/sgmls/serv.c @@ -72,7 +72,7 @@ UNCH *s; /* String to be hashed. */ int hashsize; /* Size of hash table array. */ { unsigned long h = 0, g; - + while (*s != 0) { h <<= 4; h += *s++; diff --git a/usr.bin/sgmls/sgmls/sgml1.c b/usr.bin/sgmls/sgmls/sgml1.c index a2808f4dd6d1..c138c9fdd6ed 100644 --- a/usr.bin/sgmls/sgmls/sgml1.c +++ b/usr.bin/sgmls/sgmls/sgml1.c @@ -195,8 +195,8 @@ static UNCH sgmltab[][11] = { {DA1 ,DA1 ,ST2 ,ST2 ,ST2 ,ST2 ,ST2 ,NR2 ,ST1 ,NR2 ,ST1 },/*nr2*/ {CON_,ETG_,MD_ ,MDC_,MSS_,MSE_,PIS_,REF_,CON_,LOP_,EOD_}, - {DA1 ,DA1 ,ST2 ,ST2 ,ST2 ,ST2 ,ST2 ,NR1 ,ST1 ,NR2 ,ST1 },/*st2*/ - {CON_,ETG_,MD_ ,MDC_,MSS_,MSE_,PIS_,REF_,CON_,LOP_,EOD_}, + {DA1 ,DA1 ,ST2 ,ST2 ,ST2 ,ST2 ,ST2 ,NR2 ,ST1 ,NR2 ,ST1 },/*st2*/ + {CON_,ETG_,MD_ ,MDC_,MSS_,MSE_,PIS_,LOP_,CON_,LOP_,EOD_}, }; int scbsgmst = ST1; /* SCBSGML: trailing stag or markup; ignore RE. */ int scbsgmnr = NR1; /* SCBSGML: new record; do not ignore RE. */ @@ -257,6 +257,7 @@ struct switches *swp; TRACEPRO(); /* Set trace switches for prolog. 
*/ msginit(swp); ioinit(swp); + entginit(swp); sdinit(); return &lex.m; } @@ -323,13 +324,15 @@ struct sgmlcap *p; p->limit = sd.capacity; p->name = captab; - for (i = 0; i < NCAPACITY; i++) { - long excess = capnumber[i]*cappoints[i] - sd.capacity[i]; - if (excess > 0) { - char buf[sizeof(long)*3 + 1]; - sprintf(buf, "%ld", excess); - sgmlerr(162, (struct parse *)0, - (UNCH *)captab[i], (UNCH *)buf); + if (sw.swcap) { + for (i = 0; i < NCAPACITY; i++) { + long excess = capnumber[i]*cappoints[i] - sd.capacity[i]; + if (excess > 0) { + char buf[sizeof(long)*3 + 1]; + sprintf(buf, "%ld", excess); + sgmlerr(162, (struct parse *)0, + (UNCH *)captab[i], (UNCH *)buf); + } } } } @@ -370,7 +373,7 @@ PNE *np; UNCH **tp; { PECB ep; /* Pointer to an entity control block. */ - + ep = entfind(iname); if (!ep) return -1; @@ -412,6 +415,11 @@ int sgmlgcnterr() return msgcnterr(); } +char *getsubst() +{ + return (char *)lextran; +} + /* This is for error handling functions that want to print a gi backtrace. */ UNCH *getgi(i) diff --git a/usr.bin/sgmls/sgmls/sgml2.c b/usr.bin/sgmls/sgmls/sgml2.c index 83bccbd371a8..df75b6aebaec 100644 --- a/usr.bin/sgmls/sgmls/sgml2.c +++ b/usr.bin/sgmls/sgmls/sgml2.c @@ -49,13 +49,12 @@ UNCH *ename; /* Entity name (with length and EOS). */ /* Get the entity control block, if the entity has been defined. */ if ((ecb = (PECB)hfind((THASH)etab, ename, hash(ename, ENTHASH)))==0 || ecb->estore == 0) { - if ( ename[1]==lex.d.pero - || ecbdeflt==0 - || (ecb = usedef(ename))==0 ) { - sgmlerr(ename[1] == lex.d.pero || ecbdeflt == 0 ? 35 : 150, - (struct parse *)0, ename+1, (UNCH *)0); + if (ename[1] == lex.d.pero || ecbdeflt == 0) { + sgmlerr(35, (struct parse *)0, ename+1, (UNCH *)0); return(ENTUNDEF); } + else + ecb = usedef(ename); } return(entopen(ecb)); } @@ -74,6 +73,7 @@ struct entity *ecb; /* Entity control block. 
*/ sgmlerr(34, (struct parse *)0, ecb->ename+1, ntoa(ENTLVL)); return(ENTMAX); } + if (docelsw) sgmlerr(234, (struct parse *)0, (UNCH *)0, (UNCH *)0); /* If entity is an etd, pi, or data, return it without creating an scb. */ switch (ecb->estore) { case ESN: @@ -99,6 +99,8 @@ struct entity *ecb; /* Entity control block. */ case ESC: case ESX: datalen = ustrlen(ecb->etx.c); + /* Ignore reference to empty CDATA entity. */ + if (datalen == 0 && ecb->estore == ESC) return(0); data = ecb->etx.c; entdatsw = (ecb->estore==ESC) ? CDECONT : SDECONT; return(ENTDATA); @@ -169,7 +171,8 @@ int entget() { RSCC += (CCO = FPOS-FBUF); /* Characters-in-record (ignore EOB/EOF). */ - tagctr += CCO; /* Update tag length counter. */ + if (es == tages) + tagctr += CCO; /* Update tag length counter. */ switch (*FPOS) { case EOBCHAR: /* End of file buffer: refill it. */ rbufs[-2] = FPOS[-2]; @@ -227,9 +230,10 @@ UNCH *ename; /* Entity name (with length and EOS). */ else { /* Move entity name into fpi. */ fpidf.fpinm = ename + 1; - if ((etx.x = entgen(&fpidf))==0) return (PECB)0; + if ((etx.x = entgen(&fpidf))==0) + sgmlerr(150, (struct parse *)0, ename + 1, (UNCH *)0); if (estore==ESN) { - memcpy((UNIV)(pne=(PNE)rmalloc((UNS)NESZ)),(UNIV)ecbdeflt->etx.n,(UNS)NESZ); + memcpy((UNIV)(pne=(PNE)rmalloc((UNS)NESZ)),(UNIV)ecbdeflt->etx.n,(UNS)NESZ); NEID(pne) = etx.x; etx.n = pne; } @@ -288,7 +292,8 @@ int es; /* Local index to scbs. */ SCB.pushback = FPOS[-1]; FBUF = 0; /* Indicate pending file. */ RSCC += off; /* Update characters-in-record counter. */ - tagctr += off; /* Update tag length counter. */ + if (es == tages) + tagctr += off; /* Update tag length counter. */ iopend(SCBFCB, off, rbufs); return; } @@ -386,7 +391,7 @@ UNCH *parm2; /* Additional parameters (or NULL). */ { struct error err; errorinit(&err, subdcl ? 
MDERR : MDERR2, number); - err.parmno = parmno; + err.parmno = parmno; err.subdcl = subdcl; err.eparm[0] = (UNIV)parm1; err.eparm[1] = (UNIV)parm2; @@ -427,6 +432,24 @@ UNCH *parm2; /* Error message parameters. */ scbset(); return msgsave(&err); } +/* SAVMDERR: Save an md error for possible later use. +*/ +UNIV savmderr(number, parm1, parm2) +UNS number; /* Error number. */ +UNCH *parm1; /* Additional parameters (or NULL). */ +UNCH *parm2; /* Additional parameters (or NULL). */ +{ + struct error err; + errorinit(&err, subdcl ? MDERR : MDERR2, number); + err.parmno = parmno; + err.subdcl = subdcl; + err.eparm[0] = (UNIV)parm1; + err.eparm[1] = (UNIV)parm2; + err.errsp = (sizeof(pcbtab)/sizeof(pcbtab[0])) + ptrsrch(mdnmtab, + (UNIV)mdname); + scbset(); + return msgsave(&err); +} /* SVDERR: Print a saved error. */ VOID svderr(p) diff --git a/usr.bin/sgmls/sgmls/sgmlaux.h b/usr.bin/sgmls/sgmls/sgmlaux.h index f87ac8b806ed..6073e6679989 100644 --- a/usr.bin/sgmls/sgmls/sgmlaux.h +++ b/usr.bin/sgmls/sgmls/sgmlaux.h @@ -51,6 +51,7 @@ VOID ioinit P((struct switches *)); char *ioflid P((UNIV)); UNIV entgen P((struct fpi *)); +VOID entginit P((struct switches *)); VOID msgprint P((struct error *)); VOID msginit P((struct switches *)); @@ -68,3 +69,4 @@ UNIV rmalloc P((unsigned int)); UNIV rrealloc P((UNIV, UNS)); VOID frem P((UNIV)); VOID exiterr P((unsigned int,struct parse *)); +char *getsubst P((void)); diff --git a/usr.bin/sgmls/sgmls/sgmldecl.c b/usr.bin/sgmls/sgmls/sgmldecl.c index d9f06b3bfbc8..6ef6b68a5365 100644 --- a/usr.bin/sgmls/sgmls/sgmldecl.c +++ b/usr.bin/sgmls/sgmls/sgmldecl.c @@ -9,6 +9,7 @@ /* Symbolic names for the error numbers that are be generated only by this module. */ +#define E_SHUNCHAR 159 #define E_STANDARD 163 #define E_SIGNIFICANT 164 #define E_BADLIT 165 @@ -60,6 +61,7 @@ this module. */ #define E_NMBAD 222 #define E_NMMINUS 223 #define E_UNKNOWNSET 227 +#define E_TOTALCAP 235 #define CANON_NMC '.' /* Canonical name character. 
*/ #define CANON_NMS 'A' /* Canonical name start character. */ @@ -163,21 +165,21 @@ in a buffer intended for a literal.) */ /* Table of quantity names. Must match Q* in sgmldecl.h. */ static char *quantity_names[] = { - "ATTCNT", - "ATTSPLEN", - "BSEQLEN", - "DTAGLEN", - "DTEMPLEN", - "ENTLVL", - "GRPCNT", - "GRPGTCNT", - "GRPLVL", - "LITLEN", - "NAMELEN", - "NORMSEP", - "PILEN", - "TAGLEN", - "TAGLVL", + "ATTCNT", + "ATTSPLEN", + "BSEQLEN", + "DTAGLEN", + "DTEMPLEN", + "ENTLVL", + "GRPCNT", + "GRPGTCNT", + "GRPLVL", + "LITLEN", + "NAMELEN", + "NORMSEP", + "PILEN", + "TAGLEN", + "TAGLVL", }; static int max_quantity[] = { @@ -253,9 +255,18 @@ static int systemcharset[] = { 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, }; +/* This is a private use designating sequence that by convention +refers to the whole system character set whatever it is. */ + +#define SYSTEM_CHARSET_DESIGNATING_SEQUENCE "ESC 2/5 2/15 3/0" + static struct pmap charset_map[] = { - { "ESC 2/5 4/0", (UNIV)asciicharset }, /* ISO 646 IRV */ - { "ESC 2/8 4/2", (UNIV)asciicharset }, /* ISO Registration Number 6, ASCII */ + { "ESC 2/5 4/0", (UNIV)iso646charset }, /* ISO 646 IRV */ + { "ESC 2/8 4/2", (UNIV)iso646G0charset }, /* ISO Registration Number 6, ASCII */ + { "ESC 2/8 4/0", (UNIV)iso646G0charset }, /* ISO Registration Number 6, ASCII */ + { "ESC 2/13 4/1", (UNIV)iso8859_1charset }, /* Latin 1 */ + { "ESC 2/1 4/0", (UNIV)iso646C0charset }, /* ISO 646, C0 */ + { "ESC 2/2 4/3", (UNIV)iso6429C1charset }, /* ISO 6429, C1 */ { SYSTEM_CHARSET_DESIGNATING_SEQUENCE, (UNIV)systemcharset }, /* system character set */ { 0 } @@ -272,7 +283,9 @@ static UNCH char_flags[256]; static int done_nonsgml = 0; static UNCH *nlextoke = 0; /* new lextoke */ static UNCH *nlextran = 0; /* new lextran */ - +#define MAX_SAVED_ERRS 4 +static UNIV saved_errs[MAX_SAVED_ERRS]; +static int nsaved_errs = 0; static UNCH kcharset[] = "CHARSET"; static UNCH kbaseset[] = "BASESET"; @@ -327,7 +340,7 
@@ static UNCH kquantity[] = "QUANTITY"; static UNIV pmaplookup P((struct pmap *, char *)); static UNCH *ltous P((long)); -static VOID sdfixstandard P((UNCH *)); +static VOID sdfixstandard P((UNCH *, int)); static int sdparm P((UNCH *, struct parse *)); static int sdname P((UNCH *, UNCH *)); static int sdckname P((UNCH *, UNCH *)); @@ -353,6 +366,7 @@ static int sdnames P((UNCH *)); static int sdquantity P((UNCH *)); static int sdfeatures P((UNCH *)); static int sdappinfo P((UNCH *)); +static VOID sdsaverr P((UNS, UNCH *, UNCH *)); static VOID bufsalloc P((void)); static VOID bufsrealloc P((void)); @@ -377,11 +391,25 @@ int sgmldecl() parmno = 0; mdname = sgmlkey; subdcl = NULL; + nsaved_errs = 0; for (i = 0; i < SIZEOF(section); i++) if ((*section[i])(tbuf) == FAIL) { errsw = 1; break; } + if (sd.formal) { + /* print saved errors */ + int i; + for (i = 0; i < nsaved_errs; i++) + svderr(saved_errs[i]); + } + else { + /* free saved errors */ + int i; + for (i = 0; i < nsaved_errs; i++) + msgsfree(saved_errs[i]); + } + if (!errsw) setlexical(); bufsrealloc(); @@ -406,7 +434,7 @@ UNCH *tbuf; sderr(123, (UNCH *)0, (UNCH *)0); return FAIL; } - sdfixstandard(tbuf); + sdfixstandard(tbuf, 0); if (ustrcmp(tbuf, standard) != 0) sderr(E_BADVERSION, tbuf, standard); return SUCCESS; @@ -426,6 +454,7 @@ UNCH *tbuf; if (sdcsdesc(tbuf, status) == FAIL) return FAIL; +#if 0 for (i = 128; i < 256; i++) if (status[i] != UNDESC) break; @@ -437,11 +466,14 @@ UNCH *tbuf; sderr(E_7BIT, (UNCH *)0, (UNCH *)0); #endif } +#endif /* Characters that are declared UNUSED in the document character set are assigned to non-SGML. */ for (i = 0; i < 256; i++) { if (status[i] == UNDESC) { +#if 0 sderr(E_CHARMISSING, ltous((long)i), (UNCH *)0); +#endif char_flags[i] |= CHAR_NONSGML; } else if (status[i] == UNUSED) @@ -491,9 +523,9 @@ int *status; fpi.fpipubis = tbuf; /* Give a warning if it is not a CHARSET fpi. 
*/ if (parsefpi(&fpi)) - sderr(E_FORMAL, (UNCH *)0, (UNCH *)0); + sdsaverr(E_FORMAL, (UNCH *)0, (UNCH *)0); else if (fpi.fpic != FPICHARS) - sderr(E_BADCLASS, kcharset, (UNCH *)0); + sdsaverr(E_BADCLASS, kcharset, (UNCH *)0); else { fpi.fpipubis[fpi.fpil + fpi.fpill] = '\0'; baseset = (int *)pmaplookup(charset_map, @@ -547,10 +579,12 @@ int *status; int n = basenum + (i - start); if (n < 0 || n > 255) sderr(E_CHARRANGE, (UNCH *)0, (UNCH *)0); - else if (baseset[n] == UNUSED) - sderr(E_BADBASECHAR, ltous((long)n), (UNCH *)0); - else + else { + if (baseset[n] == UNUSED) + sderr(E_BADBASECHAR, ltous((long)n), + (UNCH *)0); status[i] = baseset[n]; + } } } } @@ -570,6 +604,7 @@ static int sdcapacity(tbuf) UNCH *tbuf; { int ncap; + int i; if (sdckname(tbuf, kcapacity) == FAIL) return FAIL; @@ -609,7 +644,9 @@ UNCH *tbuf; sderr(E_CAPMISSING, (UNCH *)0, (UNCH *)0); return FAIL; } - + for (i = 1; i < NCAPACITY; i++) + if (sd.capacity[i] > sd.capacity[0]) + sderr(E_TOTALCAP, (UNCH *)captab[i], (UNCH *)0); return SUCCESS; } @@ -624,7 +661,7 @@ UNCH *tbuf; sderr(123, (UNCH *)0, (UNCH *)0); return FAIL; } - sdfixstandard(tbuf); + sdfixstandard(tbuf, 1); ptr = pmaplookup(capset_map, (char *)tbuf); if (!ptr) sderr(E_CAPSET, tbuf, (UNCH *)0); @@ -680,7 +717,7 @@ UNCH *tbuf; int nswitches; if (sdparm(tbuf, &pcblitv) != LIT1) return FAIL; - sdfixstandard(tbuf); + sdfixstandard(tbuf, 1); if (ustrcmp(tbuf, CORE_SYNTAX) == 0) sd.shortref = 0; else if (ustrcmp(tbuf, REFERENCE_SYNTAX) == 0) @@ -769,7 +806,7 @@ UNCH *tbuf; } } if (pcbsd.action != NUM1) { - sderr(E_XNUM, (UNCH *)0, (UNCH *)0); + sderr(E_SHUNCHAR, (UNCH *)0, (UNCH *)0); return FAIL; } do { @@ -944,7 +981,7 @@ UNCH *tbuf; return FAIL; } start[i] = bufi; - + for (s = tbuf; *s; s++) { int c = *s; if (c == DELNONCH) { @@ -955,8 +992,7 @@ UNCH *tbuf; if (c < 0) bad = 1; else if ((char_flags[c] & (CHAR_SIGNIFICANT | CHAR_MAGIC)) - && c != '.' && c != '-' - && !(c == '_' && i >= 2)) { + && c != '.' 
&& c != '-') { int class = lextoke[c]; if (class == SEP || class == SP || class == NMC || class == NMS || class == NU) @@ -996,7 +1032,7 @@ UNCH *tbuf; nlextoke[uc] = NMS; nlextran[lc] = uc; } - + for (i = 0; i < count[2]; i++) { UNCH lc = buf[start[2] + i]; UNCH uc = buf[start[3] + i]; @@ -1149,7 +1185,7 @@ UNCH *tbuf; for (i = 0; i < NKEYS; i++) if (newkey[i][0] != '\0') { UNCH temp[REFNAMELEN + 1]; - + ustrcpy(temp, key[i]); ustrcpy(key[i], newkey[i]); ustrcpy(newkey[i], temp); @@ -1304,11 +1340,13 @@ UNCH *tbuf; /* Change a prefix of ISO 8879-1986 to ISO 8879:1986. Amendment 1 to the standard requires the latter. */ -static VOID sdfixstandard(tbuf) +static VOID sdfixstandard(tbuf, silently) UNCH *tbuf; +int silently; { if (strncmp((char *)tbuf, "ISO 8879-1986", 13) == 0) { - sderr(E_STANDARD, (UNCH *)0, (UNCH *)0); + if (!silently) + sderr(E_STANDARD, (UNCH *)0, (UNCH *)0); tbuf[8] = ':'; } } @@ -1389,7 +1427,7 @@ VOID sdinit() { int i; /* Shunned character numbers in the reference concrete syntax. */ - static UNCH refshun[] = { + static UNCH refshun[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 127, 255 }; @@ -1435,7 +1473,7 @@ static VOID bufsrealloc() { UNS size; - + if (ENTLVL != REFENTLVL) scbs = (struct source *)rrealloc((UNIV)scbs, (ENTLVL+1)*sizeof(struct source)); @@ -1463,7 +1501,7 @@ static VOID setlexical() { int i; UNCH **p; - + if (nlextoke) { /* Handle characters that were made significant by the NAMING section. */ @@ -1493,7 +1531,7 @@ static VOID setlexical() } } - + /* Now munge the lexical tables. */ for (p = lextabs; *p; p++) { UNCH nonclass = (*p)[CANON_NONSGML]; @@ -1513,6 +1551,12 @@ static VOID setlexical() } else if (!(char_flags[i] & CHAR_SIGNIFICANT)) (*p)[i] = datclass; + else if (*p == lexmin) { + /* If it used to be NONSGML, but its now significant, + treat it like a datachar. 
*/ + if ((*p)[i] == nonclass) + (*p)[i] = datclass; + } else if (nlextoke /* This relies on the fact that lextoke occurs last in lextabs. */ @@ -1544,7 +1588,7 @@ static VOID setlexical() frem((UNIV)nlextoke); nlextoke = 0; } - + } /* Munge parse tables so that empty start and end tags are not recognized. */ @@ -1553,7 +1597,7 @@ static VOID noemptytag() { static struct parse *pcbs[] = { &pcbconm, &pcbcone, &pcbconr, &pcbconc }; int i; - + for (i = 0; i < SIZEOF(pcbs); i++) { int maxclass, maxstate; int j, k, act; @@ -1617,11 +1661,10 @@ FILE *fp; char lcletter[256]; /* LC letters: a-z */ fprintf(fp, "\n"); } +/* Save an error to be printed only if FORMAL is declared as YES. */ + +static +VOID sdsaverr(number, parm1, parm2) +UNS number; +UNCH *parm1; +UNCH *parm2; +{ + saved_errs[nsaved_errs++] = savmderr(number, parm1, parm2); +} + /* Local Variables: c-indent-level: 5 diff --git a/usr.bin/sgmls/sgmls/sgmldecl.h b/usr.bin/sgmls/sgmls/sgmldecl.h index 296bdb8ce4b5..1111f72fca7a 100644 --- a/usr.bin/sgmls/sgmls/sgmldecl.h +++ b/usr.bin/sgmls/sgmls/sgmldecl.h @@ -1,15 +1,15 @@ /* sgmldecl.h: SGML declaration parsing. 
*/ -#define QATTCNT 0 -#define QATTSPLEN 1 -#define QBSEQLEN 2 -#define QDTAGLEN 3 -#define QDTEMPLEN 4 -#define QENTLVL 5 -#define QGRPCNT 6 -#define QGRPGTCNT 7 -#define QGRPLVL 8 -#define QLITLEN 9 +#define QATTCNT 0 +#define QATTSPLEN 1 +#define QBSEQLEN 2 +#define QDTAGLEN 3 +#define QDTEMPLEN 4 +#define QENTLVL 5 +#define QGRPCNT 6 +#define QGRPGTCNT 7 +#define QGRPLVL 8 +#define QLITLEN 9 #define QNAMELEN 10 #define QNORMSEP 11 #define QPILEN 12 @@ -18,16 +18,16 @@ #define NQUANTITY (QTAGLVL+1) -#define TOTALCAP 0 -#define ENTCAP 1 -#define ENTCHCAP 2 -#define ELEMCAP 3 -#define GRPCAP 4 -#define EXGRPCAP 5 -#define EXNMCAP 6 -#define ATTCAP 7 -#define ATTCHCAP 8 -#define AVGRPCAP 9 +#define TOTALCAP 0 +#define ENTCAP 1 +#define ENTCHCAP 2 +#define ELEMCAP 3 +#define GRPCAP 4 +#define EXGRPCAP 5 +#define EXNMCAP 6 +#define ATTCAP 7 +#define ATTCHCAP 8 +#define AVGRPCAP 9 #define NOTCAP 10 #define NOTCHCAP 11 #define IDCAP 12 @@ -81,4 +81,10 @@ extern struct sgmldecl sd; #define UNDESC -3 #define UNKNOWN_SET -4 -extern int asciicharset[]; +extern int iso646charset[]; +extern int iso646G0charset[]; +extern int iso646C0charset[]; +extern int iso8859_1charset[]; +extern int iso6429C1charset[]; + + diff --git a/usr.bin/sgmls/sgmls/sgmlfnsm.h b/usr.bin/sgmls/sgmls/sgmlfnsm.h index 0d617fb32aa5..3003d6766f12 100644 --- a/usr.bin/sgmls/sgmls/sgmlfnsm.h +++ b/usr.bin/sgmls/sgmls/sgmlfnsm.h @@ -86,6 +86,7 @@ UNCH *pubfield P((UNCH *,UNCH *,UNCH,UNS *)); UNCH *replace P((UNCH *,UNCH *)); UNCH *sandwich P((UNCH *,UNCH *,UNCH *)); UNIV saverr P((unsigned int,struct parse *,UNCH *,UNCH *)); +UNIV savmderr P((unsigned int,UNCH *,UNCH *)); VOID scbset P((void)); VOID sdinit P((void)); VOID setcurchar P((int)); diff --git a/usr.bin/sgmls/sgmls/sgmlio.c b/usr.bin/sgmls/sgmls/sgmlio.c index 3db1d0d80990..c78bb7a6f044 100644 --- a/usr.bin/sgmls/sgmls/sgmlio.c +++ b/usr.bin/sgmls/sgmls/sgmlio.c @@ -191,7 +191,7 @@ int *newfilep; struct iofcb *f = (struct iofcb *)p; FILE *fp; int 
c; - + *newfilep = 0; if (f->first) { buf[i] = EOBCHAR; diff --git a/usr.bin/sgmls/sgmls/sgmlmsg.c b/usr.bin/sgmls/sgmls/sgmlmsg.c index 454bc3ebb7f6..4d98c55b58c1 100644 --- a/usr.bin/sgmls/sgmls/sgmlmsg.c +++ b/usr.bin/sgmls/sgmls/sgmlmsg.c @@ -64,16 +64,16 @@ static char *headers[] = { /* Indexes into headers[] */ -#define HDRPFX 0 -#define HDRALL 1 -#define HDRUNSUP 2 -#define HDRSYS 3 -#define HDRWARN 4 -#define HDRLOC 5 -#define HDRELOC 6 -#define HDRMD 7 -#define HDRMD2 8 -#define HDRMODE 9 +#define HDRPFX 0 +#define HDRALL 1 +#define HDRUNSUP 2 +#define HDRSYS 3 +#define HDRWARN 4 +#define HDRLOC 5 +#define HDRELOC 6 +#define HDRMD 7 +#define HDRMD2 8 +#define HDRMODE 9 #define HDREOF 10 #define HDREE 11 #define HDRRS 12 @@ -229,7 +229,7 @@ struct error *e; } else indent = 4; - + for (toplevel = 0; getlocation(toplevel, &loc); toplevel++) if (loc.filesw) { prevfilelevel = filelevel; @@ -285,7 +285,7 @@ struct error *e; hdrcode = HDRUNSUP; else hdrcode = HDRALL; - + xfprintf(efp, getheader(hdrcode), type, severity, e->errnum); if (filelevel >= 0) { @@ -301,9 +301,9 @@ struct error *e; } } } - + /* It is necessary to copy the result of getparm() because - the specification of catgets() says in can return a + the specification of catgets() says in can return a pointer to a static buffer which may get overwritten by the next call to catgets(). 
*/ @@ -388,14 +388,14 @@ int indent; { int i = 1; UNCH *gi; - + gi = getgi(i); if (!gi) return; spaces(efp, indent); xfprintf(efp, getheader(HDRELT)); do { - fprintf(efp, " %s", gi); + fprintf(efp, " %s", (char *)gi); gi = getgi(++i); } while (gi); putc('\n', efp); diff --git a/usr.bin/sgmls/sgmls/sgmls.1 b/usr.bin/sgmls/sgmls/sgmls.1 index b9967a0a5fb5..634601b0aea9 100644 --- a/usr.bin/sgmls/sgmls/sgmls.1 +++ b/usr.bin/sgmls/sgmls/sgmls.1 @@ -44,6 +44,9 @@ Standard Generalized Markup Language [ .BI \-i name ] +[ +.BI \-m file +] .if \n(Tr \{\ [ .BI \-x flags @@ -77,7 +80,8 @@ can also be used to refer to the standard input. The following options are available: .TP .BI \-c file -Write a report of capacity usage to +Report any capacity limits that are exceeded +and write a report of capacity usage to .IR file . The report is in the format of a RACT result. RACT is the Reference Application for Capacity Testing defined in the @@ -137,6 +141,17 @@ Output .B L commands giving the current line number and filename. .TP +.BI \-m file +Map public identifiers and entity names to system identifiers +using the catalog entry file +.IR file . +Multiple +.B \-m +options are allowed. +Catalog entry files specified with the +.B -m +option will be searched before the defaults. +.TP .B \-p Parse only the prolog. .I Sgmls @@ -153,7 +168,6 @@ Error messages will still be printed. .TP .B \-u Warn about undefined elements: elements used in the DTD but not defined. -Also warn about undefined short reference maps. .TP .B \-v Print the version number. @@ -223,11 +237,80 @@ interpreted as a list of filenames separated by A filename of .B \- can be used to refer to the standard input. -If no system identifier is supplied, then the entity manager will -attempt to generate a filename using the public identifier -(if there is one) and other information available to it. -Notation identifiers are not subject to this treatment. 
-This process is controlled by the environment variable +.LP +If a system identifier is not specified, +then the entity manager can generate one using catalog +entry files in the format defined in the SGML Open Draft Technical +Resolution on Entity Management. A catalog entry file contains a +sequence of entries in one of the following four forms: +.TP +.BI PUBLIC\ pubid\ sysid +This specifies that +.I sysid +should be used as the system identifier if the the public +identifier is +.IR pubid . +.I Sysid +is a system identifier as defined in ISO 8879 and +.I pubid +is a public identifier as defined in ISO 8879. +.TP +.BI ENTITY\ name\ sysid +This specifies that +.I sysid +should be used as the system identifier if the entity is a general +entity whose name is +.IR name . +.TP +.BI ENTITY\ % name\ sysid +This specifies that +.I sysid +should be used as the system identifier if the entity is a parameter +entity whose name is +.IR name . +Note that there is no space between the +.B % +and the +.IR name . +.TP +.BI DOCTYPE\ name\ sysid +This specifies that +.I sysid +should be used as the system identifier if the entity is an +entity declared in a document type declaration whose document type name is +.IR name . +.LP +The last two forms are extensions to the SGML Open format. +The delimiters can be omitted from the +.I sysid +provided it does not contain any white space. +Comments are allowed between parameters delimited by +.B -- +as in SGML. +The environment variable +.B \s-1SGML_CATALOG_FILES\s0 +contains a +.if \n(Os=0 colon-separated +.if \n(Os=1 semicolon-separated +list of catalog entry files. +These will be searched after any catalog entry files specified +using the +.B \-m +option. +If this environment variable is not set, +then a system dependent list of catalog entry files will be used. +A match in a catalog entry file for a PUBLIC entry will take +precedence over a match in the same file for an ENTITY +or DOCTYPE entry. 
+A filename in a system identifier in a catalog entry file +is interpreted relative to the directory containing the catalog +entry file. +.LP +If no match can be found in a catalog entry file, then the entity +manager will attempt to generate a filename using the public +identifier (if there is one) and other information available to it. +Notation identifiers are not subject to this treatment. This process +is controlled by the environment variable .BR \s-1SGML_PATH\s0 ; this contains a .if \n(Os=0 colon-separated @@ -238,18 +321,6 @@ substitution fields; a substitution field is a .B % character followed by a single letter that indicates the value of the substitution. -If -.B \s-1SGML_PATH\s0 -uses the -.B %S -field (the value of which is the system identifier), -then the entity manager will also use -.B \s-1SGML_PATH\s0 -to generate a filename -when a system identifier that does not contain any -.if \n(Os=0 colons -.if \n(Os=1 semi-colons -is supplied. The value of a substitution can either be a string or it can be .IR null . @@ -395,6 +466,27 @@ does not allow a display version or if no version was specified. If an empty version was specified, a value of .B default will be used. +.LP +Normally if the external identifier for an entity includes a system +identifier, the entity manager will use the specified system +identifier and not attempt to generate one. +If, however, +.B \s-1SGML_PATH\s0 +uses the +.B %S +field, +then the entity manager will first search for a matching +entry in the catalog entry files. +If a match is found, then this will be used instead of the +specified system identifier. +Otherwise, +if the specified system identifier does not contain any +.if \n(Os=0 colons, +.if \n(Os=1 semi-colons, +the entity manager will use +.B \s-1SGML_PATH\s0 +to generate a filename. +Otherwise the entity manager will use the specified system identifier. 
.br .ne 18 .SS "System declaration" @@ -442,6 +534,10 @@ SDIF &PACK&NO&UNPACK&NO .TE .LP +Exceeding a capacity limit will be ignored unless the +.B \-c +option is given. +.LP The memory usage of .I sgmls is not a function of the capacity points used by a document; @@ -465,18 +561,6 @@ The shunned character numbers can be changed. .LP Eight bit characters can be assigned to \s-1LCNMSTRT\s0, \s-1UCNMSTRT\s0, \s-1LCNMCHAR\s0 and \s-1UCNMCHAR\s0. -Declaring this requires that the syntax reference character set be declared -like this: -.RS -.ne 3 -.TS -tab(&); -l l. -BASESET&"ISO Registration Number 100//CHARSET -&\h'\w'"'u'ECMA-94 Right Part of Latin Alphabet Nr. 1//ESC 2/13 4/1" -DESCSET&0\0256\00 -.TE -.RE .LP Uppercase substitution can be performed or not performed both for entity names and for other names. @@ -544,21 +628,35 @@ APPINFO NONE> .TE with the exception that characters 128 through 254 will be assigned to \s-1DATACHAR\s0. -When exporting documents that use characters in this range, -an accurate description of the upper half of the document character set -should be added to this declaration. -For ISO Latin-1, an appropriate description would be: -.br -.ne 5 +.LP +.I Sgmls +identifies base character sets using the designating sequence in the +public identifier. The following designating sequences are +recognized: .TS tab(&); -l l. -BASESET&"ISO Registration Number 100//CHARSET -&\h'\w'"'u'ECMA-94 Right Part of Latin Alphabet Nr. 1//ESC 2/13 4/1" -DESCSET&128\032\0UNUSED -&160\095\032 -&255\0\01\0UNUSED +c c c c c +c c c c ^ +c c c c ^ +l n n n l. 
+Designating&ISO&Minimum&Number&Description +Escape&Registration&Character&of& +Sequence&Number&Number&Characters& +_ +ESC 2/5 4/0&-&0&128&full set of ISO 646 IRV +ESC 2/8 4/0&2&33&94&G0 set of ISO 646 IRV +ESC 2/8 4/2&6&33&94&G0 set of ASCII +ESC 2/13 4/1&100&32&96&G1 set of ISO 8859-1 +ESC 2/1 4/0&1&0&32&C0 set of ISO 646 +ESC 2/2 4/3&77&0&32&C1 set of ISO 6429 +ESC 2/5 2/15 3/0&-&0&256&the system character set .TE +.LP +When one of the G0 sets is used as a base set, the characters SPACE +and DELETE are treated as occurring at positions 32 and 127 +respectively; although these characters are not part of the character +sets designated by the escape sequences, this mimics the behaviour of +ISO 2022 with respect to these code positions. .SS "Output format" The output is a series of lines. Lines can be arbitrarily long. diff --git a/usr.bin/sgmls/sgmls/sgmlxtrn.c b/usr.bin/sgmls/sgmls/sgmlxtrn.c index d27eb669dcfd..74d7894e36b8 100644 --- a/usr.bin/sgmls/sgmls/sgmlxtrn.c +++ b/usr.bin/sgmls/sgmls/sgmlxtrn.c @@ -29,6 +29,7 @@ int contersw = 0; /* 1=element or #CHARS out of context; 0=valid. */ int datarc = 0; /* Return code for data: DAF_ or REF_. */ int delmscsw = 0; /* 1=DELMSC must be read on return to es==0. */ int didreq = 0; /* 1=required implied tag processed; 0=no. */ +int docelsw = 0; /* 1=had document element; 0=no */ int dostag = 0; /* 1=retry newetd instead of parsing; 0=parse. */ int dtdsw = 0; /* DOCTYPE declaration found: 1=yes; 0=no. */ int entdatsw = 0; /* 2=CDATA entity; 4=SDATA; 8=NDATA; 0=none. */ @@ -49,6 +50,7 @@ int pss = 0; /* SGMLACT: scbsgml stack level. */ int sgmlsw = 0; /* SGML declaration found: 1=yes; 0=no. */ int stagmin = MINNONE; /* Minimization: NONE, NULL tag, implied by STAG*/ int tagctr = 0; /* Tag source chars read. */ +int tages = -1; /* ES level at start of tag. */ int ts = -1; /* Index of current tag in stack. */ struct parse *propcb = &pcbpro; /* Current PCB for prolog parse. 
*/ int aentctr = 0; /* Number of ENTITY tokens in this att list. */ @@ -70,6 +72,7 @@ struct etd *docetd = 0; /* The etd for the document as a whole. */ struct etd *etagreal = 0; /* Actual or dummy etd that implied this tag. */ struct etd *newetd = 0; /* The etd for a start- or end-tag recognized. */ struct etd *nextetd = 0; /* ETD that must come next (only one choice). */ +struct etd *lastetd = 0; /* most recently ended ETD. */ struct etd *stagreal = 0; /* Actual or dummy etd that implied this tag. */ struct parse *conpcb = 0; /* Current PCB for content parse. */ UNCH *data = 0; /* Pointer to returned data in buffer. */ @@ -78,7 +81,6 @@ UNCH *ptcon = 0; /* Current pointer into tbuf. */ UNCH *ptpro = 0; /* Current pointer into tbuf. */ UNCH *rbufs = 0; /* DOS file read area: start position for read. */ UNCH *subdcl = 0; /* Subject of markup declaration (e.g., GI). */ -int Tstart = 0; /* Save starting token for AND group testing. */ UNS conradn = 0; /* 1=CONREF attribute in list (0=no). */ UNS datalen = 0; /* Length of returned data in buffer. */ UNS entlen = 0; /* Length of TAG or EXTERNAL entity text. */ diff --git a/usr.bin/sgmls/sgmls/sgmlxtrn.h b/usr.bin/sgmls/sgmls/sgmlxtrn.h index f1b0b4b80326..e551200f39f5 100644 --- a/usr.bin/sgmls/sgmls/sgmlxtrn.h +++ b/usr.bin/sgmls/sgmls/sgmlxtrn.h @@ -13,6 +13,7 @@ extern int contersw; /* 1=element or #CHARS out of context; 0=valid. */ extern int datarc; /* Return code for data: DAF_ or REF_. */ extern int delmscsw; /* 1=DELMSC must be read on return to es==0. */ extern int didreq; /* 1=required implied tag processed; 0=no. */ +extern int docelsw; /* 1=had document element; 0=no */ extern int dostag; /* 1=retry newetd instead of parsing; 0=parse. */ extern int dtdsw; /* DOCTYPE declaration found: 1=yes; 0=no. */ extern int entdatsw; /* 2=CDATA entity; 4=SDATA; 8=NDATA; 0=none. */ @@ -32,6 +33,7 @@ extern int pss; /* SGMLACT: scbsgml stack level. */ extern int sgmlsw; /* SGML declaration found: 1=yes; 0=no. 
*/ extern int stagmin; /* Minimization: NONE, NULL tag, implied by STAG*/ extern int tagctr; /* Tag source chars read. */ +extern int tages; /* ES level at start of tag. */ extern int ts; /* Index of current tag in stack. */ extern struct parse *propcb; /* Current PCB for prolog parse. */ extern int aentctr; /* Number of ENTITY tokens in this att list. */ @@ -53,6 +55,7 @@ extern struct etd *docetd; /* The etd for the document as a whole. */ extern struct etd *etagreal; /* Actual or dummy etd that implied this tag. */ extern struct etd *newetd; /* The etd for a start- or end-tag recognized. */ extern struct etd *nextetd; /* ETD that must come next (only one choice). */ +extern struct etd *lastetd; /* Most recently ended ETD. */ extern struct etd *stagreal; /* Actual or dummy etd that implied this tag. */ extern struct parse *conpcb; /* Current PCB for content parse. */ extern UNCH *data; /* Pointer to returned data in buffer. */ @@ -61,7 +64,6 @@ extern UNCH *ptcon; /* Current pointer into tbuf. */ extern UNCH *ptpro; /* Current pointer into tbuf. */ extern UNCH *rbufs; /* DOS file read area: start position for read. */ extern UNCH *subdcl; /* Subject of markup declaration (e.g., GI). */ -extern int Tstart; /* Save starting token for AND group testing. */ extern UNS conradn; /* 1=CONREF attribute in list (0=no). */ extern UNS datalen; /* Length of returned data in buffer. */ extern UNS entlen; /* Length of TAG or EXTERNAL entity text. 
*/ diff --git a/usr.bin/sgmls/sgmls/std.h b/usr.bin/sgmls/sgmls/std.h index 3a9ab4bf64e8..4e6e8564b152 100644 --- a/usr.bin/sgmls/sgmls/std.h +++ b/usr.bin/sgmls/sgmls/std.h @@ -42,13 +42,7 @@ #include #endif /* not BSD_STRINGS */ -#ifdef STRERROR_MISSING -#ifdef USE_PROTOTYPES -extern char *strerror(int); -#else extern char *strerror(); -#endif -#endif /* STRERROR_MISSING */ #ifdef STDLIB_H_MISSING UNIV malloc(); diff --git a/usr.bin/sgmls/sgmls/synxtrn.h b/usr.bin/sgmls/sgmls/synxtrn.h index 75b64715619d..1cdf9a0a63f1 100644 --- a/usr.bin/sgmls/sgmls/synxtrn.h +++ b/usr.bin/sgmls/sgmls/synxtrn.h @@ -32,6 +32,7 @@ struct lexcode { UNCH fce; /* LEXCNM: FRE character as entity reference. */ UNCH fre; /* LEXCON: Free character not an entity ref. */ UNCH litc; /* LEXLMS: Literal close delimiter enabled. */ + UNCH minlitc; /* LEXMIN: Literal close delimiter enabled. */ UNCH msc; /* LEXLMS: Marked section close delim enabled. */ UNCH net; /* LEXCON: Null end-tag delimiter enabled. */ UNCH nonet; /* LEXCON: NET disabled; still used as ETI. */ @@ -50,6 +51,7 @@ extern UNCH lexcnm[]; /* Lexical table: mixed content. */ extern UNCH lexcon[]; /* Lexical table for content (except mixed). */ extern UNCH lexgrp[]; /* Lexical table for groups. */ extern UNCH lexlms[]; /* Lexical table: literals and marked sections. */ +extern UNCH lexmin[]; /* Lexical table: minimum data literal. */ extern UNCH lexmark[]; /* Lexical table for markup. */ extern UNCH lexsd[]; /* Lexical table for SGML declaration. */ extern UNCH lextran[]; /* Case translation table for SGML names. 
*/ diff --git a/usr.bin/sgmls/sgmls/trace.h b/usr.bin/sgmls/sgmls/trace.h index 56362be641a8..f917a2630df0 100644 --- a/usr.bin/sgmls/sgmls/trace.h +++ b/usr.bin/sgmls/sgmls/trace.h @@ -20,11 +20,11 @@ VOID tracecon P((int,int,int,struct parse *,int,int)); VOID tracedcn P((struct dcncb *)); VOID tracedsk P((struct tag *,struct tag *,int,int)); VOID traceecb P((char *,struct entity *)); -VOID traceend P((char *,struct thdr *,struct mpos *,int,int,int)); +VOID traceend P((char *,struct thdr *,struct mpos *,int,int)); VOID traceesn P((struct ne *)); VOID traceetd P((struct etd *)); VOID traceetg P((struct tag *,struct etd *,int,int)); -VOID tracegi P((char *,struct etd *,struct thdr *,struct mpos *,int)); +VOID tracegi P((char *,struct etd *,struct thdr *,struct mpos *)); VOID tracegml P((struct restate *,int,int,int)); VOID tracegrp P((struct etd **)); VOID traceid P((char *,struct id *)); @@ -50,15 +50,15 @@ VOID traceval P((struct parse *,unsigned int,UNCH *,int)); ((void)(gtrace && (tracedsk(pts, ptso, ts3, etictr), 1))) #define TRACEECB(action, p) \ ((void)(etrace && (traceecb(action, p), 1))) -#define TRACEEND(stagenm, mod, pos, rc, opt, Tstart) \ - ((void)(ctrace && (traceend(stagenm, mod, pos, rc, opt, Tstart), 1))) +#define TRACEEND(stagenm, mod, pos, rc, opt) \ + ((void)(ctrace && (traceend(stagenm, mod, pos, rc, opt), 1))) #define TRACEESN(p) \ ((void)((etrace || atrace || ntrace) && (traceesn(p), 1))) #define TRACEETD(p) ((void)(gtrace && (traceetd(p), 1))) #define TRACEETG(pts, curetd, tsl, etagimct) \ ((void)(gtrace && (traceetg(pts, curetd, tsl, etagimct), 1))) -#define TRACEGI(stagenm, gi, mod, pos, Tstart) \ - ((void)(ctrace && (tracegi(stagenm, gi, mod, pos, Tstart), 1))) +#define TRACEGI(stagenm, gi, mod, pos) \ + ((void)(ctrace && (tracegi(stagenm, gi, mod, pos), 1))) #define TRACEGML(scb, pss, conactsw, conact) \ ((void)(trace && (tracegml(scb, pss, conactsw, conact), 1))) #define TRACEGRP(p) ((void)(gtrace && (tracegrp(p), 1))) @@ -89,11 +89,11 
@@ VOID traceval P((struct parse *,unsigned int,UNCH *,int)); #define TRACEDCN(dcn) /* empty */ #define TRACEDSK(pts, ptso, ts3, etictr) /* empty */ #define TRACEECB(action, p) /* empty */ -#define TRACEEND(stagenm, mod, pos, rc, opt, Tstart) /* empty */ +#define TRACEEND(stagenm, mod, pos, rc, opt) /* empty */ #define TRACEESN(p) /* empty */ #define TRACEETG(pts, curetd, tsl, etagimct) /* empty */ #define TRACEETD(p) /* empty */ -#define TRACEGI(stagenm, gi, mod, pos, Tstart) /* empty */ +#define TRACEGI(stagenm, gi, mod, pos) /* empty */ #define TRACEGML(scb, pss, conactsw, conact) /* empty */ #define TRACEGRP(p) /* empty */ #define TRACEID(action, p) /* empty */ diff --git a/usr.bin/sgmls/sgmls/traceset.c b/usr.bin/sgmls/sgmls/traceset.c index 64ebd4807a3f..e57003f5e9d0 100644 --- a/usr.bin/sgmls/sgmls/traceset.c +++ b/usr.bin/sgmls/sgmls/traceset.c @@ -55,7 +55,7 @@ char *s; VOID traceset() { dotrace(sw.trace); - + if (trace||atrace||ctrace||dtrace||etrace||gtrace||itrace||mtrace||ntrace) fprintf(stderr, "TRACESET: state=%d;att=%d;con=%d;dcl=%d;ent=%d;grp=%d;id=%d;ms=%d;dcn=%d.\n", @@ -67,7 +67,7 @@ VOID traceset() VOID tracepro() { dotrace(sw.ptrace); - + if (trace||atrace||dtrace||etrace||gtrace||mtrace||ntrace) fprintf(stderr, "TRACEPRO: state=%d; att=%d; dcl=%d; ent=%d; grp=%d; ms=%d; dcn=%d.\n", @@ -78,7 +78,7 @@ VOID tracepro() VOID tracepcb(pcb) struct parse *pcb; { - fprintf(stderr, "%-8s %2u-%2u-%2u-%2u from %s [%3d] in %s, %d:%d.\n", + fprintf(stderr, "%-8s %2u-%2u-%2u-%2u from %s [%3d] in %s, %lu:%d.\n", pcb->pname, pcb->state, pcb->input, pcb->action, pcb->newstate, printable(*FPOS), *FPOS, ENTITY+1, RCNT, RSCC+FPOS+1-FBUF); @@ -89,7 +89,7 @@ VOID tracetkn(scope, lextoke) int scope; UNCH lextoke[]; /* Lexical table for token and name parses. 
*/ { - fprintf(stderr, "TOKEN %2d-%2d from %s [%3d] in %s, %d:%d.\n", + fprintf(stderr, "TOKEN %2d-%2d from %s [%3d] in %s, %lu:%d.\n", scope, lextoke[*FPOS], printable(*FPOS), *FPOS, ENTITY+1, RCNT, RSCC+FPOS+1-FBUF); @@ -217,7 +217,7 @@ struct entity *p; VOID tracedcn(p) struct dcncb *p; { - fprintf(stderr, + fprintf(stderr, "DCN dcn=%p; adl=%p; notation is %s\n", (UNIV)p, (UNIV)p->adl, p->ename+1); if (p->adl) @@ -245,7 +245,7 @@ TECB pg; UNCH *gi; { int i = 0; /* Loop counter. */ - + if (pg==SRMNULL) fprintf(stderr, "%-8s SHORTREF table empty for %s.\n", action, gi); else { @@ -264,12 +264,12 @@ VOID traceadl(al) struct ad al[]; { int i=0; - + fprintf(stderr, "ADLIST %p %d membe%s; %d attribut%s\n", (UNIV)al, ADN(al), ADN(al)==1 ? "r" : "rs", AN(al), AN(al)==1 ? "e" : "es"); while (++i<=ADN(al)) { - fprintf(stderr, + fprintf(stderr, (BITOFF(ADFLAGS(al,i), AGROUP) && ADTYPE(al,i)<=ANOTEGRP) ? " %p %-8s %02x %02x %2d %2d %p %p\n" : " %p %-8s %02x %02x %2d %2d %p %p\n", @@ -281,11 +281,10 @@ struct ad al[]; fprintf(stderr, "=>"); traceesn(ADDATA(al,i).n); } - else if (ADTYPE(al,i)==ANOTEGRP) - fprintf(stderr, "=>%s", - (ADDATA(al,i).x->dcnid!=0) - ? (char *)ADDATA(al,i).x->dcnid - : "[UNDEFINED]"); + else if (ADTYPE(al,i)==ANOTEGRP) { + fprintf(stderr, "=>"); + tracedcn(ADDATA(al,i).x); + } } else fprintf(stderr, "[%s]", @@ -325,7 +324,7 @@ VOID tracegrp(pg) struct etd *pg[]; { int i = -1; /* Loop counter. */ - + fprintf(stderr, "ETDGRP %p\n", (UNIV)pg); while (pg[++i]!=0) fprintf(stderr, " %p %s\n", (UNIV)pg[i], pg[i]->etdgi+1); @@ -336,7 +335,7 @@ VOID tracengr(pg) struct dcncb *pg[]; { int i = -1; /* Loop counter. */ - + fprintf(stderr, "DCNGRP %p\n", (UNIV)pg); while (pg[++i]!=0) fprintf(stderr, " %p %s\n", (UNIV)pg[i], pg[i]->ename+1); @@ -346,7 +345,7 @@ struct dcncb *pg[]; VOID traceetd(p) struct etd *p; /* Pointer to an etd. 
*/ { - fprintf(stderr, + fprintf(stderr, "ETD etd=%p %s min=%02x cmod=%p ttype=%02x mex=%p, pex=%p, ", (UNIV)p, p->etdgi+1, p->etdmin, (UNIV)p->etdmod, p->etdmod->ttype, (UNIV)p->etdmex, (UNIV)p->etdpex); @@ -400,15 +399,14 @@ unsigned long *h; /* TRACEGI: Trace GI testing stages in CONTEXT.C processing. */ -VOID tracegi(stagenm, gi, mod, pos, Tstart) +VOID tracegi(stagenm, gi, mod, pos) char *stagenm; struct etd *gi; /* ETD of new GI. */ struct thdr mod[]; /* Model of current open element. */ struct mpos pos[]; /* Position in open element's model. */ -int Tstart; /* Initial T for this group. */ { int i = 0; /* Loop counter. */ - + fprintf(stderr, "%-10s %d:", stagenm, P); while (++i<=P) fprintf(stderr, " %d-%d", pos[i].g, pos[i].t); @@ -427,16 +425,15 @@ int Tstart; /* Initial T for this group. */ } /* TRACEEND: Trace testing for end of group in CONTEXT.C processing. */ -VOID traceend(stagenm, mod, pos, rc, opt, Tstart) +VOID traceend(stagenm, mod, pos, rc, opt) char *stagenm; struct thdr mod[]; /* Model of current open element. */ struct mpos pos[]; /* Position in open element's model. */ int rc; /* Return code: RCNREQ RCHIT RCMISS RCEND */ int opt; /* ALLHIT parm: 1=test optionals; 0=ignore. */ -int Tstart; /* Initial T for this group. */ { int i = 0; /* Loop counter. 
*/ - + fprintf(stderr, "%-10s %d:", stagenm, P); while (++i<=P) fprintf(stderr, " %d-%d", pos[i].g, pos[i].t); diff --git a/usr.bin/sgmls/sgmls/version.c b/usr.bin/sgmls/sgmls/version.c index 7144593574e6..f3b2d7c0e94d 100644 --- a/usr.bin/sgmls/sgmls/version.c +++ b/usr.bin/sgmls/sgmls/version.c @@ -1 +1 @@ -char *version_string = "1.1"; +char *version_string = "1.1.91"; diff --git a/usr.bin/sgmls/sgmls/xfprintf.c b/usr.bin/sgmls/sgmls/xfprintf.c index f544faaaa38a..1c504690f8e6 100644 --- a/usr.bin/sgmls/sgmls/xfprintf.c +++ b/usr.bin/sgmls/sgmls/xfprintf.c @@ -31,10 +31,6 @@ typedef long double long_double; #endif #endif /* FP_SUPPORT */ -#ifndef __STDC__ -#define const /* as nothing */ -#endif - #ifdef USE_PROTOTYPES #define P(parms) parms #else @@ -113,10 +109,10 @@ struct spec *sp; sp->pos = **pp - '0'; *pp += 2; } - + while (**pp != '\0' && strchr(FLAG_CHARS, **pp)) *pp += 1; - + /* handle the field width */ sp->field_width = MISSING; @@ -162,7 +158,7 @@ struct spec *sp; modifier = **pp; *pp += 1; } - + switch (**pp) { case 'd': case 'i': @@ -219,7 +215,7 @@ static int find_arg_types(format, arg_type) int i, pos; const char *p; struct spec spec; - + for (i = 0; i < 9; i++) arg_type[i] = NONE; @@ -384,7 +380,7 @@ static int printit(handle, func, p, ap, nargs, arg) start = ++p; if (!parse_spec(&p, &spec)) abort(); /* should have caught it in find_arg_types */ - + buf[0] = '%'; q = buf + 1; @@ -470,7 +466,7 @@ static int maybe_positional(format) } return 1; } - + static int xdoprt(handle, func, format, ap) UNIV handle; printer func; @@ -483,7 +479,7 @@ static int xdoprt(handle, func, format, ap) if (!find_arg_types(format, arg_type)) return -1; - + for (nargs = 0; nargs < 9; nargs++) if (arg_type[nargs] == NONE) break; @@ -491,7 +487,7 @@ static int xdoprt(handle, func, format, ap) for (i = nargs; i < 9; i++) if (arg_type[i] != NONE) return -1; - + for (i = 0; i < nargs; i++) get_arg(arg_type[i], &ap, arg + i); diff --git a/usr.bin/sgmls/sgmlsasp/Makefile 
b/usr.bin/sgmls/sgmlsasp/Makefile index 69bfdabddf46..17da01e11ac5 100644 --- a/usr.bin/sgmls/sgmlsasp/Makefile +++ b/usr.bin/sgmls/sgmlsasp/Makefile @@ -1,7 +1,7 @@ # # Bmakefile for sgmlsasp # -# $id$ +# $Id$ # PROG= sgmlsasp diff --git a/usr.bin/sgmls/sgmlsasp/replace.c b/usr.bin/sgmls/sgmlsasp/replace.c index a37086be3129..95fa113132a0 100644 --- a/usr.bin/sgmls/sgmlsasp/replace.c +++ b/usr.bin/sgmls/sgmlsasp/replace.c @@ -90,7 +90,7 @@ void load_replacement_file(tablep, file) else error("can't open `%s'", file); } - + current_lineno = 1; current_file = file; tok = get_token(); @@ -131,7 +131,7 @@ struct replacement_item **parse_string(tail, recog_attr) { struct buffer buf; unsigned len; - + buffer_init(&buf); for (;;) { int c = get(); @@ -274,7 +274,6 @@ int get_token() default: parse_error("bad input character `%c'", c); } - return EOF; } static @@ -325,7 +324,7 @@ struct replacement *lookup_replacement(tablep, type, name) { int h = hash(type, name); struct table_entry *p; - + for (p = tablep->table[h]; p; p = p->next) if (strcmp(name, p->gi) == 0 && type == p->type) return &p->replacement; @@ -342,7 +341,7 @@ struct replacement *define_replacement(tablep, type, name) { int h = hash(type, name); struct table_entry *p; - + for (p = tablep->table[h]; p; p = p->next) if (strcmp(name, p->gi) == 0 && type == p->type) return 0; @@ -409,7 +408,7 @@ int hash(type, s) char *s; { unsigned long h = 0, g; - + while (*s != 0) { h <<= 4; h += *s++; @@ -442,7 +441,7 @@ UNIV xrealloc(p, size) parse_error("out of memory"); return p; } - + static NO_RETURN #ifdef VARARGS void parse_error(va_alist) va_dcl @@ -455,7 +454,7 @@ void parse_error(char *message,...) 
char *message; #endif va_list ap; - + #ifdef VARARGS va_start(ap); message = va_arg(ap, char *); diff --git a/usr.bin/sgmls/sgmlsasp/replace.h b/usr.bin/sgmls/sgmlsasp/replace.h index be2bbcd735fa..18c9f82d7596 100644 --- a/usr.bin/sgmls/sgmlsasp/replace.h +++ b/usr.bin/sgmls/sgmlsasp/replace.h @@ -5,7 +5,7 @@ enum replacement_type { DATA_REPL, ATTR_REPL }; - + struct replacement_item { union { char *attr; @@ -30,6 +30,6 @@ enum event_type { START_ELEMENT, END_ELEMENT }; struct replacement_table *make_replacement_table P((void)); void load_replacement_file P((struct replacement_table *, char *)); - + struct replacement * lookup_replacement P((struct replacement_table *, enum event_type, char *)); diff --git a/usr.bin/sgmls/sgmlsasp/sgmlsasp.1 b/usr.bin/sgmls/sgmlsasp/sgmlsasp.1 index ab0337113eb0..5033744fd5b2 100644 --- a/usr.bin/sgmls/sgmlsasp/sgmlsasp.1 +++ b/usr.bin/sgmls/sgmlsasp/sgmlsasp.1 @@ -3,7 +3,7 @@ .SH NAME sgmlsasp \- translate output of sgmls using ASP replacement files .SH SYNOPSIS -.B sgmls +.B sgmlsasp .RB [ \-n ] .I replacement_file\|.\|.\|. .SH DESCRIPTION diff --git a/usr.bin/sgmls/sgmlsasp/sgmlsasp.c b/usr.bin/sgmls/sgmlsasp/sgmlsasp.c index eacf1c176b5d..fdaf113bc806 100644 --- a/usr.bin/sgmls/sgmlsasp/sgmlsasp.c +++ b/usr.bin/sgmls/sgmlsasp/sgmlsasp.c @@ -154,7 +154,7 @@ struct sgmls_attribute *attributes; return; if (repl->flags & NEWLINE_BEGIN) output_begin_line(); - + for (p = repl->items; p; p = p->next) switch (p->type) { case DATA_REPL: @@ -190,7 +190,7 @@ struct sgmls_attribute *p; { char **token = p->value.token.v; int n = p->value.token.n; - + if (n > 0) { int i; output_token(token[0]); @@ -262,7 +262,7 @@ void error(char *message,...) 
char *message; #endif va_list ap; - + fprintf(stderr, "%s: ", program_name); #ifdef VARARGS va_start(ap); diff --git a/usr.bin/sgmls/unix.cfg b/usr.bin/sgmls/unix.cfg index 0bc84108061b..42455112d503 100644 --- a/usr.bin/sgmls/unix.cfg +++ b/usr.bin/sgmls/unix.cfg @@ -11,6 +11,17 @@ Usually the same as PATH_FILE_SEP. */ #define SYSID_FILE_SEP ':' /* The environment variable that contains the list of filename templates. */ #define PATH_ENV_VAR "SGML_PATH" +/* A macro that returns non-zero if the filename is relative to the + current directory. */ +#define FILE_IS_RELATIVE(p) ((p)[0] != '/') +/* A string containing the characters that can separate the directory + part of a filename from the basename. */ +#define DIR_BASE_SEP "/" +/* The environment variable that contains the list of catalog entry files. + Filenames are separated by PATH_FILE_SEP. */ +#define CATALOG_FILES_ENV_VAR "SGML_CATALOG_FILES" +/* Default list of catalog entry files. */ +#define DEFAULT_CATALOG_FILES "CATALOG:/usr/local/lib/sgml/CATALOG" /* MIN_DAT_SUBS_FROM and MIN_DATS_SUBS_TO tell sgmls how to transform a name or system identifier into a legal filename. A character in @@ -126,6 +137,13 @@ typedef void *UNIV; change `void' to `int'. */ typedef void VOID; +/* If your compiler doesn't understand const, define it to be nothing. */ +#ifndef __STDC__ +#ifndef const +#define const /* as nothing */ +#endif +#endif + /* If you don't have an ANSI C conformant <limits.h>, define CHAR_SIGNED as 1 or 0 according to whether the `char' type is signed. The <limits.h> on some versions of System Release V 3.2 is not ANSI C