/*
*****************************************************************************
*
* $RCSfile: grammutil.c,v $
* $Date: 1999/08/19 17:20:20 $
* $Source: /home/richard/Xml/RCS/grammutil.c,v $
* $Revision: 1.93 $
* $Author: richard $
*
*****************************************************************************
*
* Copyright 1998, 1999 Brown University and Richard Goerwitz
*
*****************************************************************************
*
* Utilities for adding new elements, attributes, etc. to internal
* hashtables; utilities for resolving general, parameter-entity,
* and other references; utilities for checking attributes; etc.
*
* Set your editor's column/screen width to at least 100, or this
* file may end up a bit hard to read.
*
*****************************************************************************
*/
#include "grammutil.h"
#include "errabort.h"
#include "hashutil.h"
#include "langcode.h"
#include "namespace.h"
#include "nfadfa.h"
#include "parstree.h"
#include "parsutil.h"
#include "utfutil.h"
#include "xtrautil.h"
/* Forward declarations of the static (file-local) helpers used below. */
static void check_for_duplicate_names (xml_file *, my_wchar_t **);
static int basic_attribute_integrity_check (xml_file *, xml_attribute *, name_val *, int);
static int expands_recursively (xml_file *, my_wchar_t *, my_wchar_t, my_wchar_t *);
static my_wchar_t *state_to_string (enum where_am_i);
/*
 * expand_element
 *
 * Look up element name elname (arg 2) in xf's element_names
 * hashtable.  Nothing is really "expanded" here (unlike, say,
 * expand_eref(), which yields replacement text); the caller simply
 * gets the xml_element struct recorded for that name.
 *
 * Returns the data pointer stored in the hashtable entry (no copy is
 * made), or NULL if no element by that name is in the table.
 */
struct xml_element *
expand_element (struct xml_file *xf, my_wchar_t *elname)
{
  struct rg_htable_item probe, *hit;

  if (xmlparse_env.debug_level >= 7)
    xwrap (errdebug (7, "expanding elname, %s\n",
                     utf_16_to_utf_8 (uni_truncate_to (elname, 20))));

  /* a NULL key tells rg_find_item to search on uni_key instead */
  probe.key = NULL;
  probe.uni_key = elname;
  probe.data = NULL;

  hit = rg_find_item (xf->element_names, probe);
  if (hit != NULL)
    {
      xwrap (errdebug (5, "expanded element as xml_element struct\n"));
      return hit->data;
    }

  /* an element used in markup that doesn't resolve is an error */
  xwrap (errdebug (5, "oops; element name, %s, undefined\n",
                   utf_16_to_utf_8 (uni_truncate_to (elname, 20))));
  return NULL;
}
/*
 * expand_notname
 *
 * Provide the system identifier recorded for notation name notname
 * (arg 2).  (Notations can be declared with a public or a system
 * identifier, but the public identifier is expected to resolve to a
 * system identifier.)
 *
 * The string is copied into a static buffer owned by this function;
 * the buffer is overwritten by the next call, so callers must not
 * free it and must copy it if they need it to persist.  Returns NULL
 * if no such notation has been declared.
 */
my_wchar_t *
expand_notname (struct xml_file *xf, my_wchar_t *notname)
{
  static size_t buflen = 0;
  static my_wchar_t *buf = NULL;
  struct rg_htable_item probe, *hit;
  size_t needed;
  char *shown;

  if (xmlparse_env.debug_level >= 7)
    xwrap (errdebug (7, "expanding notname, %s\n",
                     utf_16_to_utf_8 (uni_truncate_to (notname, 20))));

  /* a NULL key tells rg_find_item to search on uni_key instead */
  probe.key = NULL;
  probe.uni_key = notname;
  probe.data = NULL;

  hit = rg_find_item (xf->notation_names, probe);
  if (hit == NULL)
    {
      /* a notation name that doesn't resolve is an error */
      xwrap (errdebug (5, "oops; notation name, %s, undefined\n",
                       utf_16_to_utf_8 (uni_truncate_to (notname, 20))));
      return NULL;
    }

  /* grow the static buffer if the sysid (plus terminator) won't fit */
  needed = uni_strlen (hit->data) + 1;
  if (needed > buflen)
    {
      buflen = needed;
      if (buf == NULL)
        {
          buf = malloc (buflen * sizeof (my_wchar_t));
          if (buf == NULL)
            errabort (40, "malloc() error in %s\n", "expand_notname()");
        }
      else
        {
          buf = realloc (buf, buflen * sizeof (my_wchar_t));
          if (buf == NULL)
            errabort (41, "realloc() error in %s\n", "expand_notname()");
        }
    }

  /* terminator is included in needed, so it gets copied as well */
  memcpy (buf, hit->data, needed * sizeof (my_wchar_t));

  if (xmlparse_env.debug_level >= 5)
    {
      /* duplicate the first conversion before converting a second string */
      shown = strdup (utf_16_to_utf_8 (uni_truncate_to (buf, 20)));
      xwrap (errdebug (5, "expanded notname, %s -> %s\n",
                       utf_16_to_utf_8 (uni_truncate_to (notname, 20)),
                       shown));
      free (shown);
    }
  return buf;
}
/*
 * expand_peref
 *
 * Provide the replacement text for parameter entity peref (arg 2).
 *
 * Except when used inside entity values, PErefs are expanded with
 * one leading and one trailing blank, '\x20' (XML 1.0 paragraph
 * 4.4.8).  with_whitespace (arg 3) selects between the two forms:
 * nonzero adds the blanks, zero copies the text unchanged.
 *
 * The result lives in a static buffer owned by this function and is
 * overwritten by the next call.  Returns NULL on an unresolvable
 * PERef.
 */
my_wchar_t *
expand_peref (struct xml_file *xf, my_wchar_t *peref, int with_whitespace)
{
  static size_t buflen = 0;
  static my_wchar_t *buf = NULL;
  struct rg_htable_item probe, *hit;
  size_t textlen, needed;
  my_wchar_t *out;
  char *shown;

  if (xmlparse_env.debug_level >= 7)
    xwrap (errdebug (7, "expanding peref, %s\n",
                     utf_16_to_utf_8 (uni_truncate_to (peref, 20))));

  /* a NULL key tells rg_find_item to search on uni_key instead */
  probe.key = NULL;
  probe.uni_key = peref;
  probe.data = NULL;

  hit = rg_find_item (xf->parameter_entity_names, probe);
  if (hit == NULL)
    {
      /* a PEReference that doesn't resolve is an error */
      xwrap (errdebug (5, "oops; parameter entity, %s, undefined\n",
                       utf_16_to_utf_8 (uni_truncate_to (peref, 20))));
      return NULL;
    }

  /* always reserve room for two blanks and the terminator */
  textlen = uni_strlen (hit->data);
  needed = textlen + 3;
  if (needed > buflen)
    {
      buflen = needed;
      if (buf == NULL)
        {
          buf = malloc (buflen * sizeof (my_wchar_t));
          if (buf == NULL)
            errabort (40, "malloc() error in %s\n", "expand_peref()");
        }
      else
        {
          buf = realloc (buf, buflen * sizeof (my_wchar_t));
          if (buf == NULL)
            errabort (41, "realloc() error in %s\n", "expand_peref()");
        }
    }

  if (with_whitespace)
    {
      /* XML 1.0 paragraph 4.4.8: blank, text, blank, terminator */
      out = buf;
      *out++ = '\x20';
      memcpy (out, hit->data, textlen * sizeof (my_wchar_t));
      out += textlen;
      *out++ = '\x20';
      *out = 0;
    }
  else
    {
      /* inside an entity value: plain copy, terminator included */
      memcpy (buf, hit->data, (textlen + 1) * sizeof (my_wchar_t));
    }

  if (xmlparse_env.debug_level >= 5)
    {
      /* duplicate the first conversion before converting a second string */
      shown = strdup (utf_16_to_utf_8 (uni_truncate_to (buf, 20)));
      xwrap (errdebug (5, "expanded peref, %s -> \"%s\"\n",
                       utf_16_to_utf_8 (uni_truncate_to (peref, 20)),
                       shown));
      free (shown);
    }
  return buf;
}
/*
 * expand_eref
 *
 * Provide the replacement text for general entity eref (arg 2), as
 * stored in xf's entity_names hashtable.
 *
 * The text is copied into a static buffer owned by this function;
 * the buffer is overwritten by the next call.  Returns NULL if the
 * entity is not (yet) defined.
 */
my_wchar_t *
expand_eref (struct xml_file *xf, my_wchar_t *eref)
{
  static size_t buflen = 0;
  static my_wchar_t *buf = NULL;
  struct rg_htable_item probe, *hit;
  size_t needed;
  char *shown;

  if (xmlparse_env.debug_level >= 7)
    xwrap (errdebug (7, "expanding eref, %s\n",
                     utf_16_to_utf_8 (uni_truncate_to (eref, 20))));

  /* a NULL key tells rg_find_item to search on uni_key instead */
  probe.key = NULL;
  probe.uni_key = eref;
  probe.data = NULL;

  hit = rg_find_item (xf->entity_names, probe);
  if (hit == NULL)
    {
      /* this entity reference is not (yet?) defined */
      xwrap (errdebug (5, "oops; entity, %s, undefined\n",
                       utf_16_to_utf_8 (uni_truncate_to (eref, 20))));
      return NULL;
    }

  /* grow the static buffer if the text (plus terminator) won't fit */
  needed = uni_strlen (hit->data) + 1;
  if (needed > buflen)
    {
      buflen = needed;
      if (buf == NULL)
        {
          buf = malloc (buflen * sizeof (my_wchar_t));
          if (buf == NULL)
            errabort (40, "malloc() error in %s\n", "expand_eref()");
        }
      else
        {
          buf = realloc (buf, buflen * sizeof (my_wchar_t));
          if (buf == NULL)
            errabort (41, "realloc() error in %s\n", "expand_eref()");
        }
    }

  /* terminator is included in needed, so it gets copied as well */
  memcpy (buf, hit->data, needed * sizeof (my_wchar_t));

  if (xmlparse_env.debug_level >= 5)
    {
      /* duplicate the first conversion before converting a second string */
      shown = strdup (utf_16_to_utf_8 (uni_truncate_to (buf, 20)));
      xwrap (errdebug (5, "expanded entity ref, %s -> %s\n",
                       utf_16_to_utf_8 (uni_truncate_to (eref, 20)),
                       shown));
      free (shown);
    }
  return buf;
}
/*
 * expand_ext_eref
 *
 * Provide the string stored in xf's external_entity_names hashtable
 * for ext_eref (arg 2), a reference to an external entity.
 *
 * The string is copied into a static buffer owned by this function;
 * the buffer is overwritten by the next call.  Returns NULL if the
 * external entity is not (yet) defined.
 */
my_wchar_t *
expand_ext_eref (struct xml_file *xf, my_wchar_t *ext_eref)
{
  static size_t buflen = 0;
  static my_wchar_t *buf = NULL;
  struct rg_htable_item probe, *hit;
  size_t needed;
  char *shown;

  if (xmlparse_env.debug_level >= 7)
    xwrap (errdebug (7, "expanding ext_eref, %s\n",
                     utf_16_to_utf_8 (uni_truncate_to (ext_eref, 20))));

  /* a NULL key tells rg_find_item to search on uni_key instead */
  probe.key = NULL;
  probe.uni_key = ext_eref;
  probe.data = NULL;

  hit = rg_find_item (xf->external_entity_names, probe);
  if (hit == NULL)
    {
      /* this entity reference is not (yet?) defined */
      xwrap (errdebug (5, "oops; external entity, %s, undefined\n",
                       utf_16_to_utf_8 (uni_truncate_to (ext_eref, 20))));
      return NULL;
    }

  /* grow the static buffer if the text (plus terminator) won't fit */
  needed = uni_strlen (hit->data) + 1;
  if (needed > buflen)
    {
      buflen = needed;
      if (buf == NULL)
        {
          buf = malloc (buflen * sizeof (my_wchar_t));
          if (buf == NULL)
            errabort (40, "malloc() error in %s\n", "expand_ext_eref()");
        }
      else
        {
          buf = realloc (buf, buflen * sizeof (my_wchar_t));
          if (buf == NULL)
            errabort (41, "realloc() error in %s\n", "expand_ext_eref()");
        }
    }

  /* terminator is included in needed, so it gets copied as well */
  memcpy (buf, hit->data, needed * sizeof (my_wchar_t));

  if (xmlparse_env.debug_level >= 5)
    {
      /* duplicate the first conversion before converting a second string */
      shown = strdup (utf_16_to_utf_8 (uni_truncate_to (buf, 20)));
      xwrap (errdebug (5, "expanded entity ref, %s -> %s\n",
                       utf_16_to_utf_8 (uni_truncate_to (ext_eref, 20)),
                       shown));
      free (shown);
    }
  return buf;
}
/*
 * expand_uperef
 *
 * Look up unparsed entity name uperef (arg 2) in xf's
 * unparsed_entity_names hashtable.  This doesn't "expand" anything,
 * per se; it hands back more information on the entity, in the form
 * of the xml_unparsed_entity struct recorded for it.
 *
 * Returns the data pointer stored in the hashtable entry (no copy is
 * made), or NULL if no such unparsed entity is in the table.
 */
struct xml_unparsed_entity *
expand_uperef (struct xml_file *xf, my_wchar_t *uperef)
{
  struct rg_htable_item probe, *hit;

  if (xmlparse_env.debug_level >= 7)
    xwrap (errdebug (7, "expanding uperef, %s\n",
                     utf_16_to_utf_8 (uni_truncate_to (uperef, 20))));

  /* a NULL key tells rg_find_item to search on uni_key instead */
  probe.key = NULL;
  probe.uni_key = uperef;
  probe.data = NULL;

  hit = rg_find_item (xf->unparsed_entity_names, probe);
  if (hit != NULL)
    {
      xwrap (errdebug (5, "expanded uperef as xml_unparsed_entity struct\n"));
      return hit->data;
    }

  /* an unparsed entity reference that doesn't resolve is an error */
  xwrap (errdebug (5, "oops; unparsed entity, %s, undefined\n",
                   utf_16_to_utf_8 (uni_truncate_to (uperef, 20))));
  return NULL;
}
/*
 * expand_uperef_as_sysid
 *
 * Provide the system identifier associated with unparsed entity
 * uperef (arg 2).
 *
 * The identifier is copied out of the entity's xml_unparsed_entity
 * struct into a static buffer owned by this function; the buffer is
 * overwritten by the next call.  Returns NULL if no such unparsed
 * entity has been defined.
 *
 * NOTE(review): assumes the sysid field of a stored entity is never
 * NULL -- confirm against the code that fills the hashtable.
 */
my_wchar_t *
expand_uperef_as_sysid (struct xml_file *xf, my_wchar_t *uperef)
{
  char *tmp;
  size_t len;
  static size_t buflen = 0;
  static my_wchar_t *buf = NULL;
  struct xml_unparsed_entity *ue;
  struct rg_htable_item it, *result;

  if (xmlparse_env.debug_level >= 7)
    xwrap (errdebug (7, "expanding uperef, %s\n",
                     utf_16_to_utf_8 (uni_truncate_to (uperef, 20))));

  /* if it.key == NULL, then rg_find_item will use it.uni_key */
  it.key = NULL;
  it.uni_key = uperef;
  it.data = NULL;
  result = rg_find_item (xf->unparsed_entity_names, it);
  if (result == NULL)
    {
      /* it's an error if an unparsed eref doesn't resolve */
      xwrap (errdebug (5, "oops; unparsed entity, %s, undefined\n",
                       utf_16_to_utf_8 (uni_truncate_to (uperef, 20))));
      return NULL;
    }

  /* The table entry is a struct, not a string: measure the sysid
   * member itself so the copy below is exactly string + terminator.
   */
  ue = (struct xml_unparsed_entity *) result->data;
  len = uni_strlen (ue->sysid) + 1;
  if (len > buflen)
    {
      buflen = len;
      if (buf == NULL)
        {
          if ((buf = malloc (buflen * sizeof (my_wchar_t))) == NULL)
            errabort (40, "malloc() error in %s\n", "expand_uperef_as_sysid()");
        }
      else
        {
          if ((buf = realloc (buf, buflen * sizeof (my_wchar_t))) == NULL)
            errabort (41, "realloc() error in %s\n", "expand_uperef_as_sysid()");
        }
    }

  /* copy system identifier string into static buffer */
  memcpy (buf, ue->sysid, len * sizeof (my_wchar_t));

  if (xmlparse_env.debug_level >= 5)
    {
      /* duplicate the first conversion before converting a second string */
      tmp = strdup (utf_16_to_utf_8 (uni_truncate_to (buf, 20)));
      xwrap (errdebug (5, "expanded uperef, %s -> %s\n",
                       utf_16_to_utf_8 (uni_truncate_to (uperef, 20)),
                       tmp));
      free (tmp);
    }
  return buf;
}
/*
 * expand_uperef_as_notname
 *
 * Provide the notation name associated with unparsed entity uperef
 * (arg 2).
 *
 * The name is copied out of the entity's xml_unparsed_entity struct
 * into a static buffer owned by this function; the buffer is
 * overwritten by the next call.  Returns NULL if no such unparsed
 * entity, uperef, has been defined.
 *
 * Note that if an unparsed entity in the DTD is invalid, and points
 * to an undeclared NOTATION, then this routine will still return
 * that notation name; passing it to expand_notname() will then
 * yield NULL.
 *
 * Put more simply: If the document isn't valid, don't count on
 * this function returning anything useful.
 *
 * NOTE(review): assumes the notname field of a stored entity is
 * never NULL -- confirm against the code that fills the hashtable.
 */
my_wchar_t *
expand_uperef_as_notname (struct xml_file *xf, my_wchar_t *uperef)
{
  char *tmp;
  size_t len;
  static size_t buflen = 0;
  static my_wchar_t *buf = NULL;
  struct xml_unparsed_entity *ue;
  struct rg_htable_item it, *result;

  if (xmlparse_env.debug_level >= 7)
    xwrap (errdebug (7, "expanding uperef, %s\n",
                     utf_16_to_utf_8 (uni_truncate_to (uperef, 20))));

  /* if it.key == NULL, then rg_find_item will use it.uni_key */
  it.key = NULL;
  it.uni_key = uperef;
  it.data = NULL;
  result = rg_find_item (xf->unparsed_entity_names, it);
  if (result == NULL)
    {
      /* it's an error if an unparsed entity ref doesn't resolve */
      xwrap (errdebug (5, "oops; unparsed entity, %s, undefined\n",
                       utf_16_to_utf_8 (uni_truncate_to (uperef, 20))));
      return NULL;
    }

  /* The table entry is a struct, not a string: measure the notname
   * member itself so the copy below is exactly string + terminator.
   */
  ue = (struct xml_unparsed_entity *) result->data;
  len = uni_strlen (ue->notname) + 1;
  if (len > buflen)
    {
      buflen = len;
      if (buf == NULL)
        {
          if ((buf = malloc (buflen * sizeof (my_wchar_t))) == NULL)
            errabort (40, "malloc() error in %s\n", "expand_uperef_as_notname()");
        }
      else
        {
          if ((buf = realloc (buf, buflen * sizeof (my_wchar_t))) == NULL)
            errabort (41, "realloc() error in %s\n", "expand_uperef_as_notname()");
        }
    }

  /* copy notation name into static buffer */
  memcpy (buf, ue->notname, len * sizeof (my_wchar_t));

  if (xmlparse_env.debug_level >= 5)
    {
      /* duplicate the first conversion before converting a second string */
      tmp = strdup (utf_16_to_utf_8 (uni_truncate_to (buf, 20)));
      xwrap (errdebug (5, "expanded uperef, %s -> %s\n",
                       utf_16_to_utf_8 (uni_truncate_to (uperef, 20)),
                       tmp));
      free (tmp);
    }
  return buf;
}
/*
 * expand_attribute
 *
 * Look up the declaration of attribute attname (arg 3) on element
 * elname (arg 2), for a given xml_file struct, xf.
 *
 * Returns the xml_attribute pointer held in the element's attlist
 * (no copy is made).  Returns NULL if the element itself is unknown
 * or if it has no attribute by that name.
 */
xml_attribute *
expand_attribute (xml_file *xf, my_wchar_t *elname, my_wchar_t *attname)
{
  size_t i;
  char *tmp;
  struct xml_element *xe;
  struct xml_attribute *retval;

  if (xmlparse_env.debug_level >= 7)
    {
      /* duplicate the first conversion before converting a second string */
      tmp = strdup (utf_16_to_utf_8 (uni_truncate_to (attname, 20)));
      xwrap (errdebug (7, "trying to expand attname %s for element %s\n",
                       tmp, utf_16_to_utf_8 (uni_truncate_to (elname, 20))));
      free (tmp);
    }

  if ((xe = expand_element (xf, elname)) == NULL)
    {
      xwrap (errdebug (5, "no such element, %s\n",
                       utf_16_to_utf_8 (uni_truncate_to (elname, 20))));
      return NULL;
    }

  /* add_attribute() discards duplicate names, so the first match is
   * the only one and the scan can stop there
   */
  retval = NULL;
  for (i = 0; i < xe->attlistlen; i++)
    if (uni_strcmp (xe->attlist[i]->name, attname) == 0)
      {
        retval = xe->attlist[i];
        break;
      }

  if (xmlparse_env.debug_level >= 5)
    {
      /* report the attribute name first, then the element name */
      tmp = strdup (utf_16_to_utf_8 (uni_truncate_to (attname, 20)));
      xwrap (errdebug (5, "attribute %s%s found for element %s\n",
                       tmp, retval ? "" : " not",
                       utf_16_to_utf_8 (uni_truncate_to (elname, 20))));
      free (tmp);
    }
  return retval;
}
/*
 * expand_id
 *
 * ID attribute values are unique for a given XML document.  This
 * routine looks up ID value idstring (arg 2) in xf's ids hashtable
 * and returns the data pointer recorded for it, i.e. the place in
 * the parse tree for xf (arg 1) where that ID attribute occurs.
 *
 * Returns NULL if the idstring can't be resolved (presumably
 * because no such ID has been encountered [yet] in xf).
 *
 * Note: This routine isn't much use if xmlparse_env.keep_children
 * is set to "no", since this means the parser is discarding nodes
 * in the parse tree shortly after generating them.  In that case a
 * successful lookup ends in errabort() rather than a return.
 */
struct name_val *
expand_id (struct xml_file *xf, my_wchar_t *idstring)
{
  struct rg_htable_item probe, *found;

  if (xmlparse_env.debug_level >= 7)
    xwrap (errdebug (7, "expanding id, %s\n",
                     utf_16_to_utf_8 (uni_truncate_to (idstring, 20))));

  /* search the ids table on the wide-character key */
  probe.key = NULL;
  probe.uni_key = idstring;
  probe.data = NULL;

  found = rg_find_item (xf->ids, probe);
  if (found == NULL)
    {
      /* this ID has not been recorded */
      if (xmlparse_env.debug_level >= 5)
        xwrap (errdebug (5, "id, %s, not defined\n",
                         utf_16_to_utf_8 (uni_truncate_to (idstring, 20))));
      return NULL;
    }

  /* the ID is known; report the successful lookup */
  if (xmlparse_env.debug_level >= 5)
    xwrap (errdebug (5, "expanded id, %s, as xml_node\n",
                     utf_16_to_utf_8 (uni_truncate_to (idstring, 20))));

  /* without a retained parse tree there is nothing to point into */
  if (xmlparse_env.keep_children == no)
    errabort (54, "tried to dereference ID attribute without any parse tree\n");

  return found->data;
}
/*
 * add_element
 *
 * Add element name (arg 2) to xf's element_names hashtable, with
 * information on the element's content model (type, arg 3, and
 * content_model, arg 4).
 *
 * If the name is already present as a dummy element (created so
 * that add_attribute() works for an as yet undeclared element), the
 * dummy is replaced by the real declaration and the attribute list
 * collected so far is carried over to it.
 *
 * Returns NULL (after recording XML error 650) if the element has
 * already been fully declared.  Otherwise returns a pointer to the
 * new element.  Allocation failures end in errabort().
 */
struct xml_element *
add_element (xml_file *xf, my_wchar_t *name, enum content_types type, void *content_model)
{
  size_t attlistlen;
  struct xml_element *xe;
  struct xml_attribute **attlist;
  struct rg_htable_item it, *result;

  if (xmlparse_env.debug_level >= 7)
    xwrap (errdebug (7, "adding %s to element_name hashtable (size = %d)\n",
                     utf_16_to_utf_8 (uni_truncate_to (name, 20)),
                     xf->element_names->no_items));

  /* if it.key == NULL, then rg_find_item will use it.uni_key */
  it.key = NULL;
  it.uni_key = name;
  it.data = NULL;

  if ((result = rg_find_item (xf->element_names, it)) == NULL)
    {
      /* Not defined already; proceed to add it to the element_names
       * hashtable for xf.  Do a quick check to make sure that the
       * element name doesn't have any malformed namespaces (e.g.,
       * "hello:" or ":goodbye").
       */
      check_for_colon (xf, name, 1376, 1377, 1378);
      if ((xe = create_xml_element (xf, name, type, content_model)) == NULL)
        errabort (40, "malloc() error in %s\n", "add_element()");

      switch (type)
        {
        case children:
          /* compile the content model, if one was supplied */
          if (content_model)
            xe->compiled_content_model = make_dfa (xf, content_model);
          break;
        case mixed:
          /* warn about names listed twice in a Mixed content model */
          check_for_duplicate_names (xf, content_model);
          break;
        case dummy:
        case empty:
        case Any:
          /* what should we check here? */
          break;
        default:
          break;
        }

      /* the table gets its own copy of the key; check the allocation
       * the same way add_notname() and add_peref() do
       */
      it.uni_key = uni_strdup (name);
      if (it.uni_key == NULL)
        errabort (40, "malloc() error in %s\n", "add_element()");
      it.data = xe;
      rg_add_item (xf->element_names, it);
      xwrap (errdebug (5, "added %s to element_names hashtable (new size = %d)\n",
                       utf_16_to_utf_8 (uni_truncate_to (name, 20)),
                       xf->element_names->no_items));
      /* return a pointer to the element we just created */
      return xe;
    }

  /* eek; it's already defined; if it's a dummy element (created
   * to make add_attribute() work for an as yet undeclared element),
   * then delete the dummy and try again; otherwise, it's an error
   */
  xe = result->data;
  if (xe->type == dummy)
    {
      /* detach the attribute list first, so that freeing the dummy
       * cannot take the attributes with it
       */
      attlist = xe->attlist; xe->attlist = NULL;
      attlistlen = xe->attlistlen; xe->attlistlen = 0;
      xwrap (errdebug (5, "duplicate dummy element exists; deleting it\n"));
      rg_delete_item (xf->element_names, it);
      free_xml_element (xe);

      /* with the dummy gone this should take the "not defined" path;
       * a NULL here means the delete didn't work, so don't write
       * through it
       */
      xe = add_element (xf, name, type, content_model);
      if (xe == NULL)
        errabort (45, "unexpected return value from %s\n", "add_element()");
      xe->attlistlen = attlistlen;
      xe->attlist = attlist;
      return xe;
    }

  /* element has already been fully defined */
  add_xml_error (xf, 650, uni_truncate_to (name, 20));
  /* element structs are fairly complex; comparing them is hard */
  xwrap (errdebug (3, "error adding %s to element_names hashtable (size = %d)\n",
                   utf_16_to_utf_8 (uni_truncate_to (name, 20)),
                   xf->element_names->no_items));
  return NULL;
}
/*
 * check_for_duplicate_names
 *
 * Scan wps (arg 2), a NULL-terminated array of my_wchar_t strings,
 * for entries that occur more than once.  This is how a symbol used
 * twice in a Mixed content model gets noticed.  XML warning 681 is
 * recorded against xf for every pair of equal entries.  A NULL wps
 * is accepted and ignored.
 */
static void
check_for_duplicate_names (struct xml_file *xf, my_wchar_t **wps)
{
  size_t cur, earlier;

  if (wps == NULL)
    return;

  /* compare each entry against every entry that precedes it */
  cur = 0;
  while (wps[cur] != NULL)
    {
      for (earlier = 0; earlier < cur; earlier++)
        {
          if (uni_strcmp (wps[earlier], wps[cur]) == 0)
            add_xml_warning (xf, 681, wps[earlier]);
        }
      cur++;
    }
}
/*
 * add_attribute
 *
 * Adds attribute declaration xa (arg 3) to the attribute list of
 * element elname (arg 2), for a given xml_file struct, xf.  If the
 * element has not been declared yet, a dummy element is created to
 * hold the attribute list.
 *
 * Returns zero if the attribute is already present; in that case xa
 * is not stored and the caller frees it.  Otherwise xa is stored in
 * the element's attlist and the number of attributes now in the
 * xml_element struct is returned.
 *
 * Whether or not xa is a duplicate, its default value (if it has
 * one) is normalized and entity-expanded in place and then checked.
 * Allocation failures end in errabort().
 */
size_t
add_attribute (xml_file *xf, my_wchar_t *elname, struct xml_attribute *xa)
{
  char *tmp;
  size_t i, len;
  int already_declared;
  struct name_val *nv;
  struct xml_element *xe;
  my_wchar_t *wtmp, *expansion;

  if (xmlparse_env.debug_level >= 7)
    {
      /* duplicate the first conversion before converting a second string */
      tmp = strdup (utf_16_to_utf_8 (uni_truncate_to (xa->name, 20)));
      xwrap (errdebug (7, "adding attribute %s to element, %s\n",
                       tmp, utf_16_to_utf_8 (uni_truncate_to (elname, 20))));
      free (tmp);
    }

  if ((xe = expand_element (xf, elname)) == NULL)
    {
      /* create a dummy element to hold the attlist */
      if ((xe = add_element (xf, elname, dummy, NULL)) == NULL)
        errabort (45, "unexpected return value from %s\n", "add_attribute()");
    }

  /* Check if the attribute is already declared for this element */
  already_declared = 0;
  for (i = 0; i < xe->attlistlen; i++)
    if (uni_strcmp (xe->attlist[i]->name, xa->name) == 0)
      {
        /* duplicate attribute definition (discard the second one) */
        add_xml_warning (xf, 581, uni_truncate_to (xa->name, 20));
        already_declared++;
      }

  /* Only do all these checks if this is the first time we've seen
   * this attribute (xa).  Note that the xml:space and xml:lang
   * attribute checks could easily be moved to the beginning of this
   * function.  Do not move the ID check, however.
   */
  if (! already_declared)
    {
      /* Now that we're namespace-aware, check for malformed
       * namespaces in the attribute name (e.g., "hello:" or
       * ":goodbye").  If check_for_colon() returns zero, go on to
       * the namespace-declaration check: xmlns and xmlns:
       * attributes should really be #FIXED or #REQUIRED.
       */
      if (! check_for_colon (xf, xa->name, 1380, 1381, 1382))
        {
          if (uni_utf_strcmp (xa->name, "xmlns") == 0
              || uni_utf_strncmp (xa->name, "xmlns:", 6) == 0)
            {
              if (xa->default_type != fixed && xa->default_type != required)
                add_xml_warning (xf, 1390, xa->name);
            }
        }

      /* If this attribute is xml:space, then it has to be an
       * enumerated attribute type with two possible values,
       * default and preserve.  The default can be one or the
       * other; e.g.:
       *
       *   <!ATTLIST poem xml:space (default|preserve) 'preserve'>
       */
      if (uni_utf_strcmp (xa->name, "xml:space") == 0)
        {
          if (xa->type != enumeration)
            /* XML 1.0 spec, par. 2.10 says it has to be enumerated */
            add_xml_error (xf, 584, uni_truncate_to (xa->name, 20));
          else if (xa->nmtoklen != 2)
            add_xml_error (xf, 584, uni_truncate_to (xa->name, 20));
          else if (! ((uni_utf_strcmp (xa->nmtokens[0], "preserve") == 0 &&
                       uni_utf_strcmp (xa->nmtokens[1], "default") == 0) ||
                      (uni_utf_strcmp (xa->nmtokens[0], "default") == 0 &&
                       uni_utf_strcmp (xa->nmtokens[1], "preserve") == 0)))
            add_xml_error (xf, 584, uni_truncate_to (xa->name, 20));
        }

      /* If this attribute is xml:lang, then it has to be a CDATA,
       * nmtoken, or enumerated attribute type, e.g.:
       *
       *   <!ATTLIST poem xml:lang NMTOKEN 'fr'>
       */
      if (uni_utf_strcmp (xa->name, "xml:lang") == 0)
        if (xa->type != cdata && xa->type != nmtoken && xa->type != enumeration)
          add_xml_error (xf, 583, uni_truncate_to (xa->name, 20));

      /* XML standard, section 3.3.1: only one ID attribute per element */
      if (xa->type == id)
        {
          for (i = 0; i < xe->attlistlen; i++)
            if (xe->attlist[i]->type == id)
              {
                /* build: elname (id "attname")
                 * 9 = strlen (" (id \"") + strlen ("\")") + terminator
                 */
                len = uni_strlen (elname) + uni_strlen (xa->name) + 9;
                wtmp = malloc (len * sizeof (my_wchar_t));
                if (wtmp == NULL)
                  errabort (40, "malloc() error in %s\n", "add_attribute()");
                uni_strcpy (wtmp, elname);
                uni_utf_strcat (wtmp, " (id \"");
                uni_strcat (wtmp, xa->name);
                uni_utf_strcat (wtmp, "\")");
                add_xml_error (xf, 580, wtmp);
                free (wtmp);
              }
        }

      /* Make room in (xml_element *)xe 's attlist for a new attribute */
      xe->attlistlen++;
      if (xe->attlistlen == 1)
        {
          if ((xe->attlist = malloc (sizeof (xml_attribute *))) == NULL)
            errabort (40, "malloc() error in %s\n", "add_attribute()");
        }
      else
        {
          xe->attlist =
            realloc (xe->attlist, xe->attlistlen * sizeof (xml_attribute *));
          if (xe->attlist == NULL)
            errabort (41, "realloc() error in %s\n", "add_attribute()");
        }

      /* now, finally, add the new attribute */
      xe->attlist[xe->attlistlen - 1] = xa;
    }

  /* If a default is given, make sure that default is a valid instance
   * of the attribute.
   */
  if (xa->default_type == defaulted || xa->default_type == fixed)
    {
      if (xa->type == id)
        /* Makes no sense for an ID attribute to be #FIXED or defaulted */
        add_xml_warning (xf, 587, uni_truncate_to (xa->name, 20));

      if (xa->default_val)
        {
          /* See s 3.3.3 of the XML standard on attribute whitespace.
           * Keep a copy of the raw default for the messages below.
           */
          wtmp = uni_strdup (xa->default_val);
          if (wtmp == NULL)
            errabort (40, "malloc() error in %s\n", "add_attribute()");
          uni_map_whitespace_to_space (xa->default_val);
          if (xa->type != cdata)
            /* non-CDATA attributes get further whitespace normalization */
            uni_map_spaces_to_space (xa->default_val);

          /* replace the stored default with its entity-expanded form */
          expansion = map_entities (xf, xa->default_val,
                                    MAP_CHAR_ENTITIES | MAP_GENERAL_ENTITIES |
                                    NO_EXTERNAL_EREFS | ABORT_ON_FAILURE, 0);
          free (xa->default_val);
          xa->default_val = expansion;

          if (expansion == NULL)
            {
              /* if there was an entity-expansion error, treat as implied */
              add_xml_error (xf, 621, uni_truncate_to (wtmp, 20));
              xa->default_type = implied;
            }
          else if (xa->type != cdata)
            {
              /* Does whitespace normalization give us a blank namespace? */
              if (uni_utf_strcmp (xa->name, "xmlns") == 0)
                if (*wtmp && ! *xa->default_val)
                  add_xml_warning (xf, 620, uni_truncate_to (xa->name, 20));
            }
          free (wtmp);

          /* if there was an error, xa->default_val was reset to NULL */
          if (xa->default_val)
            {
              /* Now check to make sure that the default is okay */
              nv = create_name_val (xa->name, xa->default_val, NULL, NULL, xf->lineno, yes, NULL);
              if (! already_declared)
                {
                  /* Do full checks */
                  if (! check_attribute (xf, elname, nv, DONT_INSERT_IDS))
                    add_xml_error (xf, 589, uni_truncate_to (xa->default_val, 20));
                }
              else
                {
                  /* Don't do full checks (this attribute was already
                   * declared, and will be discarded anyway)
                   */
                  if (! basic_attribute_integrity_check (xf, xa, nv, DONT_INSERT_IDS))
                    add_xml_error (xf, 589, uni_truncate_to (xa->default_val, 20));
                }
              free_name_val (nv);

              if (xf->standalone == yes && in_external_dtd_subset (xf))
                add_xml_warning (xf, 588, uni_truncate_to (xa->name, 20));
            }
        }
    }

  if (already_declared)
    /* caller frees xa on zero return value */
    return 0;

  if (xmlparse_env.debug_level >= 5)
    {
      /* duplicate the first conversion before converting a second string */
      tmp = strdup (utf_16_to_utf_8 (uni_truncate_to (xa->name, 20)));
      xwrap (errdebug (5, "added attribute %s to element %s (count = %d)\n",
                       tmp, utf_16_to_utf_8 (uni_truncate_to (elname, 20)),
                       xe->attlistlen));
      free (tmp);
    }
  return xe->attlistlen;
}
/*
 * add_notname
 *
 * Add notation name (arg 2) to xf's notation_names hashtable, along
 * with its associated sysid, value (arg 3).  Both strings are
 * duplicated, so the table keeps its own copies.
 *
 * Returns zero if the notation name is already in the table (XML
 * warning 700 is recorded, plus 701 if the stored value differs
 * from value).  Otherwise returns the number of items now in the
 * hashtable (presumably at least 1).
 */
size_t
add_notname (struct xml_file *xf, my_wchar_t *name, my_wchar_t *value)
{
  struct rg_htable_item entry, *existing;

  if (xmlparse_env.debug_level >= 7)
    xwrap (errdebug (7, "adding %s to notation_names hashtable (size = %d)\n",
                     utf_16_to_utf_8 (uni_truncate_to (name, 20)),
                     xf->notation_names->no_items));

  /* a NULL key tells rg_find_item to search on uni_key instead */
  entry.key = NULL;
  entry.uni_key = name;
  entry.data = NULL;

  existing = rg_find_item (xf->notation_names, entry);
  if (existing != NULL)
    {
      /* already defined: warn, and warn again if the values disagree */
      add_xml_warning (xf, 700, uni_truncate_to (name, 20));
      if (uni_strcmp (existing->data, value) != 0)
        add_xml_warning (xf, 701, uni_truncate_to (name, 20));
      xwrap (errdebug (3, "error adding %s to notation_names hashtable (size = %d)\n",
                       utf_16_to_utf_8 (uni_truncate_to (name, 20)),
                       xf->notation_names->no_items));
      return 0;
    }

  /* new notation: store private copies of both strings */
  entry.uni_key = uni_strdup (name);
  entry.data = uni_strdup (value);
  if (entry.uni_key == NULL || entry.data == NULL)
    errabort (40, "malloc() error in %s\n", "add_notname()");
  rg_add_item (xf->notation_names, entry);
  xwrap (errdebug (5, "added %s to notation_names hashtable (new size = %d)\n",
                   utf_16_to_utf_8 (uni_truncate_to (name, 20)),
                   xf->notation_names->no_items));

  /* report how many items are now in the hashtable */
  return (size_t)xf->notation_names->no_items;
}
/*
 * add_peref
 *
 * Add parameter entity name (arg 2) to xf's parameter_entity_names
 * hashtable, along with its associated expansion, value (arg 3).
 * Both strings are duplicated, so the table keeps its own copies.
 *
 * Returns zero on error:
 *
 *   - the name is already present (XML warning 750 is recorded,
 *     plus 751 if the stored expansion differs from value), or
 *   - value contains a direct reference to name itself (XML error
 *     1002 is recorded).
 *
 * Otherwise returns the number of items now in the hashtable
 * (presumably at least 1).
 */
size_t
add_peref (struct xml_file *xf, my_wchar_t *name, my_wchar_t *value)
{
  struct rg_htable_item it, *result;

  if (xmlparse_env.debug_level >= 7)
    xwrap (errdebug (7, "adding %s to parameter_entity_name hashtable (size = %d)\n",
                     utf_16_to_utf_8 (uni_truncate_to (name, 20)),
                     xf->parameter_entity_names->no_items));

  /* if it.key == NULL, then rg_find_item will use it.uni_key */
  it.key = NULL;
  it.uni_key = name;
  it.data = NULL;

  if ((result = rg_find_item (xf->parameter_entity_names, it)) != NULL)
    {
      /* eek; it's already defined */
      add_xml_warning (xf, 750, uni_truncate_to (name, 20));
      if (uni_strcmp (result->data, value) != 0)
        add_xml_warning (xf, 751, uni_truncate_to (name, 20));
      xwrap (errdebug (3, "error adding %s to parameter_entity_name hashtable (size = %d)\n",
                       utf_16_to_utf_8 (uni_truncate_to (name, 20)),
                       xf->parameter_entity_names->no_items));
      /* Zero here = error */
      return 0;
    }

  /* Not defined yet.  Good.  Now do a quick test for recursion. */
  if (expands_recursively (xf, value, '%', name))
    {
      /* convert the wide-character name before handing it to %s */
      xwrap (errdebug (5, "parameter entity expands recursively, %s\n",
                       utf_16_to_utf_8 (uni_truncate_to (name, 20))));
      add_xml_error (xf, 1002, uni_truncate_to (name, 20));
      return 0;
    }

  /* store private copies of both strings */
  it.uni_key = uni_strdup (name);
  it.data = uni_strdup (value);
  if (it.uni_key == NULL || it.data == NULL)
    errabort (40, "malloc() error in %s\n", "add_peref()");
  rg_add_item (xf->parameter_entity_names, it);

  if (xmlparse_env.debug_level >= 5)
    {
      xwrap (errdebug (7, "parameter entity's expansion is - \"%s\"\n",
                       utf_16_to_utf_8 (uni_truncate_to (value, 20))));
      xwrap (errdebug (5, "added %s to parameter_entity_name hashtable (new size = %d)\n",
                       utf_16_to_utf_8 (uni_truncate_to (name, 20)),
                       xf->parameter_entity_names->no_items));
    }

  /* report how many items are now in the hashtable */
  return (size_t)xf->parameter_entity_names->no_items;
}
/*
* expands_recursively
*
* Return true if entity replacement text, text (arg 2), contains a
* reference to the entity called name (arg 4), i.e., c (arg 3)
* followed by name and a semicolon. Name should be the name of the
* entity whose replacement text is text. If name is a parameter
* entity, c should be '%'. If it's a general entity, then c should
* be '&'.
*
* This is a very simplistic test, and only catches the most obvious
* cases of recursion. The map_entities() routine catches other,
* less obvious, cases by just checking the depth of recursion.
* Past a certain point it's obvious there's a problem.
*
* This code is very similar to what we find in check_gt_and_lt()
* in parsutil.y. If there are bugs in check_gt_and_lt(), there
* are probably bugs here as well.
*/
static int
expands_recursively (struct xml_file *xf, my_wchar_t *text, my_wchar_t c, my_wchar_t *name)
{
size_t j;
my_wchar_t *wp;
enum where_am_i state;
int marked_section_nesting_level;
if (! (c == '&' || c == '%'))
errabort (46, "unexpected value for c in %s\n", "expands_recursively()");
/* This code is very similar to what we find in check_gt_and_lt() in
* parsutil.y. If there are bugs in check_gt_and_lt(), there are
* probably bugs here as well.
*/
state = nowhere;
marked_section_nesting_level = 0;
for (wp = text; *wp != 0; ++wp)
{
switch (*wp)
{
case '%':
case '&':
if (*wp != c)
/* just move up to next semicolon */
while (*wp && *wp != ';') wp++;
else
/* If we're not in markup, etc., and if there's more of wp left... */
if (state == nowhere && *++wp)
{
/* ... then see if wp now starts with "name;" */
for (j = 0; *wp != ';' && name[j] != 0; j++, wp++)
if (name[j] != *wp)
break;
/* If it does start with "name;", we have recursion - at
* least if expands_recursively() was called properly.
*/
if (*wp == ';' && name[j] == 0)
return 1;
else while (*wp && *wp != ';')
wp++;
}
break;
case '\'':
switch (state)
{
case in_markup:
state = in_single_quote;
break;
case in_single_quote:
state = in_markup;
default:
break;
}
break;
case '"':
switch (state)
{
case in_markup:
state = in_double_quote;
break;
case in_double_quote:
state = in_markup;
default:
break;
}
break;
case '<':
switch (state)
{
case in_markup:
/* can't parse this; just say it's okay (non-recursive) */
return 0;
case in_pi:
case in_comment:
case in_single_quote:
case in_double_quote:
break;
case in_marked_section:
if (*(wp + 1) == '!' && *(wp + 2) == '[')
{
marked_section_nesting_level++;
wp += 2;
}
break;
default:
switch (*(wp + 1))
{
case '?':
state = in_pi;
wp++;
break;
case '!':
if (*(wp + 2) == '-' && *(wp + 3) == '-')
{
state = in_comment;
wp += 3;
}
else if (*(wp + 2) == '[')
{
marked_section_nesting_level = 1;
state = in_marked_section;
wp += 2;
}
else
state = in_markup;
break;
default:
state = in_markup;
break;
}
break;
}
break;
case '?':
if (state == in_pi && *(wp + 1) == '>')
{
state = nowhere;
wp++;
}
break;
case ']':
if (state == in_marked_section && *(wp + 1) == ']' && *(wp + 2) == '>')
{
/* marked sections must nest properly */
if (--marked_section_nesting_level == 0)
state = nowhere;
else if (marked_section_nesting_level < 0)
/* can't parse it; just say it's okay */
return 0;
wp += 2;
}
break;
case '-':
if (state == in_comment && *(wp + 1) == '-' && *(wp + 2) == '>')
{
state = nowhere;
wp += 2;
}
break;
case '>':
switch (state)
{
case in_markup:
state = nowhere;
case in_pi:
case in_comment:
case in_marked_section:
case in_single_quote:
case in_double_quote:
break;
default:
/* can't parse it; just say it's okay */
return 0;
}
break;
default:
break;
}
}
/* Text is probably okay if we get to here and don't find recursion */
return 0;
}
/*
 * add_ext_peref
 *
 * Add a given entity name to an xml_file's parameter_entity_names
 * hashtable, along with its expansion - which in this case comes
 * from an external file, external_xf (arg 3).  The text read from
 * external_xf is run through map_entities() with map_what (arg 4)
 * before it is stored.
 *
 * Returns NULL if the parameter entity has already been declared
 * (and is thus in the table), if the external file can't be read,
 * or if the text obviously refers to itself (error 1002).  In the
 * already-declared case warning 750 is issued, plus warning 751 if
 * the new expansion differs from the old one.  Otherwise returns
 * the mapped expansion text that was stored in the table; the
 * table owns it, so the caller must not free it.
 */
my_wchar_t *
add_ext_peref (struct xml_file *xf, my_wchar_t *name, struct xml_file *external_xf, int map_what)
{
  my_wchar_t *data, *oldval, *expansion;
  struct rg_htable_item it;

  if (xmlparse_env.debug_level >= 7)
    xwrap (errdebug (7, "adding %s to parameter_entity_name hashtable (size = %d)\n",
                     utf_16_to_utf_8 (uni_truncate_to (name, 20)),
                     xf->parameter_entity_names->no_items));

  /* if it.key == NULL, then rg_find_item will use it.uni_key */
  it.key = NULL;
  it.uni_key = name;
  it.data = NULL;

  if (rg_find_item (xf->parameter_entity_names, it) != NULL)
    {
      /* eek; it's already defined */
      add_xml_warning (xf, 750, name);
      if ((oldval = expand_peref (xf, name, WITHOUT_WHITESPACE)) == NULL)
        add_xml_warning (xf, 751, uni_truncate_to (name, 20));
      else
        {
          /* See if we can actually read external_xf */
          if ((data = read_entire_xml_file (xf, external_xf)) == NULL)
            return NULL;
          /* Quick and dirty check for recursion */
          if (expands_recursively (xf, data, '%', name))
            {
              /* errdebug() wants UTF-8, so convert the (UTF-16) name first */
              xwrap (errdebug (5, "ext PE expands recursively, %s\n",
                               utf_16_to_utf_8 (uni_truncate_to (name, 20))));
              add_xml_error (xf, 1002, uni_truncate_to (name, 20));
              return NULL;
            }
          /* Unlike data, the result of map_entities() is malloc'd and is
           * ours to free - whether or not it matches the old value.
           */
          expansion = map_entities (xf, data, map_what, 0);
          if (expansion != NULL)
            {
              if (uni_strcmp (oldval, expansion) != 0)
                add_xml_warning (xf, 751, uni_truncate_to (name, 20));
              free (expansion);
            }
        }
      xwrap (errdebug (3, "error adding %s to parameter_entity_name hashtable (size = %d)\n",
                       utf_16_to_utf_8 (uni_truncate_to (name, 20)),
                       xf->parameter_entity_names->no_items));
      return NULL;
    }

  /* Not defined yet.  Good.  Now see if we can actually read external_xf */
  if ((data = read_entire_xml_file (xf, external_xf)) == NULL)
    return NULL;

  /* Do a brief, naive check for recursion */
  if (expands_recursively (xf, data, '%', name))
    {
      xwrap (errdebug (5, "ext PE expands recursively, %s\n",
                       utf_16_to_utf_8 (uni_truncate_to (name, 20))));
      add_xml_error (xf, 1002, uni_truncate_to (name, 20));
      return NULL;
    }

  /* Now, finally, add the replacement text to the parameter entity hash table.
   * NOTE(review): map_entities() can also return NULL for reasons other than
   * running out of memory (e.g., with ABORT_ON_FAILURE set in map_what), in
   * which case the message below is misleading - confirm what callers pass.
   */
  it.uni_key = uni_strdup (name);
  it.data = map_entities (xf, data, map_what, 0);
  if (it.data == NULL || it.uni_key == NULL)
    errabort (40, "malloc() error in %s\n", "add_ext_peref()");
  rg_add_item (xf->parameter_entity_names, it);
  xwrap (errdebug (5, "added %s to parameter_entity_name hashtable (new size = %d)\n",
                   utf_16_to_utf_8 (uni_truncate_to (name, 20)),
                   xf->parameter_entity_names->no_items));

  /* hand back the expansion text we just stored */
  return it.data;
}
/*
 * add_eref
 *
 * Add a given (internal, general) entity name to a given xml_file's
 * entity_names hashtable, along with its expansion, value (arg 3).
 * The name and value are copied; the caller keeps ownership of its
 * own strings.
 *
 * Returns zero on error: if the name is already present in the
 * entity_names, external_entity_names, or unparsed_entity_names
 * table (warning 800 unless the name is built in; warning 801 if
 * the redeclaration is incompatible with the old one), or if value
 * obviously refers to the entity itself (error 1002).  Otherwise
 * returns the number of elements in the hash table (presumably at
 * least 1).
 */
size_t
add_eref (struct xml_file *xf, my_wchar_t *name, my_wchar_t *value)
{
  size_t len;
  my_wchar_t *tmp, *expansion;
  struct rg_htable_item it, *result;

  if (xmlparse_env.debug_level >= 7)
    xwrap (errdebug (7, "adding %s to entity_name hashtable (size = %d)\n",
                     utf_16_to_utf_8 (uni_truncate_to (name, 20)),
                     xf->entity_names->no_items));

  /* if it.key == NULL, then rg_find_item will use it.uni_key */
  it.key = NULL;
  it.uni_key = name;
  it.data = NULL;

  if (rg_find_item (xf->entity_names, it) == NULL &&
      rg_find_item (xf->external_entity_names, it) == NULL &&
      rg_find_item (xf->unparsed_entity_names, it) == NULL)
    {
      /* Not defined.  Good.  Now do a simple-minded recursion test. */
      if (expands_recursively (xf, value, '&', name))
        {
          /* errdebug() wants UTF-8, so convert the (UTF-16) name first */
          xwrap (errdebug (5, "general entity expands recursively, %s\n",
                           utf_16_to_utf_8 (uni_truncate_to (name, 20))));
          add_xml_error (xf, 1002, uni_truncate_to (name, 20));
          return 0;
        }

      it.uni_key = uni_strdup (name);
      it.data = uni_strdup (value);
      if (it.uni_key == NULL || it.data == NULL)
        errabort (40, "malloc() error in %s\n", "add_eref()");
      rg_add_item (xf->entity_names, it);
      if (xmlparse_env.debug_level >= 5)
        {
          xwrap (errdebug (7, "entity's expansion is - \"%s\"\n",
                           utf_16_to_utf_8 (uni_truncate_to (value, 20))));
          xwrap (errdebug (5, "added %s to entity_name hashtable (new size = %d)\n",
                           utf_16_to_utf_8 (uni_truncate_to (name, 20)),
                           xf->entity_names->no_items));
        }

      /* report how many items are now in the hashtable */
      return (size_t)xf->entity_names->no_items;
    }

  if (! is_builtin (name))
    {
      /* eek; it's already defined & it's not built-in */
      add_xml_warning (xf, 800, uni_truncate_to (name, 20));
      xwrap (errdebug (3, "error adding %s to entity_name hashtable (size = %d)\n",
                       utf_16_to_utf_8 (uni_truncate_to (name, 20)),
                       xf->entity_names->no_items));
    }

  if ((result = rg_find_item (xf->entity_names, it)) == NULL)
    /* previously declared as some other kind of entity (external or unparsed) */
    add_xml_warning (xf, 801, uni_truncate_to (name, 20));
  else if (expands_recursively (xf, value, '&', name))
    {
      /* simple test; map_entities() below does this better */
      xwrap (errdebug (5, "general entity expands recursively, %s\n",
                       utf_16_to_utf_8 (uni_truncate_to (name, 20))));
      add_xml_error (xf, 1002, uni_truncate_to (name, 20));
    }
  else
    {
      /* if we haven't yet expanded general entities; do that now */
      expansion = map_entities (xf, value, MAP_GENERAL_ENTITIES, 0);
      /* compare old value to new value */
      if (expansion == NULL || uni_strcmp (result->data, expansion) != 0)
        {
          /* incompatible redeclaration; the three literals appended
           * below come to 29 characters, plus one for the final null
           */
          len = uni_strlen (name) + uni_strlen (result->data) + uni_strlen (value) + 30;
          if ((tmp = malloc (len * sizeof (my_wchar_t))) == NULL)
            errabort (40, "malloc() error in %s\n", "add_eref()");
          uni_strcpy (tmp, name);
          uni_utf_strcat (tmp, " (old value \"");
          uni_strcat (tmp, result->data);
          uni_utf_strcat (tmp, "\"; new value \"");
          uni_strcat (tmp, value);
          uni_utf_strcat (tmp, "\")");
          add_xml_warning (xf, 801, tmp);
          free (tmp);
        }
      /* map_entities() hands back a malloc'd buffer */
      free (expansion);
    }

  /* Zero here = error */
  return 0;
}
/*
 * add_ext_eref
 *
 * Add a given (external, general) entity name to an xml_file's
 * external_entity_names hashtable, along with its expansion.  The
 * expansion is read from external_xf (arg 3) and run through
 * map_entities() with map_what (arg 4) before it is stored.
 *
 * Returns NULL if the name is already present in the
 * external_entity_names, entity_names, or unparsed_entity_names
 * table, if there is an error reading the external entity data
 * (somewhat unlikely), or if the text obviously refers to the
 * entity itself (error 1002).  Otherwise returns a pointer to the
 * expansion text of the entity; the table owns that text, so the
 * caller must not free it.
 */
my_wchar_t *
add_ext_eref (struct xml_file *xf, my_wchar_t *name, xml_file *external_xf, int map_what)
{
  size_t len;
  struct rg_htable_item it;
  my_wchar_t *tmp, *data, *expansion, *oldval;

  if (xmlparse_env.debug_level >= 7)
    xwrap (errdebug (7, "adding %s to external_entity_name hashtable (size = %d)\n",
                     utf_16_to_utf_8 (uni_truncate_to (name, 20)),
                     xf->external_entity_names->no_items));

  /* if it.key == NULL, then rg_find_item will use it.uni_key */
  it.key = NULL;
  it.uni_key = name;
  it.data = NULL;

  if (rg_find_item (xf->external_entity_names, it) == NULL &&
      rg_find_item (xf->entity_names, it) == NULL &&
      rg_find_item (xf->unparsed_entity_names, it) == NULL)
    {
      /* See if we can actually read external_xf */
      if ((data = read_entire_xml_file (xf, external_xf)) == NULL)
        return NULL;

      /* Do a naive, quick check for recursion.  This is a general
       * entity, so a reference to it starts with '&', not '%'.
       */
      if (expands_recursively (xf, data, '&', name))
        {
          /* errdebug() wants UTF-8, so convert the (UTF-16) name first */
          xwrap (errdebug (5, "ext entity expands recursively, %s\n",
                           utf_16_to_utf_8 (uni_truncate_to (name, 20))));
          add_xml_error (xf, 1002, uni_truncate_to (name, 20));
          return NULL;
        }

      /* Now, finally, add the replacement text to the external entity hash table */
      it.uni_key = uni_strdup (name);
      it.data = map_entities (xf, data, map_what, 0);
      if (it.data == NULL || it.uni_key == NULL)
        errabort (40, "malloc() error in %s\n", "add_ext_eref()");
      rg_add_item (xf->external_entity_names, it);
      xwrap (errdebug (5, "added %s to external_entity_name hashtable (new size = %d)\n",
                       utf_16_to_utf_8 (uni_truncate_to (name, 20)),
                       xf->external_entity_names->no_items));

      /* return expansion if all went well */
      return it.data;
    }

  /* eek; it's already defined */
  add_xml_error (xf, is_builtin (name) ? 754 : 800, uni_truncate_to (name, 20));
  if ((oldval = expand_ext_eref (xf, name)) == NULL)
    /* previously declared as some other kind of entity (internal or unparsed) */
    add_xml_warning (xf, 801, uni_truncate_to (name, 20));
  else
    {
      /* See if we can actually read external_xf */
      if ((data = read_entire_xml_file (xf, external_xf)) == NULL)
        return NULL;

      /* Do a naive, quick check for recursion (general entity, hence '&') */
      if (expands_recursively (xf, data, '&', name))
        {
          xwrap (errdebug (5, "ext entity expands recursively, %s\n",
                           utf_16_to_utf_8 (uni_truncate_to (name, 20))));
          add_xml_error (xf, 1002, uni_truncate_to (name, 20));
        }
      else
        {
          /* expansion has to be freed later, unlike data above */
          expansion = map_entities (xf, data, map_what, 0);
          if (! expansion || uni_strcmp (oldval, expansion) != 0)
            {
              /* incompatible redeclaration.
               * NOTE(review): warning 801 is issued twice here - once with
               * the bare name and once with the old/new values; confirm
               * whether the first one is still wanted.
               */
              add_xml_warning (xf, 801, uni_truncate_to (name, 20));
              /* the three literals appended below come to 29
               * characters, plus one for the final null
               */
              len = uni_strlen (name) + uni_strlen (oldval) + uni_strlen (data) + 30;
              if ((tmp = malloc (len * sizeof (my_wchar_t))) == NULL)
                errabort (40, "malloc() error in %s\n", "add_ext_eref()");
              uni_strcpy (tmp, name);
              uni_utf_strcat (tmp, " (old value \"");
              uni_strcat (tmp, oldval);
              uni_utf_strcat (tmp, "\"; new value \"");
              uni_strcat (tmp, data);
              uni_utf_strcat (tmp, "\")");
              add_xml_warning (xf, 801, tmp);
              free (tmp);
            }
          free (expansion);
          expansion = NULL;
        }
    }

  xwrap (errdebug (3, "error adding %s to external_entity_name hashtable (size = %d)\n",
                   utf_16_to_utf_8 (uni_truncate_to (name, 20)), xf->external_entity_names->no_items));
  return NULL;
}
/*
 * add_ext_eref_text
 *
 * Add a given entity name to an xml_file's external_entity_names
 * hashtable, along with its expansion.  Unlike add_ext_eref() up
 * above, this routine adds straight text to xf's external entity
 * name table.  It's assumed that this text was formerly read in
 * from an external file at some point, and is just being manually
 * inserted into this table, so we don't have to read the file in
 * again or map any entities.  Both name and text are copied.
 *
 * Returns NULL - without issuing any error or warning message - if
 * the name is already present in the external_entity_names,
 * entity_names, or unparsed_entity_names table, or if text
 * obviously refers to the entity itself.  Otherwise returns a
 * pointer to the stored copy of the expansion text; the table owns
 * it, so the caller must not free it.
 */
my_wchar_t *
add_ext_eref_text (struct xml_file *xf, my_wchar_t *name, my_wchar_t *text)
{
  struct rg_htable_item it;

  if (xmlparse_env.debug_level >= 7)
    xwrap (errdebug (7, "adding text to external_entity_name hashtable (size = %d) for %s\n",
                     xf->external_entity_names->no_items, utf_16_to_utf_8 (uni_truncate_to (name, 20))));

  /* if it.key == NULL, then rg_find_item will use it.uni_key */
  it.key = NULL;
  it.uni_key = name;
  it.data = NULL;

  /* already declared, as one kind of general entity or another? */
  if (rg_find_item (xf->external_entity_names, it) != NULL ||
      rg_find_item (xf->entity_names, it) != NULL ||
      rg_find_item (xf->unparsed_entity_names, it) != NULL)
    return NULL;

  /* Do a naive, quick check for recursion.  This is a general entity,
   * so a reference to it starts with '&', not '%'.  No message is
   * issued here; presumably one was already emitted when the text
   * was first read in - TODO confirm against the callers.
   */
  if (expands_recursively (xf, text, '&', name))
    return NULL;

  /* Now, finally, add the replacement text to the external entity hash table */
  it.uni_key = uni_strdup (name);
  it.data = uni_strdup (text);
  if (it.data == NULL || it.uni_key == NULL)
    errabort (40, "malloc() error in %s\n", "add_ext_eref_text()");
  rg_add_item (xf->external_entity_names, it);
  xwrap (errdebug (5, "added text to external_entity_name hashtable (new size = %d) for %s\n",
                   xf->external_entity_names->no_items, utf_16_to_utf_8 (uni_truncate_to (name, 20))));

  /* return expansion if all went well */
  return it.data;
}
/*
 * add_uperef
 *
 * Add a given entity name to an xml_file's unparsed_entity_names
 * hashtable, along with its "expansion."  Returns zero if the name
 * is already present in the unparsed_entity_names, entity_names, or
 * external_entity_names table (warning 800, plus warning 801 if the
 * earlier declaration isn't an unparsed entity with the same
 * notation name and system identifier).  Otherwise returns the
 * number of elements in the hash table (presumably at least 1).
 *
 * Note that in the case of unparsed entities, the keys in the
 * xf->unparsed_entity_names hashtable are the entity names; the
 * values are a) the external URIs associated with those names, and
 * b) the notation names associated with them.
 *
 * Put differently, xf->unparsed_entity_names stores information as
 * follows: keys = unparsed entity name; value = xml_unparsed_entity
 * struct (consists of two fields: 1) a notation name, 2) a system
 * identifier).  The system identifier is NOT the system identifier
 * associated with the notation (which you can get via the notation
 * name hashtable (notname)).  Rather, it is the system identifier
 * given in the entity declaration itself.
 *
 * Name, notname, and sysid are all copied before being stored.
 */
size_t
add_uperef (xml_file *xf, my_wchar_t *name, my_wchar_t *notname, my_wchar_t *sysid)
{
  struct xml_unparsed_entity *xn;
  struct rg_htable_item it;

  if (xmlparse_env.debug_level >= 7)
    xwrap (errdebug (7, "adding %s to unparsed_entity_name hashtable (size = %d)\n",
                     utf_16_to_utf_8 (uni_truncate_to (name, 20)),
                     xf->unparsed_entity_names->no_items));

  /* if it.key == NULL, then rg_find_item will use it.uni_key */
  it.key = NULL;
  it.uni_key = name;
  it.data = NULL;

  if (rg_find_item (xf->unparsed_entity_names, it) != NULL ||
      rg_find_item (xf->entity_names, it) != NULL ||
      rg_find_item (xf->external_entity_names, it) != NULL)
    {
      /* eek; it's already defined */
      add_xml_warning (xf, 800, uni_truncate_to (name, 20));
      /* The redeclaration is compatible only if the earlier declaration
       * was also an unparsed entity, and it names the same notation and
       * system identifier as the ones we were just handed.
       */
      if ((xn = expand_uperef (xf, name)) == NULL
          || uni_strcmp (xn->notname, notname) != 0
          || uni_strcmp (xn->sysid, sysid) != 0)
        add_xml_warning (xf, 801, uni_truncate_to (name, 20));
      xwrap (errdebug (3, "error adding %s to unparsed_entity_name hashtable (size = %d)\n",
                       utf_16_to_utf_8 (uni_truncate_to (name, 20)),
                       xf->unparsed_entity_names->no_items));
      return 0;
    }

  /* See whether the notation has been declared yet */
  if (expand_notname (xf, notname) == NULL)
    /*
     * as-yet undeclared notation isn't an error acc. to spec;
     * at the end of the DTD we'll have to go and check for
     * notname again (see s. 3.3.1, entity name constraint)
     */
    add_xml_warning (xf, 810, uni_truncate_to (notname, 20));

  if ((xn = malloc (sizeof (struct xml_unparsed_entity))) == NULL)
    errabort (40, "malloc() error in %s\n", "add_uperef()");
  it.uni_key = uni_strdup (name);
  xn->notname = uni_strdup (notname);
  xn->sysid = uni_strdup (sysid);
  if (it.uni_key == NULL || xn->notname == NULL || xn->sysid == NULL)
    errabort (40, "malloc() error in %s\n", "add_uperef()");
  it.data = xn;
  rg_add_item (xf->unparsed_entity_names, it);
  xwrap (errdebug (5, "added %s to unparsed_entity_name hashtable (new size = %d)\n",
                   utf_16_to_utf_8 (uni_truncate_to (name, 20)),
                   xf->unparsed_entity_names->no_items));

  /* report how many items are now in the hashtable */
  return (size_t)xf->unparsed_entity_names->no_items;
}
/*
 * add_idref
 *
 * Add a given val (presumed to be a value for an attribute of
 * type IDREF) to the xf->idrefs hashtable, along with the line
 * number(s) where it occurs.  If lines (arg 3) is nonnull, it is
 * taken to be a ready-made list of line numbers ("1, 2, 3...");
 * if it is NULL, the current line number, xf->lineno, is used.
 *
 * If val is already in the table, lines (or the current line
 * number) is appended to the list stored with it.  Once the list
 * gets too long, "..." is appended to it instead, and nothing
 * more is added after that.
 *
 * Returns the number of items in the xf->idrefs hashtable.
 *
 * This routine is different from most other "add" routines in that
 * it gets called while parsing the actual document content.  The
 * others get called when parsing the DTD.
 */
size_t
add_idref (struct xml_file *xf, my_wchar_t *val, my_wchar_t *lines)
{
  /* Capacity (in characters) of a new line-number list, and the length
   * beyond which nothing more gets appended to one.  The gap between
   * the two leaves room for the closing "..." and the final null.
   */
  enum { LINE_LIST_CAPACITY = 196, LINE_LIST_LIMIT = 192 };
  char numbuf[64];
  size_t len, capacity;
  my_wchar_t *list;
  struct rg_htable_item item, *result;

  item.key = NULL;
  item.uni_key = val;
  item.data = NULL;

  if ((result = rg_find_item (xf->idrefs, item)) == NULL)
    {
      /* New IDREF.  The list always gets a full-sized buffer, because
       * later calls append to it in place; a list that starts out
       * oversized still needs room for "..." and the final null.
       */
      capacity = LINE_LIST_CAPACITY;
      if (lines && uni_strlen (lines) + 4 > capacity)
        capacity = uni_strlen (lines) + 4;
      item.uni_key = uni_strdup (val);
      list = malloc (capacity * sizeof (my_wchar_t));
      if (item.uni_key == NULL || list == NULL)
        errabort (40, "malloc error in %s\n", "add_idref()");
      if (lines)
        /* lines has a list of line numbers, e.g., "1, 2, 3..." */
        uni_strcpy (list, lines);
      else
        {
          /* lines is null; use current lineno */
          sprintf (numbuf, "%d", xf->lineno);
          uni_strcpy (list, utf_8_to_utf_16 (numbuf));
        }
      item.data = list;
      /* add the item whichever way the list was built */
      rg_add_item (xf->idrefs, item);
      xwrap (errdebug (5, "added %s to idrefs hashtable (new size = %d)\n",
                       utf_16_to_utf_8 (uni_truncate_to (val, 20)),
                       xf->idrefs->no_items));
    }
  else
    {
      /* IDREF exists; add lines or our current lineno to the list */
      list = (my_wchar_t *)result->data;
      len = uni_strlen (list);
      /* a trailing '.' means the list was already closed off with "..." */
      if (len == 0 || list[len - 1] != '.')
        {
          if (lines)
            {
              /* lines is nonnull; add it to the list */
              if ((len + uni_strlen (lines) + 2) >= LINE_LIST_LIMIT)
                /* no more room */
                uni_utf_strcat (list, "...");
              else
                {
                  /* add lines */
                  if (len > 0)
                    uni_utf_strcat (list, ", ");
                  uni_strcat (list, lines);
                }
            }
          else
            {
              /* lines is null; use current lineno */
              sprintf (numbuf, "%d", xf->lineno);
              if ((len + strlen (numbuf) + 2) >= LINE_LIST_LIMIT)
                /* no more room */
                uni_utf_strcat (list, "...");
              else
                {
                  /* add the current line number to the list */
                  if (len > 0)
                    uni_utf_strcat (list, ", ");
                  uni_utf_strcat (list, numbuf);
                }
            }
          xwrap (errdebug (3, "added line number, %d, to entry \"%s\" in idefs hashtable\n",
                           xf->lineno, utf_16_to_utf_8 (uni_truncate_to (val, 20))));
        }
    }

  /* report how many idrefs are now in the hashtable */
  return (size_t)xf->idrefs->no_items;
}
/*
* add_id
*
* ID attribute values are unique for a given XML document.
* This routine adds a given ID value to the document's "ids"
* table. Returns NULL if such a value is already present.
*
* This routine is different from most other "add" routines in that
* it gets called while parsing the actual document content. The
* others get called when parsing the DTD.
*/
size_t
add_id (struct xml_file *xf, struct name_val *nv)
{
size_t len;
my_wchar_t *tmp;
struct name_val *old_nv;
struct rg_htable_item item, *result;
static struct name_val *dummy_nv = NULL;
/* return NULL if ID is already taken */
item.key = NULL;
item.uni_key = nv->val;
item.data = NULL;
if ((result = rg_find_item (xf->ids, item)) == NULL)
{
if (xmlparse_env.keep_children == yes)
/* we'll keep the parse tree around; insert pointer into it */
item.data = nv;
else
{
/* we'll discard the parse tree; don't insert pointer into it */
tmp = utf_8_to_utf_16 ("dummy ID value");
dummy_nv = create_name_val (tmp, tmp, NULL, NULL, nv->lineno, yes, NULL);
item.data = dummy_nv;
}
item.uni_key = uni_strdup (nv->val);
rg_add_item (xf->ids, item);
xwrap (errdebug (5, "added %s to ids hashtable (new size = %d)\n",
utf_16_to_utf_8 (uni_truncate_to (nv->name, 20)),
xf->ids->no_items));
}
else
{
/* eek; XML spec says ID values must be unique */
xwrap (errdebug (3, "error adding %s to ids hashtable (size = %d)\n",
utf_16_to_utf_8 (uni_truncate_to (nv->name, 20)),
xf->ids->no_items));
old_nv = (struct name_val *)result->data;
if (old_nv->parent == NULL)
add_xml_error (xf, 661, uni_truncate_to (nv->name, 20));
else
{
len = uni_strlen (nv->name) + uni_strlen (old_nv->parent->name) +
uni_strlen (old_nv->name) + uni_strlen (old_nv->val) + 12;
tmp = malloc (len * sizeof (my_wchar_t));
uni_strcpy (tmp, nv->name);
uni_utf_strcat (tmp, " (cf. ");
uni_strcat (tmp, old_nv->parent->name);
uni_utf_strcat (tmp, " ");
uni_strcat (tmp, old_nv->name);
uni_utf_strcat (tmp, "=\"");
uni_strcat (tmp, old_nv->val);
uni_utf_strcat (tmp, "\")");
add_xml_error (xf, 661, uni_truncate_to (nv->name, 20));
free (tmp);
}
return 0;
}
/* report how many ids are now in the hashtable */
return (size_t)xf->ids->no_items;
}
/*
 * cleanup
 *
 * Bail-out helper used by map_entities(): frees buf, frees the
 * states of the local variable named "stack", and returns NULL from
 * the enclosing function.  It expands to a brace-enclosed block (not
 * a do/while), so don't put an "else" directly after "cleanup (x);".
 */
#define cleanup(buf) { free (buf); free (stack.states); return NULL; }
/*
 * map_entities:
 *
 * Map all &entity; refs in a string, s, to their expansion text.
 * Map_what (arg 3) determines what gets mapped (char, general,
 * and/or parameter entities).
 *
 * NOTE WELL:  Returns a pointer to a malloc'd buffer THAT MUST BE
 * FREED, eventually.  This is not my usual programming style, but
 * it's warranted here, to facilitate recursion.
 *
 * Returns NULL if the nesting of entity expansions gets too deep
 * (error 1002), or - if ABORT_ON_FAILURE is set - on the first
 * malformed or unresolvable reference.
 *
 * Note also that s must be writable: while an entity name is being
 * looked up, the ';' that ends the reference is temporarily
 * replaced with a null (and restored afterwards).
 *
 * Options are passed via map_what (arg 3):
 *
 *   MAP_CHAR_ENTITIES          map character entities to chars
 *   MAP_GENERAL_ENTITIES       expand general entities
 *   FLAG_BYPASSED_AMPERSANDS   flag unexpanded (PI, comment, etc.) &s
 *   UNPARSED_ENTITIES_OK       uperefs are okay; no warning msgs
 *   MAP_PARAMETER_ENTITIES     expand parameter entities
 *   MAP_AMP_AND_LT             convert && and &< to & and <
 *   PAD_PARAMETER_ENTITIES     add leading/trailing spaces to perefs
 *   NO_EXTERNAL_EREFS          emit error msg if a general external
 *                              occurs (used in attribute values)
 *   ABORT_ON_FAILURE           return NULL on entity format or resolu-
 *                              tion errors
 *   UNRESOLVABLES_OKAY         squelch err msgs about unresolvable
 *                              entities (overrides ABORT_ON_FAILURE
 *                              for unresolvable entities)
 *
 * The final argument, depth, is always 0 (it gets incremented
 * internally).
 */
my_wchar_t *
map_entities (struct xml_file *xf, my_wchar_t *s, int map_what, size_t depth)
{
  char *p;
  long int l;
  int lineno, errcode;
  struct xml_unparsed_entity *xue;
  struct state_stack stack = { NULL, 0, 0 };
  my_wchar_t c, *wp, *tmp, *tmp2, *ref, *wpbuf, **endptr;
  size_t i, j, k, len, pad, slen, tmplen, base, wpbuflen;

  if (++depth > 31)
    {
      /* If we're this deep, there is probably recursion */
      add_xml_error (xf, 1002, uni_truncate_to (s, 20));
      return NULL;
    }

  /* What are we doing? */
  xwrap (errdebug (7, "mapping entities in %s\n", utf_16_to_utf_8 (uni_truncate_to (s, 20))));

  /* wpbuf is the output buffer; j is the next free slot in it.  len keeps
   * track of how much room the output may need, wpbuflen of how much there is.
   */
  slen = uni_strlen (s);
  wpbuflen = len = slen + 1;
  if ((wpbuf = malloc (wpbuflen * sizeof (my_wchar_t))) == NULL)
    errabort (40, "malloc() error in %s\n", "map_entities()");
  lineno = 1;
  push_state (&stack, nowhere);

  for (i = j = 0; i < slen; i++)
    {
      switch (s[i])
        {
        case '\n':
          lineno++;
          wpbuf[j++] = s[i];
          break;

        case '\'':
          switch (top_state (&stack))
            {
            case in_markup:
              push_state (&stack, in_single_quote);
              break;
            case in_single_quote:
              pop_state (&stack);
              break;
            default:
              break;
            }
          wpbuf[j++] = s[i];
          break;

        case '"':
          switch (top_state (&stack))
            {
            case in_markup:
              push_state (&stack, in_double_quote);
              break;
            case in_double_quote:
              pop_state (&stack);
              break;
            default:
              break;
            }
          wpbuf[j++] = s[i];
          break;

        case '<':
          if (top_state (&stack) == nowhere)
            {
              switch (s[i + 1])
                {
                case '?':
                  push_state (&stack, in_pi);
                  wpbuf[j++] = s[i++];
                  break;
                case '!':
                  if (s[i + 2] == '-' && s[i + 3] == '-')
                    {
                      /* in a comment */
                      push_state (&stack, in_comment);
                      for (k = 0; k < 3; k++)
                        wpbuf[j++] = s[i++];
                    }
                  else if (uni_utf_strncmp (&s[i + 2], "[CDATA[", 7) == 0)
                    {
                      /* in a CDATA section */
                      push_state (&stack, in_cdsect);
                      for (k = 0; k < 8; k++)
                        wpbuf[j++] = s[i++];
                    }
                  else
                    /* Not in a comment or CDATA section */
                    push_state (&stack, in_markup);
                  break;
                default:
                  push_state (&stack, in_markup);
                  break;
                }
            }
          wpbuf[j++] = s[i];
          break;

        case ']':
          if (top_state (&stack) == in_cdsect)
            if (uni_utf_strncmp (&s[i], "]]>", 3) == 0)
              {
                /* finished a CDATA section */
                pop_state (&stack);
                wpbuf[j++] = s[i++];
                wpbuf[j++] = s[i++];
              }
          wpbuf[j++] = s[i];
          break;

        case '?':
          if (top_state (&stack) == in_pi && s[i + 1] == '>')
            {
              /* finished PI */
              pop_state (&stack);
              wpbuf[j++] = s[i++];
            }
          wpbuf[j++] = s[i];
          break;

        case '-':
          if (top_state (&stack) == in_comment && s[i + 1] == '-' && s[i + 2] == '>')
            {
              /* finished a comment */
              pop_state (&stack);
              wpbuf[j++] = s[i++];
              wpbuf[j++] = s[i++];
            }
          wpbuf[j++] = s[i];
          break;

        case '>':
          if (top_state (&stack) == in_markup)
            pop_state (&stack);
          wpbuf[j++] = s[i];
          break;

        case '%':
          if (top_state (&stack) == in_comment ||
              top_state (&stack) == in_cdsect ||
              top_state (&stack) == in_pi)
            wpbuf[j++] = s[i];
          else if (! ((map_what & MAP_PARAMETER_ENTITIES) && ! uni_isspace (s[i + 1])))
            wpbuf[j++] = s[i];
          else
            {
              /* find the trailing ';' */
              for (k = i + 1; s[k] != 0 && s[k] != ';'; k++)
                {
                  if (uni_isspace (s[k]) ||
                      (s[k] <= 0x80 && (iscntrl (s[k]) || (ispunct (s[k]) && ! uni_utf_any (&s[k],":.-_")))))
                    break;
                }
              /* temporarily end the string at the ';', so that &s[i + 1] is just the name */
              c = s[k]; s[k] = 0;
              if (c != ';')
                {
                  /* found null or junk chars before terminating ';'.
                   * NOTE(review): depth was incremented on entry, so it is
                   * never zero here and 1010 is never chosen (the same goes
                   * for the other "depth ? 1003 : 1010" tests below) -
                   * confirm which code is wanted at the top level.
                   */
                  s[k] = c;
                  add_unique_error (xf, depth ? 1003 : 1010, uni_truncate_to (&s[i], (k - i) + 1));
                  if (map_what & ABORT_ON_FAILURE)
                    cleanup (wpbuf);
                  wpbuf[j++] = s[i];
                }
              else
                {
                  /* if we're not in a quoted entity val, add leading/trailing space */
                  pad = (map_what & PAD_PARAMETER_ENTITIES) ? WITH_WHITESPACE : WITHOUT_WHITESPACE;
                  if ((tmp = expand_peref (xf, &s[i + 1], pad)) == NULL)
                    {
                      s[k] = c;
                      /* unresolvables_okay overrides err msg and abort_on_failure */
                      if (map_what & UNRESOLVABLES_OKAY)
                        xwrap (errdebug (7, "skipping unresolvable peref, %s\n",
                                         utf_16_to_utf_8 (uni_truncate_to (&s[i], 20))));
                      else
                        {
                          /* reference to an undefined entity is usually an error */
                          add_xml_error (xf, 1012, uni_truncate_to (&s[i], 20));
                          if (map_what & ABORT_ON_FAILURE)
                            cleanup (wpbuf);
                        }
                      wpbuf[j++] = s[i];
                    }
                  else
                    {
                      /* code here is the same as for general entities */
                      if (xmlparse_env.debug_level >= 7)
                        {
                          /* copy the first conversion, since the second call
                           * to utf_16_to_utf_8() is made before errdebug() runs
                           */
                          p = strdup (utf_16_to_utf_8 (tmp));
                          xwrap (errdebug (7, "in map_entities(): expanded \"%s\" as \"%s\"\n",
                                           utf_16_to_utf_8 (&s[i + 1]), p));
                          free (p);
                        }
                      /* jumps into the general-entity code below, with tmp, k, and c set */
                      goto map_entity_recursively;
                    }
                }
              s[k] = c;
            }
          /* end of '%' section */
          break;

        case '&':
          if (top_state (&stack) != nowhere &&
              top_state (&stack) != in_single_quote &&
              top_state (&stack) != in_double_quote)
            {
              if (s[i + 1] == '#')
                /* character entities are handled below */
                goto char_entity;
              if ((map_what & FLAG_BYPASSED_AMPERSANDS) &&
                  ((top_state (&stack) == in_pi) ||
                   (top_state (&stack) == in_cdsect) ||
                   (top_state (&stack) == in_comment)))
                {
                  /* flag bypassed ampersands */
                  if ((tmp = malloc (32 * sizeof (my_wchar_t))) == NULL)
                    errabort (40, "malloc error in %s\n", "map_entities()");
                  uni_strcpy (tmp, uni_truncate_to (&s[i], 2));
                  uni_utf_strcat (tmp, " (");
                  uni_strcat (tmp, state_to_string (top_state (&stack)));
                  uni_utf_strcat (tmp, ")");
                  add_xml_warning (xf, 822, tmp);
                  free (tmp);
                }
              wpbuf[j++] = s[i];
            }
          else if (map_what & (MAP_CHAR_ENTITIES | MAP_GENERAL_ENTITIES | MAP_AMP_AND_LT))
            {
              switch (s[i + 1])
                {
                case '&':
                case '<':
                  /* if we're mapping && and &< to '&' and '<', omit this char */
                  if (map_what & MAP_AMP_AND_LT)
                    i++;
                  else
                    wpbuf[j++] = s[i++];
                  /* but don't omit the '&' or '<'; rather, add it to wpbuf */
                  wpbuf[j++] = s[i];
                  break;

                case '#':
                  /* note the goto above */
                char_entity:
                  if (! (map_what & MAP_CHAR_ENTITIES))
                    wpbuf[j++] = s[i];
                  else
                    {
                      /* move past "&#"; set default base to 10 */
                      i += 2; base = 10;
                      if (s[i] == 'x')
                        { i++; base = 16; }
                      wp = &s[i]; endptr = &wp;
                      /* now try to convert the digits to a wchar; clear errno
                       * first, so a stale ERANGE can't be mistaken for overflow
                       */
                      errno = 0;
                      l = uni_strtol (&s[i], endptr, base);
                      if (l == LONG_MAX && errno == ERANGE)
                        {
                          /* oops; can't convert; back up to the '&' and copy it as is */
                          if (s[i - 1] == 'x') i--;
                          i -= 2; wpbuf[j++] = s[i];
                        }
                      else if (**endptr != ';')
                        {
                          /* oops; there were non-digits in the char ref */
                          if (s[i - 1] == 'x') i--;
                          add_xml_error (xf, depth ? 1003 : 1010, uni_truncate_to (&s[i - 2], 20));
                          if (map_what & ABORT_ON_FAILURE)
                            cleanup (wpbuf);
                          i -= 2; wpbuf[j++] = s[i];
                        }
                      else
                        {
                          errcode = 0;
                          if (l <= 0 || l > 0x0010FFFFU)
                            /* oops; too big to be a valid Unicode char */
                            errcode = depth ? 1003 : 1010;
                          else if (l >= 0xD800 && l <= 0xDFFF)
                            /* Oops; char entity can't specify a value in the surrogate block */
                            errcode = 1054;
                          else if (l <= 0x1F && ! (l == 0x09 || l == 0x0A || l == 0x0D))
                            /* shouldn't be using control characters */
                            errcode = 1056;
                          else if (l == 0xFFFE)
                            /* byte-order marker is a valid Unicode character; FFFE is not */
                            errcode = 1055;
                          else if (l == 0xFFFF)
                            /* FFFF will never be a valid Unicode character */
                            errcode = 1057;
                          else if (l >= 0x80 && l <= 0x9F)
                            {
                              if (s[i - 1] == 'x') i--;
                              /* A "C1" (as opposed to ASCII "C0") control character */
                              add_xml_warning (xf, 1058, uni_truncate_to (&s[i - 2], 20));
                            }
                          if (errcode)
                            {
                              if (s[i - 1] == 'x') i--;
                              add_xml_error (xf, errcode, uni_truncate_to (&s[i - 2], 20));
                              if (map_what & ABORT_ON_FAILURE)
                                cleanup (wpbuf);
                              /* substitute the Unicode "dunno what this is" char, FFFD */
                              l = 0xFFFDU;
                            }
                          /* Convert integral l to a character */
                          tmp = int_2_utf_16_string (l);
                          if ((tmplen = uni_strlen (tmp)) > 1)
                            if ((len += tmplen) > wpbuflen)
                              {
                                /* buffer isn't big enough; enlarge it */
                                wpbuflen = len + uni_strlen (&s[i]) + 1;
                                wpbuf = realloc (wpbuf, wpbuflen * sizeof (my_wchar_t));
                                if (wpbuf == NULL)
                                  errabort (41, "realloc() error in %s\n", "map_entities()");
                              }
                          memcpy (&wpbuf[j], tmp, tmplen * sizeof (my_wchar_t));
                          /* leave i on the ';'; the loop header steps past it */
                          i += (*endptr - &s[i]);
                          j += tmplen;
                        }
                    }
                  break;

                default:
                  if (! (map_what & MAP_GENERAL_ENTITIES) || uni_isspace (s[i + 1]))
                    wpbuf[j++] = s[i];
                  else
                    {
                      /* skip to next semicolon */
                      for (k = i + 1; s[k] != 0 && s[k] != ';'; k++)
                        {
                          if (uni_isspace (s[k]) ||
                              (s[k] <= 0x80 && (iscntrl (s[k]) || (ispunct (s[k]) && ! uni_utf_any (&s[k],":.-_")))))
                            break;
                        }
                      /* temporarily end the string at the ';', so that &s[i + 1] is just the name */
                      c = s[k]; s[k] = 0;
                      if (c != ';')
                        {
                          /* found null or junk chars before terminating ';' */
                          s[k] = c;
                          add_unique_error (xf, depth ? 1003 : 1010, uni_truncate_to (&s[i], (k - i) + 1));
                          if (map_what & ABORT_ON_FAILURE)
                            cleanup (wpbuf);
                          wpbuf[j++] = s[i];
                        }
                      else
                        {
                          tmp = expand_ext_eref (xf, &s[i + 1]);
                          if (map_what & NO_EXTERNAL_EREFS && tmp)
                            {
                              /* XML 1.0 says ext erefs in attvals are fatal errors */
                              add_xml_error (xf, 1211, uni_truncate_to (tmp, 20));
                              if (map_what & ABORT_ON_FAILURE)
                                cleanup (wpbuf);
                            }
                          if (tmp == NULL && (tmp = expand_eref (xf, &s[i + 1])) == NULL)
                            {
                              /* Depending on what the programmer tells us, either flag
                               * unparsed entities as errors, or else ignore them.  In
                               * either event, we don't want to expand them.
                               */
                              if (! (map_what & UNPARSED_ENTITIES_OK))
                                {
                                  /* 1014 -> references to unparsed entities aren't allowed */
                                  xue = expand_uperef (xf, &s[i + 1]);
                                  s[k] = c;
                                  if (! xue)
                                    /* if not an uperef, jump to the normal "unresolvable" section */
                                    goto unresolvable_eref;
                                  else
                                    {
                                      add_xml_error (xf, 1014, uni_truncate_to (&s[i], 20));
                                      if (map_what & ABORT_ON_FAILURE)
                                        cleanup (wpbuf);
                                    }
                                }
                              else
                                /* programmer is telling me UNPARSED_ENTITIES are to be ignored */
                                if (! expand_uperef (xf, &s[i + 1]))
                                  {
                                    /* not an unparsed entity; in fact, not ANY declared entity */
                                    s[k] = c;
                                    /* unresolvables_okay overrides err msg and abort_on_failure */
                                  unresolvable_eref:
                                    if (map_what & UNRESOLVABLES_OKAY)
                                      xwrap (errdebug (7, "skipping unresolvable eref, %s\n",
                                                       utf_16_to_utf_8 (uni_truncate_to (&s[i], 20))));
                                    else
                                      {
                                        /* usually unresolvable references are an error */
                                        add_xml_error (xf, 1012, uni_truncate_to (&s[i], 20));
                                        if (map_what & ABORT_ON_FAILURE)
                                          cleanup (wpbuf);
                                      }
                                  }
                              /* leave the reference unexpanded; copy the '&' and move on */
                              wpbuf[j++] = s[i];
                            }
                          else
                            {
                              /* the '%' code above also jumps here, with tmp, k, and c set */
                            map_entity_recursively:
                              s[k] = c;
                              xwrap (errdebug (7, "recursively expand \"%s\"\n", utf_16_to_utf_8 (tmp)));
                              /* work on a private, writable copy of the replacement text */
                              if ((tmp = uni_strdup (tmp)) == NULL)
                                errabort (40, "malloc() error in %s\n", "map_entities()");
                              if ((ref = map_entities (xf, tmp, map_what, depth)))
                                {
                                  free (tmp);
                                  /* recursively expand entity replacement text */
                                  wpbuf[j] = 0; tmp2 = uni_strdup (ref);
                                  if (xmlparse_env.debug_level >= 7)
                                    {
                                      xwrap (errdebug (7, "recursively expanded -> \"%s\"\n",
                                                       utf_16_to_utf_8 (ref)));
                                      xwrap (errdebug (7, "concat expansion with \"%s\"\n",
                                                       utf_16_to_utf_8 (wpbuf)));
                                    }
                                  /* ref is malloc'd, and only its copy, tmp2, is used from here on */
                                  free (ref);
                                  /* NOTE(review): who owns wpbuf, tmp, and tmp2 after
                                   * uni_concatenate() isn't visible from here - confirm
                                   * that nothing leaks (or gets freed twice) below.
                                   */
                                  tmp = uni_concatenate (wpbuf, tmp2);
                                  /* the rest of s, after the ';', is mapped by a fresh top-level call */
                                  if ((tmp2 = map_entities (xf, &s[k + 1], map_what, 0)) == NULL)
                                    if (map_what & ABORT_ON_FAILURE)
                                      cleanup (tmp);
                                  free (stack.states);
                                  return uni_concatenate (tmp, tmp2);
                                }
                              else
                                {
                                  if (map_what & ABORT_ON_FAILURE)
                                    {
                                      free (tmp);
                                      cleanup (wpbuf);
                                    }
                                  /* probably a recursion problem; just expand
                                   * entity reference once and leave it at that
                                   */
                                  len += (tmplen = uni_strlen (tmp));
                                  if (len > wpbuflen)
                                    {
                                      wpbuflen = len + uni_strlen (&s[i]) + 1;
                                      if ((wpbuf = realloc (wpbuf, wpbuflen * sizeof (my_wchar_t))) == NULL)
                                        errabort (41, "realloc() error in %s\n", "map_entities()");
                                    }
                                  memcpy (&wpbuf[j], tmp, tmplen * sizeof (my_wchar_t));
                                  /* tmp is our own copy; we're done with it only now */
                                  free (tmp);
                                  j += tmplen;
                                  /* leave i on the ';'; the loop header steps past it */
                                  i = k;
                                }
                            }
                          s[k] = c;
                        }
                    }
                  /* end of default: in switch statement */
                  break;
                }
            }
          /* end of '&' section */
          break;

        default:
          /* default section */
          wpbuf[j++] = s[i];
          break;
        }
    }

  wpbuf[j] = 0;
  free (stack.states);
  return wpbuf;
}
/*
 * check_attribute
 *
 * Check whether the attribute name/value pair nv (arg 3) is valid for
 * element elname (arg 2), i.e., whether it follows that attribute's
 * declaration.  insert_ids (arg 4) is handed on unchanged to
 * basic_attribute_integrity_check().
 *
 * Problems are reported via add_xml_error() / add_xml_warning().  When
 * nv->lineno is nonzero, the errors raised directly by this function
 * are charged to that line rather than to the current xf->lineno.
 *
 * Returns zero if elname is not a declared element or nv->name is not
 * declared for it.  Otherwise returns the result of
 * basic_attribute_integrity_check().  Note that a #FIXED mismatch, a
 * bad xml:lang value, or an overlong value is reported but does not by
 * itself force a zero return.
 *
 * Call this function only after expanding the entities inside of
 * nv->val.  We're supposed to be checking their replacement text.
 * Entities not in quotes get expanded by the lexer.  We have to
 * expand ones in attribute values "manually" in the parser.
 */

/* Issue error `code' with text `detail', charged to the attribute's own
 * line number when one is recorded in nv; xf->lineno is put back afterwards.
 */
static void
attribute_error_at_own_line (struct xml_file *xf, struct name_val *nv, int code, my_wchar_t *detail)
{
  size_t saved_lineno;

  if (nv->lineno == 0)
    {
      add_xml_error (xf, code, detail);
      return;
    }
  saved_lineno = xf->lineno;
  xf->lineno = nv->lineno;
  add_xml_error (xf, code, detail);
  xf->lineno = saved_lineno;
}

int
check_attribute (struct xml_file *xf, my_wchar_t *elname, struct name_val *nv, int insert_ids)
{
  my_wchar_t *tmp;
  size_t len, i;
  struct xml_attribute *xa;

  xwrap (errdebug (7, "Checking attribute, %s\n", utf_16_to_utf_8 (nv->name)));

  /* see if element elname is defined */
  if (expand_element (xf, elname) == NULL)
    {
      /* error - element isn't defined; use attribute's own line no. */
      attribute_error_at_own_line (xf, nv, 1203, uni_truncate_to (elname, 20));
      return 0;
    }

  /* see if attribute nv->name is defined for elname */
  if ((xa = expand_attribute (xf, elname, nv->name)) == NULL)
    {
      /* if it's not defined, see if the user made a casing error;
       * work on a scratch copy so nv->name itself stays untouched
       */
      len = uni_strlen (nv->name);
      if ((tmp = malloc ((len + 1) * sizeof (my_wchar_t))) == NULL)
        errabort (41, "malloc error in %s\n", "check_attribute()");
      memcpy (tmp, nv->name, (len + 1) * sizeof (my_wchar_t));

      /* try all uppercase */
      for (i = 0; tmp[i]; i++)
        tmp[i] = uni_toupper (tmp[i]);
      if (expand_attribute (xf, elname, tmp))
        add_xml_error (xf, 1116, nv->name);
      else
        {
          /* try all lowercase */
          for (i = 0; tmp[i]; i++)
            tmp[i] = uni_tolower (tmp[i]);
          if (expand_attribute (xf, elname, tmp))
            add_xml_error (xf, 1116, nv->name);
        }

      /* element is defined, but nv->name isn't one of its attributes;
       * build "elname (attname)": the 4 extra slots hold " (", ")" and
       * the terminating zero
       */
      len = uni_strlen (elname) + 4 + uni_strlen (nv->name);
      if ((tmp = realloc (tmp, len * sizeof (my_wchar_t))) == NULL)
        errabort (41, "realloc error in %s\n", "check_attribute()");
      uni_strcpy (tmp, elname);
      uni_utf_strcat (tmp, " (");
      uni_strcat (tmp, nv->name);
      uni_utf_strcat (tmp, ")");

      /* error - attribute isn't declared; use attribute's own line no. */
      attribute_error_at_own_line (xf, nv, 1202, tmp);
      free (tmp);
      return 0;
    }

  /* values for attributes declared with #FIXED keyword must match default */
  if (xa->default_type == fixed && uni_strcmp (nv->val, xa->default_val) != 0)
    {
      /* message is: name="val" (default "default_val"); 17 leaves room
       * for the literal pieces plus the terminating zero
       */
      len = uni_strlen (nv->name) + uni_strlen (xa->default_val) +
            uni_strlen (nv->val) + 17;
      if ((tmp = malloc (len * sizeof (my_wchar_t))) == NULL)
        errabort (41, "malloc error in %s\n", "check_attribute()");
      uni_strcpy (tmp, nv->name);
      uni_utf_strcat (tmp, "=\"");
      uni_strcat (tmp, nv->val);
      uni_utf_strcat (tmp, "\" (default \"");
      uni_strcat (tmp, xa->default_val);
      uni_utf_strcat (tmp, "\")");
      attribute_error_at_own_line (xf, nv, 1200, tmp);
      free (tmp);
    }

  /* make sure xml:lang tag has a reasonable value (see langcode.c) */
  if (uni_utf_strcmp (nv->name, "xml:lang") == 0 && ! langcode_ok (nv->val))
    attribute_error_at_own_line (xf, nv, 1220, uni_truncate_to (nv->val, 20));

  /* check for excessively long attribute values (warning only, and
   * reported against the current xf->lineno)
   */
  if (! insert_ids || xa->default_type == implied || xa->default_type == required)
    if (uni_strlen (nv->val) > 32767)
      add_xml_warning (xf, 1223, uni_truncate_to (nv->val, 100));

  /* Check things like whether NMTOKEN attributes have legal characters */
  return basic_attribute_integrity_check (xf, xa, nv, insert_ids);
}
/*
 * error_1221 (c, s)
 *
 * Report error 1221 with a message of the form "c (in s)", where c and
 * s are my_wchar_t strings.  The macro expands in place, so the caller
 * must have `xf' (passed on to add_xml_error) and a scratch variable
 * `my_wchar_t *tmp_s' in scope.  Each argument is evaluated exactly
 * once.  The 16 in the size computation covers " (in ", ")" and the
 * terminating zero, with room to spare.  Use it like a statement,
 * followed by a semicolon.
 */
#define error_1221(c,s) do { \
    my_wchar_t *e1221_what = (c); \
    my_wchar_t *e1221_where = (s); \
    tmp_s = malloc (sizeof (my_wchar_t) * (16 + uni_strlen (e1221_what) + uni_strlen (e1221_where))); \
    if (tmp_s == NULL) \
      errabort (41, "malloc error in %s\n", "error_1221()"); \
    uni_strcpy (tmp_s, e1221_what); \
    uni_utf_strcat (tmp_s, " (in "); \
    uni_strcat (tmp_s, e1221_where); \
    uni_utf_strcat (tmp_s, ")"); \
    add_xml_error (xf, 1221, tmp_s); \
    free (tmp_s); \
  } while (0)
/*
 * basic_attribute_integrity_check
 *
 * Check whether attribute/value nv for attribute xa has a legal
 * value.  E.g., if xa->type is ENTITY, make sure that value is an
 * unparsed entity.  Or if xa->type is ID, then add value to the
 * "ids" hash table (it'll be checked later on).  Or if xa->type
 * is NMTOKEN, make sure value has no illegal characters in it.
 *
 * If insert_ids (arg 4) is nonzero, inserts any IDs it finds into
 * the xf->ids hashtable; and so also with IDREFs and the xf->idrefs
 * hashtable.  Normally insert_ids will be set unless we're still in
 * the DTD, and we're just checking attribute defaults.
 *
 * While the checks run, xf->lineno is set to nv->lineno (when that is
 * positive) so that messages point at the attribute; the previous
 * value is restored before returning.
 *
 * Returns 1 if value is OK; otherwise returns 0.  Note that illegal
 * characters in ID, IDREF and IDREFS values are reported (error 1221)
 * but do not change the return value; only the ENTITY, ENTITIES,
 * NMTOKEN, NMTOKENS, NOTATION and enumeration checks can yield 0.
 */
static int
basic_attribute_integrity_check (xml_file *xf, xml_attribute *xa, name_val *nv, int insert_ids)
{
  size_t i;
  int old_lineno, ok = 1;
  my_wchar_t *tmp, *tmp_s;      /* tmp_s is scratch space for error_1221() */

  old_lineno = xf->lineno;
  if (nv->lineno > 0)
    xf->lineno = nv->lineno;

  /* Do some basic integrity checks. */
  switch (xa->type)
    {
    case cdata:
      /* Dunno what we should check here; any reasonable char is okay.
       * NB: the XML 1.0 spec doesn't say that CDATA atts should be
       * treated, with regard to entity expansion, any differently
       * than other attribute types.  Bottom line: Don't get CDATA
       * sections (which don't do any entity expansion) and CDATA
       * attributes mixed up.
       */
      break;

    case id:
      /* This isn't a very good check; really we should do a full scan
       * to make sure all ID values match the Name production in the
       * XML 1.0 spec.  Only characters below 0x80 are screened; see
       * also nmtoken(s) below.
       */
      if (uni_isspace (nv->val[0]) || uni_isdigit (nv->val[0]) ||
          (nv->val[0] < 0x80 && ! (isalpha (nv->val[0]) || uni_utf_any (&nv->val[0], "_:&"))))
        error_1221 (uni_truncate_to (nv->val, 2), nv->val);
      if (nv->val[0])
        for (i = 1; nv->val[i] != 0; i++)
          if (uni_isspace (nv->val[i]) ||
              (nv->val[i] < 0x80 && ! (isalnum (nv->val[i]) || uni_utf_any (&nv->val[i], ".-_:&;"))))
            error_1221 (uni_truncate_to (&nv->val[i], 2), nv->val);
      /* add this id to the ids hash table */
      if (insert_ids)
        add_id (xf, nv);
      break;

    case idref:
      /* same character screening as for ID values */
      if (uni_isspace (nv->val[0]) || uni_isdigit (nv->val[0]) ||
          (nv->val[0] < 0x80 && ! (isalpha (nv->val[0]) || uni_utf_any (&nv->val[0], "_:&"))))
        error_1221 (uni_truncate_to (nv->val, 2), nv->val);
      if (nv->val[0])
        for (i = 1; nv->val[i] != 0; i++)
          if (uni_isspace (nv->val[i]) ||
              (nv->val[i] < 0x80 && ! (isalnum (nv->val[i]) || uni_utf_any (&nv->val[i], ".-_:&;"))))
            error_1221 (uni_truncate_to (&nv->val[i], 2), nv->val);
      /* add this idref attribute's value to the idrefs hash table */
      if (insert_ids)
        add_idref (xf, nv->val, NULL);
      break;

    case idrefs:
      /* screen each idref in nv->val, then add it to the idrefs hash table */
      tmp = uni_tokenize (nv->val);
      while (tmp != NULL)
        {
          /* the leading-digit test must look at this token, not at the
           * first character of the whole attribute value
           */
          if (uni_isdigit (tmp[0]) ||
              (tmp[0] < 0x80 && ! (isalpha (tmp[0]) || uni_utf_any (&tmp[0], "_:&"))))
            error_1221 (uni_truncate_to (tmp, 2), tmp);
          if (tmp[0])
            for (i = 1; tmp[i] != 0; i++)
              if (tmp[i] < 0x80 && ! (isalnum (tmp[i]) || uni_utf_any (&tmp[i], ".-_:&;")))
                error_1221 (uni_truncate_to (&tmp[i], 2), tmp);
          if (insert_ids)
            add_idref (xf, tmp, NULL);
          /* unlike strtok(), returns ptr to static buf */
          tmp = uni_tokenize (NULL);
        }
      break;

    case entity:
      if (! expand_uperef (xf, nv->val))
        {
          /* unparsed entity nv->val is not declared */
          add_unique_error (xf, 1012, uni_truncate_to (nv->val, 20));
          ok = 0;
        }
      break;

    case entities:
      /* check each entity to see if it's declared; keep going after a
       * failure so that every undeclared entity gets reported
       */
      tmp = uni_tokenize (nv->val);
      while (tmp != NULL)
        {
          if (! expand_uperef (xf, tmp))
            {
              /* unparsed entity is not declared */
              add_unique_error (xf, 1012, uni_truncate_to (tmp, 20));
              ok = 0;
            }
          /* unlike strtok(), returns ptr to static buf */
          tmp = uni_tokenize (NULL);
        }
      break;

    case nmtoken:
      /* This isn't a very good check, but it's enough for ASCII
       * documents.  On the TO-DO list is to make this really do a
       * check for matches against the Nmtoken pattern.  See also
       * "id" above.
       */
      for (i = 0; nv->val[i] != 0; i++)
        if (uni_isspace (nv->val[i]) ||
            (nv->val[i] < 0x80 && ! (isalnum (nv->val[i]) || uni_utf_any (&nv->val[i], ".-_:&;"))))
          {
            error_1221 (uni_truncate_to (&nv->val[i], 2), nv->val);
            ok = 0;
          }
      break;

    case nmtokens:
      /* This isn't a very good check.  See above on nmtoken.  ASCII
       * whitespace is allowed here since it separates the tokens.
       */
      for (i = 0; nv->val[i] != 0; i++)
        if (nv->val[i] < 0x80 && ! isspace (nv->val[i]) &&
            ! (isalnum (nv->val[i]) || uni_utf_any (&nv->val[i], ".-_:&;")))
          {
            error_1221 (uni_truncate_to (&nv->val[i], 2), nv->val);
            ok = 0;
          }
      break;

    case notation:
      /* nv->val must resolve as a declared notation */
      if (! expand_notname (xf, nv->val))
        {
          add_xml_error (xf, 1013, uni_truncate_to (nv->val, 20));
          ok = 0;
          break;
        }
      /* fall through */
    case enumeration:
      /* make sure nv->val matches one of the declared values
       * enumerated in the original AttDef
       */
      for (i = 0; i < xa->nmtoklen; i++)
        if (xa->nmtokens[i])
          if (uni_strcmp (xa->nmtokens[i], nv->val) == 0)
            break;
      if (i == xa->nmtoklen)
        {
          /* an error; see if there's a case-sensitivity problem */
          for (i = 0; i < xa->nmtoklen; i++)
            if (xa->nmtokens[i])
              if (uni_strcasecmp (xa->nmtokens[i], nv->val) == 0)
                break;
          if (i < xa->nmtoklen)
            add_xml_error (xf, 1116, uni_truncate_to (nv->name, 20));
          add_xml_error (xf, 1205, uni_truncate_to (nv->name, 20));
          ok = 0;
        }
      break;

    default:
      /* no checks for any other attribute type */
      break;
    }

  /* single exit: always put the caller's line number back */
  xf->lineno = old_lineno;
  return ok;
}
/*
* check_dup_enum_vals
*
* XML 1.0 spec, par. 3.3.1, says that XML enumerated attribute
* value lists should not use the same value twice for any two
* attributes of a single element. This is a stupid SGML compat
* restriction. But I guess it can't hurt to issue a warning.
*
* Returns 1 if all is well. Returns 0 if any warning messages were
* generated. I.e., a zero return value indicates a problem.
*/
int
check_dup_enum_vals (struct xml_file *xf, struct xml_element *xe)
{
int errors = 0;
size_t i, j, k, l;
/* go through every attribute defined for this element */
for (i = 0; i < xe->attlistlen; i++)
/* for each element, see if that element is enumerated */
if (xe->attlist[i]->type == enumeration)
/*
* if that element is enumerated, check to see if any of its
* values (xe->attlist[j]->nmtokens) have already been used -
* i.e., if it's a duplicate
*/
for (j = 0; j < xe->attlist[i]->nmtoklen; j++)
{
for (k = 0; k <= i; k++)
{
if (xe->attlist[k]->type == enumeration)
{
if (k < i)
{
for (l = 0; l < xe->attlist[k]->nmtoklen; l++)
if (uni_strcmp (xe->attlist[k]->nmtokens[l], xe->attlist[i]->nmtokens[j]) == 0)
{
/* just issue a warning; no error */
add_xml_warning (xf, 1210, xe->attlist[i]->nmtokens[j]);
errors++;
goto next;
}
}
else
{
for (l = 0; l < j; l++)
if (uni_strcmp (xe->attlist[k]->nmtokens[l], xe->attlist[i]->nmtokens[j]) == 0)
{
add_xml_warning (xf, 1210, xe->attlist[i]->nmtokens[j]);
errors++;
goto next;
}
}
}
}
/* we jump right to here if we got a duplicate */
next:
}
return ! errors;
}
/*
 * check_content_models
 *
 * For every element registered in xf->element_names (i.e., declared
 * in the internal or external DTD subset), checks for undeclared
 * elements in its content model.  Elements of type "children" are
 * handed to check_leaves_in_cmnode(); for elements of type "mixed",
 * every name in the content model other than #PCDATA must expand to a
 * declared, non-dummy element, or else warning 672 is issued with the
 * text "name (containing-element)".
 *
 * Returns the number of elements with content models of type "mixed"
 * or "children" (and that therefore had to be checked).  Returns 0 if
 * xf has no element table.
 */
int
check_content_models (xml_file *xf)
{
  size_t len;
  int count = 0;
  my_wchar_t **wpp, *tmp;
  struct xml_element *xe, *xe2;
  struct rg_htable_item *result;

  if (! xf->element_names)
    return 0;

  /* for each element (a NULL argument asks for the next table item)... */
  for (result = rg_get_htable_items (xf->element_names);
       result != NULL;
       result = rg_get_htable_items (NULL))
    {
      /* ...check to see if every string in its content model (barring
       * #PCDATA, if mixed) corresponds to another declared element
       */
      xe = (struct xml_element *)result->data;
      if (! xe->content_model)
        continue;

      if (xe->type == children)
        {
          /* make sure all the leaves in a cmnode (i.e., in a given
           * content-model tree) name valid, declared elements
           */
          check_leaves_in_cmnode (xf, result->uni_key, xe->content_model);
          count++;
        }
      else if (xe->type == mixed)
        {
          /* here the content model is a NULL-terminated list of names */
          for (wpp = xe->content_model; *wpp != NULL; wpp++)
            {
              if (uni_utf_strcmp (*wpp, "#PCDATA") == 0)
                continue;
              xe2 = expand_element (xf, *wpp);
              if (xe2 != NULL && xe2->type != dummy)
                continue;

              /* Oops, content model names an undeclared element; the 4
               * extra slots hold " (", ")" and the terminating zero
               */
              len = uni_strlen (result->uni_key) + 4 + uni_strlen (*wpp);
              if ((tmp = malloc (len * sizeof (my_wchar_t))) == NULL)
                errabort (41, "malloc error in %s\n", "check_content_models()");
              uni_strcpy (tmp, *wpp);
              uni_utf_strcat (tmp, " (");
              uni_strcat (tmp, result->uni_key);
              uni_utf_strcat (tmp, ")");
              add_xml_warning (xf, 672, tmp);
              free (tmp);
            }
          count++;
        }
    }
  return count;
}
/*
* state_to_string
*
* Turn a where_am_i enum into a my_wchar_t string. Returns a pointer
* to a static my_wchar_t buffer that may change on subsequent calls.
*/
static my_wchar_t *
state_to_string (enum where_am_i st)
{
char *tmp;
static my_wchar_t wbuf[32];
switch (st)
{
case nowhere:
tmp = "nowhere";
break;
case in_markup:
tmp = "in_markup";
break;
case in_single_quote:
tmp = "in_single_quote";
break;
case in_double_quote:
tmp = "in_double_quote";
break;
case in_pi:
tmp = "in_pi";
break;
case in_comment:
tmp = "in_comment";
break;
case in_marked_section:
tmp = "in_marked_section";
break;
case in_entity:
tmp = "in_entity";
break;
case in_cdsect:
tmp = "in_cdsect";
break;
default:
tmp = "???";
break;
}
uni_strcpy (wbuf, utf_8_to_utf_16 (tmp));
return wbuf;
}
#ifdef STANDALONE_GRAMMUTIL_TEST
#include "readcfg.h"
#include "dtdutil.h"
#include "strutil.h"
xmlparse_environment xmlparse_env;
int
main (int argc, char **argv)
{
char linebuf[2048];
struct name_val *nv;
struct xml_file *xf, *xf2;
struct xml_attribute *xa;
my_wchar_t wlinebuf[2048 * 4];
my_wchar_t *wp, *w_tmp, *w_tmp2, *w_tmp3;
/* zero out the xmlparse_env structure */
memset (&xmlparse_env, 0, sizeof (xmlparse_env));
/* and set it to retain all of the parse tree */
xmlparse_env.keep_children = yes;
readcfg (argc, argv);
xf = create_xml_file ("Test/grammutil.input");
/* create a notation called "GIF" */
w_tmp = uni_strdup (utf_8_to_utf_16 ("GIF"));
w_tmp2 = uni_strdup (utf_8_to_utf_16 ("http://www.mainsite.com/support"));
if (! add_notname (xf, w_tmp, w_tmp2))
printf ("Error adding notation GIF to xf\n");
free (w_tmp2);
wp = uni_strdup (utf_8_to_utf_16 ("fig.bump"));
w_tmp2 = uni_strdup (utf_8_to_utf_16 ("/Images/bump.gif"));
if (! add_uperef (xf, wp, w_tmp, w_tmp2))
printf ("Error adding unparsed entity, fig.bump, to xf\n");
free (w_tmp);
free (w_tmp2);
free (wp);
/* create an element called "IMG", and give it an attribute, "SRC" */
w_tmp = uni_strdup (utf_8_to_utf_16 ("IMG"));
if (! add_element (xf, w_tmp, empty, NULL))
printf ("Error adding element IMG to xf\n");
/* create an attribute called "SRC" */
w_tmp2 = uni_strdup (utf_8_to_utf_16 ("SRC"));
if ((xa = create_xml_attribute (xf, w_tmp2, entity, 0, NULL, required, NULL)) == NULL)
printf ("Error creating attribute SRC\n");
/* add SRC attribute to IMG entity definition */
if (! add_attribute (xf, w_tmp, xa))
printf ("Error adding attribute SRC to element IMG in xml_file xf.\n");
if (expand_attribute (xf, w_tmp, w_tmp2) != xa)
printf ("SRC was not correctly registered as an attribute of IMG.\n");
/* pretend we're parsing a file, and we just ran into
*/
w_tmp3 = uni_strdup (utf_8_to_utf_16 ("fig.bump"));
if ((nv = create_name_val (w_tmp2, w_tmp3, NULL, NULL, 0, no, NULL)))
{
if (! check_attribute (xf, w_tmp, nv, INSERT_IDS))
printf ("
doesn't seem to validate (it should).\n");
free_name_val (nv);
}
free (w_tmp3);
free (w_tmp2);
free (w_tmp);
while (fgets (linebuf, 2048, xf->file))
{
trim (linebuf, "\n");
wp = uni_strdup (utf_8_to_utf_16 (linebuf));
uni_strcpy (wlinebuf, wp);
uni_strcat (wlinebuf, utf_8_to_utf_16 ("-expansion"));
if (! add_peref (xf, wp, wlinebuf))
printf ("insert failed; key already present\n");
else
{
if ((w_tmp = expand_peref (xf, wp, WITHOUT_WHITESPACE)))
printf ("PEref \"%s\" expands to -> \"%s\"\n",
linebuf, utf_16_to_utf_8 (w_tmp));
else
printf ("Problem: PEref has no expansion.\n");
if ((w_tmp = expand_peref (xf, wp, WITH_WHITESPACE)))
printf ("PEref \"%s\" expands (with whitespace) to -> \"%s\"\n",
linebuf, utf_16_to_utf_8 (w_tmp));
else
printf ("Problem: PEref has no expansion.\n");
}
if (! add_eref (xf, wp, wlinebuf))
printf ("eref insert failed; key already present\n");
else
{
if ((w_tmp = expand_eref (xf, wp)))
printf ("Eref \"%s\" expands to -> \"%s\"\n",
linebuf, utf_16_to_utf_8 (w_tmp));
else
printf ("Problem: Eref has no expansion.\n");
}
w_tmp = utf_8_to_utf_16 ("junkdata.data");
if ((xf2 = resolve_pub_or_sysid (xf, NULL, w_tmp)) == NULL)
printf ("Error: Can't create xf2.\n");
else
{
if (add_ext_eref (xf, wp, xf2, MAP_CHAR_ENTITIES | MAP_PARAMETER_ENTITIES) <= 0)
printf ("external eref insert failed; already present\n");
else
{
if ((w_tmp = expand_ext_eref (xf, wp)))
printf ("External eref \"%s\" expands to -> \"%s\"\n",
linebuf, utf_16_to_utf_8 (w_tmp));
else
printf ("Problem: External eref has no expansion.\n");
}
}
free (wp);
}
wp = uni_strdup (utf_8_to_utf_16 ("shouldn't exist"));
printf ("Now, let's try one that shouldn't resolve: \"shouldn't exist\"\n");
if (! (w_tmp = expand_eref (xf, wp)))
printf ("Good; it doesn't resolve.\n");
else
printf ("Oh no, it resolves: \"%s\" -> \"%s\"\n",
linebuf, utf_16_to_utf_8 (w_tmp));
free (wp);
w_tmp = uni_strdup (utf_8_to_utf_16 ("<&stuff;, stuff, %stuff;>!"));
printf ("Mapping entities in: <&stuff;, stuff, %%stuff;>!\n");
w_tmp2 = map_entities (xf, w_tmp, MAP_GENERAL_ENTITIES, 0);
printf ("\t -> %s\n", utf_16_to_utf_8 (w_tmp2));
free (w_tmp2);
w_tmp2 = map_entities (xf, w_tmp, MAP_GENERAL_ENTITIES | MAP_CHAR_ENTITIES, 0);
printf ("\t -> %s\n", utf_16_to_utf_8 (w_tmp2));
free (w_tmp2);
w_tmp2 = map_entities (xf, w_tmp, MAP_PARAMETER_ENTITIES | MAP_CHAR_ENTITIES, 0);
printf ("\t -> %s\n", utf_16_to_utf_8 (w_tmp2));
free (w_tmp2);
free (w_tmp);
free_xml_file (xf);
xf = create_xml_file ("Test/grammutil.input");
while (fgets (linebuf, 2048, xf->file))
{
trim (linebuf, "\n");
wp = uni_strdup (utf_8_to_utf_16 (linebuf));
w_tmp = utf_8_to_utf_16 ("junkdata.data");
if ((xf2 = resolve_pub_or_sysid (xf, NULL, w_tmp)) == NULL)
printf ("Error: Can't create xf2.\n");
else
{
if (add_ext_eref (xf, wp, xf2, MAP_CHAR_ENTITIES | MAP_PARAMETER_ENTITIES) <= 0)
printf ("insert failed; key already present\n");
else
{
if ((w_tmp = expand_ext_eref (xf, wp)))
printf ("External eref \"%s\" expands to -> \"%s\"\n",
linebuf, utf_16_to_utf_8 (w_tmp));
else
printf ("Problem: External eref has no expansion.\n");
}
}
free (wp);
}
w_tmp = uni_strdup (utf_8_to_utf_16 ("<&stuff;, stuff, %stuff;>!"));
printf ("Mapping entities in: <&stuff;, stuff, %%stuff;>!\n");
w_tmp2 = map_entities (xf, w_tmp, MAP_GENERAL_ENTITIES, 0);
printf ("\t -> %s\n", utf_16_to_utf_8 (w_tmp2));
free (w_tmp2);
w_tmp2 = map_entities (xf, w_tmp, MAP_GENERAL_ENTITIES | MAP_CHAR_ENTITIES, 0);
printf ("\t -> %s\n", utf_16_to_utf_8 (w_tmp2));
free (w_tmp2);
w_tmp2 = map_entities (xf, w_tmp, MAP_PARAMETER_ENTITIES | MAP_CHAR_ENTITIES, 0);
printf ("\t -> %s\n", utf_16_to_utf_8 (w_tmp2));
free (w_tmp2);
free (w_tmp);
w_tmp = uni_strdup (utf_8_to_utf_16 ("junk;junk;&;, bad stuff, %error"));
printf ("Mapping entities in: junk;junk;&;, bad stuff, %%error\n");
w_tmp2 = map_entities (xf, w_tmp, MAP_GENERAL_ENTITIES, 0);
printf ("\t -> %s\n", utf_16_to_utf_8 (w_tmp2));
free (w_tmp2);
w_tmp2 = map_entities (xf, w_tmp, MAP_GENERAL_ENTITIES | MAP_CHAR_ENTITIES, 0);
printf ("\t -> %s\n", utf_16_to_utf_8 (w_tmp2));
free (w_tmp2);
w_tmp2 = map_entities (xf, w_tmp, MAP_PARAMETER_ENTITIES | MAP_CHAR_ENTITIES, 0);
printf ("\t -> %s\n", utf_16_to_utf_8 (w_tmp2));
free (w_tmp2);
free (w_tmp);
w_tmp = uni_strdup (utf_8_to_utf_16 (""<>&'&" &<,&&"));
printf ("Mapping entities in: "<>&'&" &<,&&\n");
w_tmp2 = map_entities (xf, w_tmp, MAP_GENERAL_ENTITIES, 0);
printf ("\t -> %s\n", utf_16_to_utf_8 (w_tmp2));
free (w_tmp2);
w_tmp2 = map_entities (xf, w_tmp, MAP_GENERAL_ENTITIES | MAP_CHAR_ENTITIES, 0);
printf ("\t -> %s\n", utf_16_to_utf_8 (w_tmp2));
free (w_tmp2);
w_tmp2 = map_entities (xf, w_tmp, MAP_PARAMETER_ENTITIES | MAP_CHAR_ENTITIES | MAP_AMP_AND_LT, 0);
printf ("\t -> %s\n", utf_16_to_utf_8 (w_tmp2));
free (w_tmp2);
free (w_tmp);
w_tmp = uni_strdup (utf_8_to_utf_16 ("ID"));
w_tmp2 = uni_strdup (utf_8_to_utf_16 ("1"));
if ((nv = create_name_val (w_tmp, w_tmp2, NULL, NULL, 0, no, NULL)))
{
printf ("Expanding nonexistent ID (should fail).\n");
if (! expand_id (xf, w_tmp))
printf ("Failed.\n");
printf ("Adding ID 1 via add_id().\n");
if (! add_id (xf, nv))
printf ("Failed.\n");
if (expand_id (xf, w_tmp2) != nv)
printf ("Oops; add/expand ID routines aren't working.\n");
printf ("Adding ID 1 again (should fail).\n");
if (! add_id (xf, nv))
printf ("Failed.\n");
free_name_val (nv);
}
free (w_tmp2);
free (w_tmp);
free_xml_file (xf);
exit (0);
}
#endif /* STANDALONE_GRAMMUTIL_TEST */