/* *****************************************************************************
 *
 * $RCSfile: parstree.c,v $
 * $Date: 1999/08/13 21:24:52 $
 * $Source: /home/richard/Xml/RCS/parstree.c,v $
 * $Revision: 1.45 $
 * $Author: richard $
 *
 *****************************************************************************
 *
 * Copyright 1998, Brown University and Richard Goerwitz
 *
 *****************************************************************************
 *
 * This file contains routines for building the structures that form
 * the parse trees for XML files.  As one might expect, these
 * routines are used extensively in parsutil.y, the main parser
 * specification file.
 *
 * The routines here fall into three basic classes:
 *
 *   1) routines that handle cmnode structures
 *   2) routines that handle name_val structures
 *   3) routines that handle xml_node structures
 *
 * 1.  Cmnode structures hold parse trees for content models (they
 * get used in xml_element structures, which have a field,
 * content_model, just for cmnode structs).  To see where they are
 * used, look at the "Cp" rules in the parser.
 *
 * 2.  Name_val structures hold name-value pairs.  Mainly they are
 * used to hold attribute-value pairs discovered while parsing.  They
 * get used in xml_node structures, which have a field, atts, that
 * holds name_val structures.  They also get used in the "ids" hash
 * table.  E.g., if I call expand_id(xf, "1"), I will get back a
 * pointer to a name_val structure in the parse tree that parsutil.y
 * is building for a given XML file.  The ids hash table, that is,
 * returns a pointer to the name_val struct in the parse tree that
 * holds the attribute value where the ID was originally used.
 *
 * 3.  Xml_node structures hold the overall parse tree generated by
 * the parser in parsutil.y.  When parsutil.y is done with a file,
 * xf, and its parse tree is complete, you can retrieve the tree from
 * (struct xml_file *)xf->parstree.
* * Note that free_xml_file(xf) (in fileutil.c) will automatically * free xf->parstree. * * See parstree.h for the definitions of cmnode, name_val, and * xml_node structures. * ***************************************************************************** */ #include "parstree.h" #include "errabort.h" #include "grammutil.h" #include "namespace.h" #include "utfutil.h" static my_wchar_t *node_type_to_string (xml_node *); static struct dfa_stack { dfa_state **states; size_t buflen; size_t len; } dstack = { NULL, 0, 0 }; static dfa_state *push_dfa_state (struct dfa_stack *, dfa_state *); static dfa_state *pop_dfa_state (struct dfa_stack *); static dfa_state *top_dfa_state (struct dfa_stack *); static size_t add_parent_links_to_children (xml_node *); struct cmnode * create_cmnode (struct cmnode *left, struct cmnode *right, enum cmnode_type type) { struct cmnode *new_cmn; /* What are we doing? */ xwrap (errdebug (7, "creating new cmnode (content-model node) structure\n")); if (type == slash || type == comma) { if (left == NULL) return right; else if (right == NULL) return left; } else if (right != NULL) errabort (42, "unexpectedly nonnull val in %s\n", "create_cmnode()"); if ((new_cmn = malloc (sizeof (struct cmnode))) == NULL) errabort (40, "malloc error in %s\n", "create_cmnode()"); memset (new_cmn, 0, sizeof (struct cmnode)); new_cmn->left = left; new_cmn->right = right; new_cmn->type = type; return new_cmn; } struct cmnode * copy_cmnode (struct cmnode *cmn) { struct cmnode *new_cmn; if (cmn == NULL) return NULL; /* What are we doing? */ xwrap (errdebug (7, "recursively copying cmnode structure\n")); new_cmn = create_cmnode (copy_cmnode (cmn->left), copy_cmnode (cmn->right), cmn->type); /* the important part */ if (cmn->text == NULL) new_cmn->text = NULL; else if ((new_cmn->text = uni_strdup (cmn->text)) == NULL) errabort (40, "malloc error in %s\n", "copy_cmnode()"); /* What did we just do? 
*/ xwrap (errdebug (7, "recursively copied cmnode structure\n")); return new_cmn; } struct cmnode * create_cmleaf (my_wchar_t *text) { struct cmnode *new_cmn; /* What are we doing? */ xwrap (errdebug (7, "creating new cmnode structure (leaf)\n")); if ((new_cmn = malloc (sizeof (struct cmnode))) == NULL) errabort (40, "malloc error in %s\n", "create_cmleaf()"); memset (new_cmn, 0, sizeof (struct cmnode)); /* new_cmn->text never needs to be explicitly freed */ new_cmn->text = text ? uni_add_string (text) : NULL; new_cmn->type = leaf; return new_cmn; } /* * check_leaves_in_cmnode * * Check whether the leaves in a cmnode (content-model tree) name * valid, declared elements. This routine should not be called * until the DTD is done being parsed because it's okay, apparently, * to name elements in a content model that have not yet been * declared. They just have to end up declared by the end of the * DTD. * * Returns the number of leaves visited. */ int check_leaves_in_cmnode (struct xml_file *xf, my_wchar_t *elname, struct cmnode *cmn) { size_t len; my_wchar_t *tmp; int count = 0; struct xml_element *xe; if (cmn == NULL) return 0; else if (cmn->type == leaf) { if (cmn->text == NULL) errabort (43, "unexpectedly null value in %s\n", "check_leaves_in_cmnode()"); /* Now we get to the whole point of this routine: * to check whether the leaf node names a declared * element. 
*/ if ((xe = expand_element (xf, cmn->text)) == NULL || xe->type == dummy) { len = uni_strlen (elname) + 4 + uni_strlen (cmn->text); tmp = malloc (len * sizeof (my_wchar_t)); uni_strcpy (tmp, cmn->text); uni_utf_strcat (tmp, " ("); uni_strcat (tmp, elname); uni_utf_strcat (tmp, ")"); add_xml_warning (xf, 672, tmp); free (tmp); } count++; } else { if (cmn->left != NULL) count += check_leaves_in_cmnode (xf, elname, cmn->left); if (cmn->right != NULL) count += check_leaves_in_cmnode (xf, elname, cmn->right); } return count; } void free_cmnode (struct cmnode *cmn) { if (cmn) { if (cmn->left != NULL) free_cmnode (cmn->left); if (cmn->right != NULL) free_cmnode (cmn->right); /* leave cmn->text alone; let uni_free_strings() clean it up */ /* What did we just do? */ xwrap (errdebug (5, "recursively freed cmnode structure\n")); free (cmn); } } /* * add_parent_links_to_attributes * * Make sure every attribute in node xn (arg 2) has a link in its * parent field back to xn. */ size_t add_parent_links_to_attributes (struct xml_file *xf, struct xml_node *xn) { size_t i = 0; if (xn->atts) for (i = 0; i < xn->attcount; i++) xn->atts[i]->parent = xn; return i; } /* * check_attributes * * Goes through every attribute in an xml_node, flagging those that * don't conform to their decls as errors. * * NOTE WELL: This routine must be called after the code that sets * up the default namespace for xn. Otherwise, there's no way to * whether xn lies in an external namespace, and thus should, if * xmlparse_env.force_valid_namespaces == yes, be skipped. 
*/
size_t
check_attributes (struct xml_file *xf, struct xml_node *xn)
{
  size_t idx;
  int saved_lineno;
  struct name_val *att;

  xwrap (errdebug (7, "Checking attributes in xml_node\n."));

  /* attributes are only validated on element nodes */
  if (xn->type != element)
    {
      xwrap (errdebug (5, "aborting attribute check; xml_node is not an element\n."));
      return 0;
    }

  /* no attribute array: nothing was visited */
  if (! xn->atts)
    return 0;

  for (idx = 0; idx < xn->attcount; idx++)
    {
      att = xn->atts[idx];

      /* A prefixed attribute must have its prefix declared, except
       * for the 'xml' and 'xmlns' prefixes, which are never looked
       * up (prefix_to_uri() is in namespace.c). */
      if (att->prefix
          && uni_utf_strcmp (att->prefix, "xml") != 0
          && uni_utf_strcmp (att->prefix, "xmlns") != 0
          && ! prefix_to_uri (xf, xn, att->prefix))
        {
          /* Undeclared prefix.  When the attribute recorded its own
           * line number, report the error against that line and then
           * put the file's line number back. */
          if (att->lineno != 0)
            {
              saved_lineno = xf->lineno;
              xf->lineno = att->lineno;
              add_xml_error (xf, 1383, att->name);
              xf->lineno = saved_lineno;
            }
          else
            add_xml_error (xf, 1383, att->name);
        }

      /* An attribute that is not declared for this element and that
       * forced_valid() accepts is exempt from the DTD check. */
      if (! expand_attribute (xf, xn->name, att->name)
          && forced_valid (xf, att, xn, xn, NULL, 0))
        continue;

      /* With force_valid_namespaces on, a bare 'xmlns' attribute is
       * not checked either (check_attribute() is in grammutil.c). */
      if (xmlparse_env.force_valid_namespaces == no
          || uni_utf_strcmp (att->name, "xmlns") != 0)
        check_attribute (xf, xn->name, att, INSERT_IDS);
    }

  /* equals xn->attcount once the loop has run to completion */
  return idx;
}

/*
 * add_child_and_parent_backlinks
 *
 * The XML parse tree starts with one xml_node structure at the top
 * level.  This node may have children xml_node structs, and they, in
 * turn, still other children (and so on).
* * This routine simply takes a pointer to an array of children * xml_node structures (arg 3) of length 'count' (arg 2), and * inserts them into a parent's 'nodes' field, xn (arg 1). * * It then inserts links in each child xml_node structure back to * the parent. * * Returns the number of child xml_nodes. */ size_t add_child_and_parent_backlinks (struct xml_node *xn, size_t count, xml_node **xnp) { xwrap (errdebug (7, "Adding child xml nodes to parent node, %s\n", xn->name ? utf_16_to_utf_8 (xn->name) : "(anonymous)")); if (xn == NULL) errabort (43, "unexpected null xml_node in %s\n", "add_child_and_parent_backlinks()"); if (count == 0 && xnp != NULL) errabort (42, "unexpected nonnull val in %s\n", "add_child_and_parent_backlinks()"); if (count != 0 && xnp == NULL) errabort (43, "unexpected null val in %s\n", "add_child_and_parent_backlinks()"); if (xn->nodes) errabort (51, "node, %s, already has children\n", xn->name ? utf_16_to_utf_8 (xn->name) : NULL); xn->nodecount = count; xn->nodes = xnp; /* add links back to the parent to all the children */ add_parent_links_to_children (xn); return count; } /* * add_parent_links_to_children * * This function takes an xml_node structure, and goes through all * its children (xn->nodes), setting the parent field of each child * to point back to the parent xml_node. */ static size_t add_parent_links_to_children (struct xml_node *xn) { size_t count; xwrap (errdebug (7, "Setting child xml_node structs to point to parent.\n")); for (count = 0; count < xn->nodecount; count++) { if (xn->nodes[count]->parent != NULL) { /* to deal with namespaces, xn->nodes[count]->parent was set * earlier for xml_node structs of type "element"; there is * a problem if that parent is not xn */ xwrap (errdebug (7, "child node, %s, already has a parent (ok)\n", xn->name ? utf_16_to_utf_8 (xn->name) : "(null)")); if (xn->nodes[count]->parent != xn) { xwrap (errdebug (1, "Child has a different parent, %s (this is bad)\n", xn->nodes[count]->parent->name ? 
utf_16_to_utf_8 (xn->nodes[count]->parent->name) : "(null)")); } } xn->nodes[count]->parent = xn; } xwrap (errdebug (5, "Set child xml_node structs to point to parent.\n")); /* cound should equal xn->nodecount */ return count; } struct xml_node * create_xml_node (enum node_type type, my_wchar_t *name, my_wchar_t *prefix, my_wchar_t *localpart, my_wchar_t *data, my_wchar_t *default_namespace, size_t namecount, struct name_val **namespaces, size_t attcount, struct name_val **atts, size_t nodecount, struct xml_node **nodes, struct xml_node *parent) { size_t i, len; struct xml_node *xn; xwrap (errdebug (7, "Creating new xml_node structure for %s\n", name ? utf_16_to_utf_8 (name) : "(anonymous)")); if ((xn = malloc (sizeof (struct xml_node))) == NULL) errabort (40, "malloc error in %s\n", "create_xml_node()"); memset (xn, 0, sizeof (struct xml_node)); xn->type = type; if (name) xn->name = uni_strdup (name); /* namespace-related fields */ if (prefix) xn->prefix = uni_strdup (prefix); if (localpart) xn->localpart = uni_strdup (localpart); if (data) xn->data = uni_strdup (data); /* convert all-whitespace chardata nodes to whitespace nodes */ if (xn->type == chardata) { len = xn->data ? uni_strlen (xn->data) : 0; for (i = 0; i < len; i++) if (xn->data[i] >= 0x80 || ! isspace (xn->data[i])) break; if (i == len) xn->type = whitespace; } /* more namespace fields */ if (default_namespace) xn->default_namespace = uni_strdup (default_namespace); if (namecount) xn->namecount = namecount; if (namespaces) xn->namespaces = NULL; if (attcount == 0 && atts != NULL) errabort (42, "unexpected nonnull val, %s\n", "create_xml_node()"); if (attcount != 0 && atts == NULL) errabort (43, "unexpected null val, %s\n", "create_xml_node()"); xn->attcount = attcount; xn->atts = atts; xn->nodecount = nodecount; xn->nodes = nodes; xn->parent = parent; xwrap (errdebug (5, "Created new xml_node structure for %s\n", name ? 
utf_16_to_utf_8 (name) : "(anonymous)")); return xn; } void free_xml_node (struct xml_node *xn) { size_t i; xwrap (errdebug (7, "Freeing up xml_node %s\n", xn->name ? utf_16_to_utf_8 (xn->name) : "(anonymous)" )); if (xn) { if (xn->name) free (xn->name); if (xn->prefix) free (xn->prefix); if (xn->localpart) free (xn->localpart); if (xn->data) free (xn->data); if (xn->default_namespace) free (xn->default_namespace); if (xn->namespaces) { for (i = 0; i < xn->namecount; i++) free_name_val (xn->namespaces[i]); free (xn->namespaces); } if (xn->atts) { for (i = 0; i < xn->attcount; i++) free_name_val (xn->atts[i]); free (xn->atts); } if (xn->nodes) { for (i = 0; i < xn->nodecount; i++) free_xml_node (xn->nodes[i]); free (xn->nodes); } /* don't touch xn->parent */ free (xn); } } /* * check_node * * This function checks a given node (xn_inner) against its parent's * (xn_outer) contend model - assuming, that is, that the parent is * of type 'element' and assuming that its parent's content model * type is 'children' or 'mixed'. Does some basic checks on other * node types. * * Returns 1 if the node checks out okay; 0 if not (e.g., if the * parent has the wrong type or something. 
*/ int check_node (struct xml_file *xf, struct xml_node *xn_outer, struct xml_node *xn_inner) { size_t i; struct xml_element *xe; my_wchar_t **misc_content_model; if (xn_inner->type == comment || xn_inner->type == pi) /* comments and processing instructions don't count as content */ return 1; /* This can happen if the enclosing element is malformed somehow */ if (xn_outer == NULL) { /* enclosing element isn't defined; can't check its content model */ add_xml_error (xf, 1153, node_type_to_string (xn_inner)); return 0; } /* elname is the name of the enclosing tag (whose content model we * check against) */ if (xn_outer->type != element) errabort (52, "non-element parsed as STag\n"); /* check node against parent's content model, unless the node is * just whitespace, or we're in a namespace and the -f option was * used */ if ((xe = expand_element (xf, xn_outer->name)) == NULL || xe->type == dummy) { if (forced_valid (xf, xn_inner, xn_outer, xn_outer, NULL, 0)) return 1; else if (xn_inner->type == whitespace) return 1; else { /* enclosing element isn't defined; can't check its content model */ add_xml_error (xf, 1150, node_type_to_string (xn_inner)); return 0; } } /* externally defined elements can't directly contain whitespace * in documents declared standalone (see spec., section 2.9) */ if (xe && (xe->flags & DEFINED_EXTERNALLY)) if (xf->standalone == yes) add_xml_error (xf, 1165, xe->name ? uni_truncate_to (xe->name, 20) : utf_8_to_utf_16 ("???")); switch (xe->type) { case empty: /* Oops; xn_inner occurs inside element declared as empty */ add_xml_error (xf, 1151, node_type_to_string (xn_inner)); return 0; case dummy: /* dummy element types are created ad hoc to hold attlists for * elements that aren't (yet) defined */ case Any: /* anything goes, according to xn_outer's content model */ return 1; case mixed: misc_content_model = (my_wchar_t **)xe->content_model; if (! 
misc_content_model) { /* probably a programmer's error; should be nonnull */ add_xml_error (xf, 1101, node_type_to_string (xn_inner)); return 0; } if (xn_inner->type == CData || xn_inner->type == chardata || xn_inner->type == whitespace) /* CharData is always okay for mixed content models */ return 1; else { if (xn_inner->type != element) errabort (44, "unexpected type (#%d) in %s\n", xn_inner->type, "check_node()"); /* Okay, xn_inner is an element. See if xn_outer->name's * content model allows it to be here. Skip check if the * element has a namespace, doesn't expand, and the -f * option is in effect. */ if (! (expand_element (xf, xn_inner->name) || ! forced_valid (xf, xn_inner, (name_val *)NULL, xn_inner, NULL, 0))) return 0; else { for (i = 0; misc_content_model[i] != NULL; i++) if (uni_strcmp (misc_content_model[i], xn_inner->name) == 0) break; if (misc_content_model[i]) return 1; } /* if we get to here, xn_inner violates xn_outer's content model */ add_xml_error (xf, 1152, xn_inner->name); } break; case children: if (xn_inner->type == element) { if (xe->compiled_content_model == NULL) errabort (43, "unexpected NULL value in %s\n", "check_node()"); /* Pick up content-model check where we left off. Skip * check if xn_inner->name isn't declared, it's in a * namespace, and the -f option is in effect */ if (expand_element (xf, xn_inner->name) || ! forced_valid (xf, xn_inner, (name_val *)NULL, xn_inner, NULL, 0)) return check_node_against_dfa (xf, xe->compiled_content_model, xn_inner->name, RESTART_DFA_WALK); } else if (xn_inner->type == chardata || xn_inner->type == CData) { if (xn_inner->type == CData) /* processing instructions, comments are okay; warn about CDATA */ add_xml_error (xf, 1107, node_type_to_string (xn_inner)); /* check CharData for non-whitespace (this code is redundant) */ for (i = 0; xn_inner->data[i] != 0; i++) /* flag non-whitespace as an error */ if (xn_inner->data[i] >= 0x80 || ! 
isspace (xn_inner->data[i])) { add_xml_error (xf, 1155, node_type_to_string (xn_inner)); break; } } break; } return 0; } static my_wchar_t * node_type_to_string (struct xml_node *xn) { char *tmp; switch (xn->type) { case comment: tmp = "(comment)"; break; case pi: tmp = "(pi)"; break; case whitespace: tmp = "(whitespace)"; break; case chardata: tmp = "(CharData)"; break; case CData: tmp = "(CDATA)"; break; case element: return xn->name; break; default: tmp = "(unknown)"; break; } return utf_8_to_utf_16 (tmp); } /* * check_node_against_dfa * * This routine steps through a DFA (arg 1), taken from an element's * compiled_content_model field, and depending on the do_what * argument does one of three things: 1) pushes a new DFA, and * resets our state in that DFA to that DFA's initial state, 2) * checks whether there is a transition out of our current state * with the label, name (arg 2), 3) checks whether we are in a * final state, or 4) zeroes out the stack and reports on its * size before zeroing (should be empty). * * Values for argument three, do_what: * * INITIALIZE_DFA_WALK action 1 above (pushes new DFA) * RESTART_DFA_WALK action 2 above (check for transition) * FINISH_DFA_WALK action 3 above (check if in final state) * CLOBBER_STACK action 4 above (resets stack) * * Returns 1 for "yes" answers to 2 and 3 above; returns 0 on "no". * For more information on the internal structure of DFAs, see the * nfadfa.c file. Returns the size of the stack before zeroing for * 4 above. */ int check_node_against_dfa (struct xml_file *xf, struct dfa_state **dfa_states, my_wchar_t *name, int do_what) { size_t i; int error_num = 0; my_wchar_t *expecting, *msg; struct dfa_state *last_state; /* dstack is file-level static */ /* What was the last DFA state we were in? 
*/ last_state = top_dfa_state (&dstack); switch (do_what) { case INITIALIZE_DFA_WALK: /* intialize by setting last_state to our start state */ push_dfa_state (&dstack, dfa_states[0]); xwrap (errdebug (7, "initialized DFA walk\n")); return 1; case RESTART_DFA_WALK: if (last_state == NULL) { xwrap (errdebug (7, "DFA check already ended for enclosing element (skipping)\n")); /* add_xml_error (xf, 1156, name); * return 0; */ return 1; } else { for (i = 0; i < last_state->tcount; i++) if (uni_strcmp (name, last_state->transitions[i]->label) == 0) { /* good, there's a transiton with label, name (arg 2) */ dstack.states[dstack.len - 1] = last_state->transitions[i]->new_state; xwrap (errdebug (7, "DFA check found transition labeled \"%s\"\n", utf_16_to_utf_8 (name))); return 1; } /* No transition leading from this state with label "name" */ xwrap (errdebug (7, "DFA check shows no transition labeled \"%s\"\n", utf_16_to_utf_8 (name))); error_num = 1152; } break; case FINISH_DFA_WALK: if (last_state == NULL) { /* hmmm; DFA check finished early and we're left with extra elements */ xwrap (errdebug (7, "DFA check is probably out of sync with element nesting\n")); /* add_xml_error (xf, 1157, name); * return 0; */ pop_dfa_state (&dstack); return 1; } else if (last_state->type & FINAL) { /* if we ended in a FINAL state, hurray! 
*/ pop_dfa_state (&dstack); xwrap (errdebug (7, "DFA check finds us in a final state (this is good)\n")); return 1; } else { /* oops; we ended in a non-final state */ xwrap (errdebug (7, "DFA check terminates in a non-final state (this is an error)\n")); error_num = 1154; } break; case CLOBBER_STACK: i = dstack.len; xwrap (errdebug (7, "Done with DFA stack; clobbering it (len = %d; should already be 0)\n", i)); while (pop_dfa_state (&dstack)); return i; break; } /* build up a list of tokens we were expecting to see */ expecting = NULL; for (i = 0; i < last_state->tcount; i++) expecting = append_string_to_comma_delimited_string ( expecting, last_state->transitions[i]->label); /* now form an error msg from the "expecting" list */ msg = uni_strdup (name); msg = uni_concatenate_no_free (msg, utf_8_to_utf_16 (" (expecting: ")); msg = uni_concatenate (msg, expecting ? expecting : uni_strdup (utf_8_to_utf_16 ("[nothing]"))); msg = uni_concatenate_no_free (msg, utf_8_to_utf_16 (")")); add_xml_error (xf, error_num, msg); free (msg); /* set last state to NULL to prevent further parsing of this element's content */ dstack.states[dstack.len - 1] = NULL; return 0; } /* * push_dfa_state * * Push the current DFA state, st, onto stack dstack. See also * pop_dfa() below. * * It is assumed that dstack->states will be NULL when this routine * is first called. 
So be sure to initialize dstack to { NULL,0,0 }; */ static struct dfa_state * push_dfa_state (struct dfa_stack *dstack, struct dfa_state *st) { if (dstack->states == NULL) { dstack->buflen = 5; if ((dstack->states = malloc (sizeof (st) * dstack->buflen)) == NULL) errabort (40, "malloc() failure in %s\n", "push_dfa()"); } if (++dstack->len > dstack->buflen) { dstack->buflen = dstack->len; if ((dstack->states = realloc (dstack->states, sizeof (st) * dstack->buflen)) == NULL) errabort (41, "realloc() failure in %s\n", "push_dfa()"); } dstack->states[dstack->len - 1] = st; return st; } /* * pop_dfa_state * * Pop an int-valued dfa off of dfa_stack dstack. See also * push_dfa() above and top_dfa() below. If there is no dfa on the * stack, returns NULL (which the caller is to interpret as the * initial or start state). */ static struct dfa_state * pop_dfa_state (struct dfa_stack *dstack) { if (dstack->len) dstack->len--; /* default to the start dfa */ if (dstack->len == 0) return NULL; return dstack->states[dstack->len]; } /* * top_dfa_state * * Return top state on dfa_stack, dstack. See also push_dfa() and * pop_dfa() above. If the stack is empty, returns NULL, which the * caller is supposed to interpret as the initial or start state). */ static struct dfa_state * top_dfa_state (struct dfa_stack *dstack) { /* default to the start dfa */ if (dstack->len == 0) return NULL; return dstack->states[dstack->len - 1]; } /* * create_name_val * * Utility routine for creating and initializing a name_val struct. * Returns a pointer to the newly allocated struct. 
*/
struct name_val *
create_name_val (my_wchar_t *name, my_wchar_t *val,
                 my_wchar_t *prefix, my_wchar_t *localpart,
                 int lineno, yesno was_defaulted,
                 struct xml_node *parent)
{
  struct name_val *nv;

  /* shouldn't ever be null; check before allocating anything */
  if (name == NULL)
    errabort (43, "unexpectedly NULL value in %s\n", "create_name_val()");

  if ((nv = malloc (sizeof (struct name_val))) == NULL)
    errabort (40, "malloc() error in %s\n", "create_name_val()");

  /* every string is duplicated with uni_strdup(); NULL optional
   * strings stay NULL */
  nv->name = uni_strdup (name);
  nv->val = val ? uni_strdup (val) : NULL;
  nv->prefix = prefix ? uni_strdup (prefix) : NULL;
  nv->localpart = localpart ? uni_strdup (localpart) : NULL;
  nv->parent = parent;
  /* should be 'yes' if this is an attribute value added by DTD defaulting mechanisms */
  nv->was_defaulted = was_defaulted;
  nv->lineno = lineno;

  return nv;
}

/*
 * free_name_val
 *
 * Utility routine for freeing a name_val struct, along with
 * the name, val, prefix and localpart strings it points to.  A NULL
 * argument is ignored.
 */
void
free_name_val (struct name_val *nv)
{
  if (nv)
    {
      /* free (NULL) is a no-op, so no per-field guards are needed */
      free (nv->name);
      free (nv->val);
      free (nv->prefix);
      free (nv->localpart);
      /* don't touch nv->parent */
      free (nv);
    }
}

#ifdef STANDALONE_PARSTREE_TEST

#include "hashutil.h"
#include "parsutil.h"
#include "readcfg.h"

xmlparse_environment xmlparse_env;

/*
 * print_cmnode
 *
 * Test-only helper: render the content-model tree cmn as a
 * parenthesized string in a freshly allocated 1024-byte buffer, which
 * is never freed.  Every %s conversion carries a precision so that
 * the result always fits the buffer; over-long subtrees are
 * truncated rather than overflowing it.
 */
static char *
print_cmnode (struct cmnode *cmn)
{
  char *buf;

  /* forget the memory leaks; this is just a test */
  if ((buf = malloc (1024)) == NULL)
    errabort (40, "malloc error in %s\n", "print_cmnode()");

  /* a missing subtree prints as an empty group */
  if (cmn == NULL)
    {
      sprintf (buf, "()");
      return buf;
    }

  switch (cmn->type)
    {
    case qmark:
      sprintf (buf, "(%.1000s?)", print_cmnode (cmn->left));
      break;
    case star:
      sprintf (buf, "(%.1000s*)", print_cmnode (cmn->left));
      break;
    case plus:
      sprintf (buf, "(%.1000s+)", print_cmnode (cmn->left));
      break;
    case slash:
      sprintf (buf, "(%.500s|%.500s)",
               print_cmnode (cmn->left),
               print_cmnode (cmn->right));
      break;
    case comma:
      sprintf (buf, "(%.500s,%.500s)",
               print_cmnode (cmn->left),
               print_cmnode (cmn->right));
      break;
    case leaf:
      sprintf (buf, "(%.1000s)", utf_16_to_utf_8 (cmn->text));
      break;
    default:
      /* never hand back an uninitialized buffer */
      sprintf (buf, "(?)");
      break;
    }

  /* yes, yes, this will end up being a lost pointer */
  return buf;
}

/*
 * main (test driver)
 *
 * Parse the file named by the last command-line argument, then print
 * the content model of every element whose type is 'children'.
 */
int
main (int argc, char **argv)
{
  size_t i;
  struct rg_htable *ht;
  struct xml_element *xe;
  struct rg_htable_item *result;

  /* zero out the xmlparse_env structure */
  memset (&xmlparse_env, 0, sizeof (xmlparse_env));

  readcfg (argc, argv);

  if (argc < 2)
    errabort (11, "no files to process\n");
  optind = argc - 1;

  xmlparse_env.filelen = 1;
  /* size the array by its own element type, and don't write through
   * a failed allocation */
  xmlparse_env.xml_files
    = malloc (sizeof *xmlparse_env.xml_files * xmlparse_env.filelen);
  if (xmlparse_env.xml_files == NULL)
    errabort (40, "malloc error in %s\n", "main()");

  for (i = 0; xmlparse_env.filelen > i; i++)
    xmlparse_env.xml_files[i] = create_xml_file (argv[optind + i]);

  for (i = 0; i < xmlparse_env.filelen; i++)
    parse_xml_file (xmlparse_env.xml_files[i]);

  for (i = 0; i < xmlparse_env.filelen; i++)
    if ((ht = xmlparse_env.xml_files[i]->element_names))
      {
        /* the first call names the table; later calls pass NULL to
         * continue the same walk */
        result = rg_get_htable_items (ht);
        while (result != NULL)
          {
            xe = (struct xml_element *)result->data;
            if (xe->type == children)
              {
                printf ("Content model for %s: ",
                        utf_16_to_utf_8 (result->uni_key));
                printf ("%s\n", print_cmnode (xe->content_model));
              }
            result = rg_get_htable_items (NULL);
          }
      }

  return 0;
}

#endif /* STANDALONE_PARSTREE_TEST */