/* ***************************************************************************** * * $rcsfile: lexutil.l,v $ * $Date: 1999/08/16 18:20:58 $ * $Source: /home/richard/Xml/RCS/lexutil.l,v $ * $Revision: 1.141 $ * $Author: richard $ * ***************************************************************************** * * Copyright 1998, 1999 Brown University and Richard Goerwitz * ***************************************************************************** * * Lexer for xmlparse. Goes with parsutil.y. Note that this file * will not work with ordinary Lex. Nor will it work with versions * of flex that cannot handle Unicode. See the INSTALL file for * more details, and for information on obtaining Unicode patches for * flex. * * Note well: To view this file productively, you'll need to set your * editor to show at least 100 columns of text. * ***************************************************************************** * * This file contains specifications for recognizing basic XML language * tokens, including: * * - XML declarations and processing instructions * - character and general/parameter entity references * - doctype and other DTD-related markup declarations * - general markup, quoted strings, comments * * Like most (f)lex specifications, this one contains three sections. * They hold the following items: * * 1) The definitions section contains specifications for Unicode * character classes that form part of the XML spec * 2) The rule section contains patterns for recognizing markup, * processing instructions, etc. 
(as noted above)
 *   3) The user code section contains state stack manipulation and
 *      stream-switching utilities
 *
 * When reading the code below, several points should be kept in
 * mind:
 *
 *   - Global variables all have an xml_yy prefix (e.g., xml_yytext)
 *   - Extensive use is made of state stacks and state-stack utility
 *     routines such as yy_push/pop_state()
 *   - Extensive use is made of Flex's stream-switching capabilities
 *
 * This last point requires some elaboration.
 *
 * XML makes extensive use of PUBLIC/SYSTEM identifiers, which point
 * to external files that must generally be read in and parsed. In
 * order to parse these files, the parser (in parsutil.y) pushes a new
 * input source onto a stack (xml_file_stack) using push_xml_file(),
 * push_entity_text(), or push_xml_string(). Often these routines
 * will also push lexer states using yy_push_state().
 *
 * Note also: This lexer uses a simple string-allocation and garbage
 * collection mechanism. To strdup and to return a pointer to the
 * newly allocated space, use uni_add_string(). To free all strings
 * so allocated, use uni_free_strings() (this gets done after yyparse
 * returns).
 *
 *****************************************************************************
 */
%{
#include "general.h"
#include "grammutil.h"
#include "parsutil.h"
#include "utfutil.h"

/* Element type of xml_file_stack, the input-source stack described in the
 * header comment above. */
struct xml_file_state {
    YY_BUFFER_STATE buffer;         /* flex buffer handle kept for this entry */
    struct xml_file *xf;            /* xml_file record associated with this entry */
    enum scan_objects what_object;  /* NOTE(review): presumably the kind of object being
                                     * scanned, as reported by what_are_we_scanning() -
                                     * confirm in the user code section */
    int lexer_stack_marker;         /* NOTE(review): presumably a MARKER_BASE-derived dummy
                                     * state recorded for this entry - confirm */
};

/* used for pushing dummy states onto the state stack */
#define MARKER_BASE 40000

/* Counters that are not referenced by the rules in this part of the file;
 * presumably maintained by the user code section - confirm there. */
static int eof_count = 0;
static int bad_char_count = 0;

/* The input-source stack itself.
 * NOTE(review): xml_file_stack_len presumably counts the entries in use and
 * xml_file_stack_buflen the allocated capacity - confirm against the push and
 * pop routines. */
static int xml_file_stack_len = 0;
static int xml_file_stack_buflen = 0;
static struct xml_file_state *xml_file_stack = NULL;

/* Forward declarations; the definitions are not in this part of the file.
 * pushback() and push_xml_string() are called from rule actions below. */
static int pushback (my_wchar_t *);
static int push_xml_string (struct xml_file *, my_wchar_t *);
static int check_parend_nesting (my_wchar_t *, my_wchar_t *);

/* Length of the starting_points[] array declared just below. */
#define STATECOUNT 10
/* used to hold line-number starting points for markup, xmldecl, etc.
*/
/* Slot names for starting_points[]. Rule actions store this_file->lineno in a
 * slot when the corresponding construct opens (for example
 * starting_points[markup] when MARKUP is pushed). Nine names are defined while
 * STATECOUNT is 10, so the last slot has no name. */
enum states { markup, xmldecl, pistuff, commstuff, decl, dtd, include, ignore, marked };
static int starting_points[STATECOUNT] = { 0 };
%}

/* control characters minus nil, HT, CR, and LF (not used, for the most part) */
Control [\x01-\x08\x0b-\x0c\x0e-\x1f]

/* Unicode digits */
Digit [\x0030-\x0039\x0660-\x0669\x06f0-\x06f9\x0966-\x096f\x09e6-\x09ef\x0a66-\x0a6f\x0ae6-\x0aef\x0b66-\x0b6f\x0be6-\x0bef\x0c66-\x0c6f\x0ce6-\x0cef\x0d66-\x0d6f\x0e50-\x0e59\x0ed0-\x0ed9\x0f20-\x0f29]

/* The XML standard defines this; doesn't use it right */
Char [\x09\x0a\x0d\x20-\xfffd]

/* Used in the comment production - all chars except '-' */
CommentChar [\x09\x0a\x0d\x20-\x2c\x2e-\xfffd]
/* Same, additionally leaving out the greater-than sign (\x3e) */
CommentCharNoGt [\x09\x0a\x0d\x20-\x2c\x2e-\x3d\x3f-\xfffd]
/* Same, additionally leaving out both \x3c and \x3e */
CommentCharNoGtOrLt [\x09\x0a\x0d\x20-\x2c\x2e-\x3b\x3d\x3f-\xfffd]

/* Everything but the right square bracket, ] (\x5d) */
CDChar [\x09\x0a\x0d\x20-\x5c\x5e-\xfffd]
/* Same, additionally leaving out the greater-than sign (\x3e) */
CDCharNoGt [\x09\x0a\x0d\x20-\x3d\x3f-\x5c\x5e-\xfffd]

/* Everything but the greater-than sign */
CharNoGt [\x09\x0a\x0d\x20-\x3d\x3f-\xfffd]

/* Everything but the less-than sign, &, and whitespace */
CharNoLtOrAmpOrSpace [\x21-\x25\x27-\x3b\x3d-\xfffd]
/* Despite its name this class also leaves out #, ;, = and ?: it admits only
 * \x21, \x24, \x29-\x3a and \x40-\xfffd. */
CharNoLtOrGtOrAmpOrPercentOrLpOrQuoteOrSpace [\x21\x24\x29-\x3a\x40-\xfffd]

/* Everything but the question mark, ?
(\x3f) */
PiChar [\x09\x0a\x0d\x20-\x3e\x40-\xfffd]
/* Same, additionally leaving out the greater-than sign (\x3e) */
PiCharNoGt [\x09\x0a\x0d\x20-\x3d\x40-\xfffd]
/* Same, additionally leaving out both \x3c and \x3e */
PiCharNoGtOrLt [\x09\x0a\x0d\x20-\x3b\x3d\x40-\xfffd]

/* Unicode ideographic characters */
Ideographic [\x3007\x3021-\x3029\x4e00-\x9fa5]

/* Extender characters. Among the definitions visible here this class is
 * referenced only as one alternative of NameChar.
 * NOTE(review): the code points appear to match the Extender production in the
 * character-class appendix of the XML 1.0 recommendation - confirm against
 * the spec. */
Extender [\x00b7\x02d0\x02d1\x0387\x0640\x0e46\x0ec6\x3005\x3031-\x3035\x309d-\x309e\x30fc-\x30fe]

/* Unicode combining characters (diacritics; Hebrew vowel letters,
 * etc.; these go with base characters) */
CombiningChar [\x0300-\x0345\x0360-\x0361\x0483-\x0486\x0591-\x05a1\x05a3-\x05b9\x05bb-\x05bd\x05bf\x05c1-\x05c2\x05c4\x064b-\x0652\x0670\x06d6-\x06dc\x06dd-\x06df\x06e0-\x06e4\x06e7-\x06e8\x06ea-\x06ed\x0901-\x0903\x093c\x093e-\x094c\x094d\x0951-\x0954\x0962-\x0963\x0981-\x0983\x09bc\x09be\x09bf\x09c0-\x09c4\x09c7-\x09c8\x09cb-\x09cd\x09d7\x09e2-\x09e3\x0a02\x0a3c\x0a3e\x0a3f\x0a40-\x0a42\x0a47-\x0a48\x0a4b-\x0a4d\x0a70-\x0a71\x0a81-\x0a83\x0abc\x0abe-\x0ac5\x0ac7-\x0ac9\x0acb-\x0acd\x0b01-\x0b03\x0b3c\x0b3e-\x0b43\x0b47-\x0b48\x0b4b-\x0b4d\x0b56-\x0b57\x0b82-\x0b83\x0bbe-\x0bc2\x0bc6-\x0bc8\x0bca-\x0bcd\x0bd7\x0c01-\x0c03\x0c3e-\x0c44\x0c46-\x0c48\x0c4a-\x0c4d\x0c55-\x0c56\x0c82-\x0c83\x0cbe-\x0cc4\x0cc6-\x0cc8\x0cca-\x0ccd\x0cd5-\x0cd6\x0d02-\x0d03\x0d3e-\x0d43\x0d46-\x0d48\x0d4a-\x0d4d\x0d57\x0e31\x0e34-\x0e3a\x0e47-\x0e4e\x0eb1\x0eb4-\x0eb9\x0ebb-\x0ebc\x0ec8-\x0ecd\x0f18-\x0f19\x0f35\x0f37\x0f39\x0f3e\x0f3f\x0f71-\x0f84\x0f86-\x0f8b\x0f90-\x0f95\x0f97\x0f99-\x0fad\x0fb1-\x0fb7\x0fb9\x20d0-\x20dc\x20e1\x302a-\x302f\x3099\x309a]

/* Unicode base characters (A, B, C, etc.; may be followed by things
 * like combining characters). Starts at ASCII capital A (\x0041).
*/ BaseChar [\x0041-\x005a\x0061-\x007a\x00c0-\x00d6\x00d8-\x00f6\x00f8-\x00ff\x0100-\x0131\x0134-\x013e\x0141-\x0148\x014a-\x017e\x0180-\x01c3\x01cd-\x01f0\x01f4-\x01f5\x01fa-\x0217\x0250-\x02a8\x02bb-\x02c1\x0386\x0388-\x038a\x038c\x038e-\x03a1\x03a3-\x03ce\x03d0-\x03d6\x03da\x03dc\x03de\x03e0\x03e2-\x03f3\x0401-\x040c\x040e-\x044f\x0451-\x045c\x045e-\x0481\x0490-\x04c4\x04c7-\x04c8\x04cb-\x04cc\x04d0-\x04eb\x04ee-\x04f5\x04f8-\x04f9\x0531-\x0556\x0559\x0561-\x0586\x05d0-\x05ea\x05f0-\x05f2\x0621-\x063a\x0641-\x064a\x0671-\x06b7\x06ba-\x06be\x06c0-\x06ce\x06d0-\x06d3\x06d5\x06e5-\x06e6\x0905-\x0939\x093d\x0958-\x0961\x0985-\x098c\x098f-\x0990\x0993-\x09a8\x09aa-\x09b0\x09b2\x09b6-\x09b9\x09dc-\x09dd\x09df-\x09e1\x09f0-\x09f1\x0a05-\x0a0a\x0a0f-\x0a10\x0a13-\x0a28\x0a2a-\x0a30\x0a32-\x0a33\x0a35-\x0a36\x0a38-\x0a39\x0a59-\x0a5c\x0a5e\x0a72-\x0a74\x0a85-\x0a8b\x0a8d\x0a8f-\x0a91\x0a93-\x0aa8\x0aaa-\x0ab0\x0ab2-\x0ab3\x0ab5-\x0ab9\x0abd\x0ae0\x0b05-\x0b0c\x0b0f-\x0b10\x0b13-\x0b28\x0b2a-\x0b30\x0b32-\x0b33\x0b36-\x0b39\x0b3d\x0b5c-\x0b5d\x0b5f-\x0b61\x0b85-\x0b8a\x0b8e-\x0b90\x0b92-\x0b95\x0b99-\x0b9a\x0b9c\x0b9e-\x0b9f\x0ba3-\x0ba4\x0ba8-\x0baa\x0bae-\x0bb5\x0bb7-\x0bb9\x0c05-\x0c0c\x0c0e-\x0c10\x0c12-\x0c28\x0c2a-\x0c33\x0c35-\x0c39\x0c60-\x0c61\x0c85-\x0c8c\x0c8e-\x0c90\x0c92-\x0ca8\x0caa-\x0cb3\x0cb5-\x0cb9\x0cde\x0ce0-\x0ce1\x0d05-\x0d0c\x0d0e-\x0d10\x0d12-\x0d28\x0d2a-\x0d39\x0d60-\x0d61\x0e01-\x0e2e\x0e30\x0e32-\x0e33\x0e40-\x0e45\x0e81-\x0e82\x0e84\x0e87-\x0e88\x0e8a\x0e8d\x0e94-\x0e97\x0e99-\x0e9f\x0ea1-\x0ea3\x0ea5\x0ea7\x0eaa-\x0eab\x0ead-\x0eae\x0eb0\x0eb2-\x0eb3\x0ebd\x0ec0-\x0ec4\x0f40-\x0f47\x0f49-\x0f69\x10a0-\x10c5\x10d0-\x10f6\x1100\x1102-\x1103\x1105-\x1107\x1109\x110b-\x110c\x110e-\x1112\x113c\x113e\x1140\x114c\x114e\x1150\x1154-\x1155\x1159\x115f-\x1161\x1163\x1165\x1167\x1169\x116d-\x116e\x1172-\x1173\x1175\x119e\x11a8\x11ab\x11ae-\x11af\x11b7-\x11b8\x11ba\x11bc-\x11c2\x11eb\x11f0\x11f9\x1e00-\x1e9b\x1ea0-\x1ef9\x1f00-\x1f15\x1f18-\x1f1d\x1f20-\
x1f45\x1f48-\x1f4d\x1f50-\x1f57\x1f59\x1f5b\x1f5d\x1f5f-\x1f7d\x1f80-\x1fb4\x1fb6-\x1fbc\x1fbe\x1fc2-\x1fc4\x1fc6-\x1fcc\x1fd0-\x1fd3\x1fd6-\x1fdb\x1fe0-\x1fec\x1ff2-\x1ff4\x1ff6-\x1ffc\x2126\x212a-\x212b\x212e\x2180-\x2182\x3041-\x3094\x30a1-\x30fa\x3105-\x312c\xac00-\xd7a3]

/* A Letter is a base or ideographic character; NameChar adds digits, the
 * punctuation . - _ : and the combining and extender classes. */
Letter {BaseChar}|{Ideographic}
NameChar {Letter}|{Digit}|\.|-|_|:|{CombiningChar}|{Extender}

/* xml_yytext instead of yytext, etc. */
%option prefix="xml_yy"
/* we'll be using start-condition stacks */
%option stack
/* make sure we don't look ahead farther than necessary */
%option always-interactive

/* exclusive states; ain't flex cool? */
%x MARKUP XMLDECL PISTUFF COMMSTUFF DTD DECL IGNORE IGNORE_DECL IGNORE_MARKED_SECT

%%
<*>\xFFFE|\xFFFF {
    /* FFFE and FFFF are NOT Unicode characters. FFFE
     * is used to detect byte order; if we see it first
     * thing in a file, it tells the processor to swap
     * bytes. We should never see it here. */
    /* Active in every start condition; reports 1057 and returns no token. */
    add_xml_error (this_file, 1057, xml_yytext);
}
]*]]> {
    /* NOTE(review): this pattern looks truncated at its start. As written, the
     * matched text ends in ]]> and is reported as error 1300; no token is
     * returned. */
    add_xml_error (this_file, 1300, xml_yytext);
}
{CharNoLtOrAmpOrSpace}+ {
    /* Run of text without whitespace, ampersand or less-than: returned as
     * CHAR_DATA using a copy made by uni_add_string(). Error 1300 is also
     * reported when a ] is present and ]]> occurs at or after it. */
    my_wchar_t *wp;
    if ((wp = uni_strchr (xml_yytext, ']')))
        if (uni_utf_strstr (wp, "]]>"))
            /* see the above pattern */
            add_xml_error (this_file, 1300, xml_yytext);
    xml_yylval.wstr = uni_add_string (xml_yytext);
    return CHAR_DATA;
}
[\x09\x0d\x0a\x20]+<\?[\x09\x0d\x0a\x20]*([Xx][Mm][Ll]) {
    my_wchar_t c, *wp;
    /* only perform check on main document and
     * its external DTD entity (if there is one);
     * read_entire_xml_file() will handle other
     * files */
    /* advance wp to the < that follows the leading whitespace */
    for (wp = xml_yytext; *wp && *wp != '<'; wp++);
    if (validating (this_file) && this_file->lineno <= 1) {
        /* report 386 with only the leading whitespace as its text: terminate
         * the buffer at wp for the call, then restore the saved character */
        c = *wp;
        *wp = 0;
        add_xml_error (this_file, 386, xml_yytext);
        *wp = c;
    }
    /* pushback() receives the text from < onward; presumably it returns that
     * text to the input for rescanning - confirm in the user code section */
    pushback (wp);
}
[\x09\x0d\x0a\x20]+ {
    /* whitespace run, returned verbatim as WHITESPACE */
    xml_yylval.wstr = uni_add_string (xml_yytext);
    return WHITESPACE;
}
&& {
    /* two ampersands in a row are passed through as CHAR_DATA */
    xml_yylval.wstr = uni_add_string (xml_yytext);
    return CHAR_DATA;
}
&< {
    /* an ampersand directly before a less-than sign is passed through as CHAR_DATA */
    xml_yylval.wstr = uni_add_string (xml_yytext);
    return CHAR_DATA;
}
&#x?0+; {
    /* character reference whose digits are all zeros: error 1051, no token */
    add_xml_error (this_file, 1051, xml_yytext);
}
&({Digit})+; {
add_xml_error (this_file, 1052, xml_yytext); }
&({Digit})+ {
    /* ampersand plus digits with neither # nor a closing semicolon:
     * error 1053, no token */
    add_xml_error (this_file, 1053, xml_yytext);
}
&(#({Digit})+|#x[0-9a-fA-F]+); {
    /* Numeric character reference, decimal or hexadecimal. When not
     * validating, the reference text itself is returned as CHAR_DATA.
     * Otherwise it is expanded through map_entities(); a NULL result yields
     * error 1003 and BAD_ENTITY_REF, else the expansion is returned as
     * CHAR_DATA. */
    my_wchar_t *expansion;
    if (! validating (this_file)) {
        xml_yylval.wstr = uni_add_string (xml_yytext);
        return CHAR_DATA;
    } else {
        /* NOTE(review): this test uses != file, whereas the otherwise parallel
         * check in the named-entity rule below uses == file - confirm that
         * the difference is intended. */
        if (! in_document_content && what_are_we_scanning () != file)
            add_xml_error (this_file, 1050, xml_yytext);
        /* attempt to expand entity */
        expansion = map_entities (this_file, xml_yytext,
                                  MAP_CHAR_ENTITIES | MAP_AMP_AND_LT, 0);
        if (expansion == NULL) {
            add_xml_error (this_file, 1003, xml_yytext);
            xml_yylval.wstr = uni_add_string (xml_yytext);
            return BAD_ENTITY_REF;
        } else {
            xml_yylval.wstr = uni_add_string (expansion);
            /* map_entities returns a malloc'd string */
            free (expansion);
            return CHAR_DATA;
        }
    }
}
&(({Letter})|_|:)({NameChar})*; {
    /* Named entity reference. When not validating, the reference text is
     * returned as CHAR_DATA. Otherwise the expansion is handed to
     * push_xml_string() and this action returns no token on success; a NULL
     * expansion yields error 1003 and BAD_ENTITY_REF. */
    my_wchar_t *expansion;
    if (! validating (this_file)) {
        xml_yylval.wstr = uni_add_string (xml_yytext);
        return CHAR_DATA;
    } else {
        if (! in_document_content && what_are_we_scanning () == file)
            /* entity refs must be in document content */
            add_xml_error (this_file, 1050, xml_yytext);
        /* recursively expand entity */
        expansion = map_entities (this_file, xml_yytext,
                                  MAP_GENERAL_ENTITIES | ABORT_ON_FAILURE, 0);
        if (expansion == NULL) {
            add_xml_error (this_file, 1003, xml_yytext);
            xml_yylval.wstr = uni_add_string (xml_yytext);
            return BAD_ENTITY_REF;
        } else {
            /* rescan expansion */
            /* NOTE(review): the int result of push_xml_string() is ignored,
             * and freeing expansion right afterwards assumes the callee keeps
             * its own copy - confirm in the user code section. */
            push_xml_string (this_file, expansion);
            /* map_entities returns a malloc'd string */
            free (expansion);
        }
    }
}
&[^;<\x09\x0d\x0a\x20]* {
    /* ampersand followed by any run of characters other than ; < and
     * whitespace: error 1004, text returned as BAD_ENTITY_REF */
    add_xml_error (this_file, 1004, xml_yytext);
    xml_yylval.wstr = uni_add_string (xml_yytext);
    return BAD_ENTITY_REF;
}
\child == no) {
        /* NOTE(review): the text in front of this point (the rule pattern and
         * the start of its action) appears to be missing from the file. */
        /* Try to catch bogus DOCTYPE decls w/ no ! */
        if (this_file->lineno <= 2)
            add_xml_warning (this_file, 405, xml_yytext);
    }
    yy_push_state (MARKUP);
    /* set global var to indicate we're in document content */
    if (!
in_document_content && this_file->child == no
        && what_are_we_scanning () != entity_replacement_text)
        in_document_content = 1;
    /* remember the line on which this markup opened */
    starting_points[markup] = this_file->lineno;
    /* token text is the match without its first character */
    xml_yylval.wstr = uni_add_string (&xml_yytext[1]);
    return ELEMENT;
}
\<[\x09\x0d\x0a\x20]*(({Letter})|_|:)({NameChar})* {
    /* Start tag: less-than sign, optional whitespace (reported as error
     * 1115), then a name. Pushes MARKUP and returns the name as ELEMENT. */
    size_t i;
    yy_push_state (MARKUP);
    /* set global var to indicate we're in document content */
    if (! in_document_content && this_file->child == no
        && what_are_we_scanning () != entity_replacement_text)
        in_document_content = 1;
    starting_points[markup] = this_file->lineno;
    if (uni_isspace (xml_yytext[1]))
        add_xml_error (this_file, 1115, uni_truncate_to (xml_yytext, 20));
    /* skip the < and any whitespace so that i lands on the name */
    for (i = 1; uni_isspace (xml_yytext[i]); i++);
    xml_yylval.wstr = uni_add_string (&xml_yytext[i]);
    return ELEMENT;
}
\<\/[\x09\x0d\x0a\x20]* {
    /* End-tag opener with no name after it: always reports error 1111, pushes
     * MARKUP and returns whatever follows the skipped whitespace as ETAG. */
    int i;
    yy_push_state (MARKUP);
    starting_points[markup] = this_file->lineno;
    /* an empty tag; too bad they didn't use this in XML */
    add_xml_error (this_file, 1111, NULL);
    /* skip the two delimiter characters and any whitespace */
    for (i = 2; uni_isspace (xml_yytext[i]); i++);
    xml_yylval.wstr = uni_add_string (&xml_yytext[i]);
    return ETAG;
}
\<[^\x09\x0d\x0a\x20>?!\/]+({NameChar})+ {
    /* NOTE(review): this pattern does not allow \x09, \x0d, \x0a or \x20
     * directly after the less-than sign, so the whitespace check and skip
     * later in this action look unreachable unless uni_isspace() accepts
     * other characters - confirm. */
    size_t i;
    /* element name (GI) begins with illegal char(s) */
    add_xml_error (this_file, 1123, xml_yytext);
    yy_push_state (MARKUP);
    starting_points[markup] = this_file->lineno;
    /* set global var to indicate we're in document content */
    if (!
in_document_content && this_file->child == no && what_are_we_scanning () != entity_replacement_text) in_document_content = 1; if (uni_isspace (xml_yytext[1])) add_xml_error (this_file, 1115, uni_truncate_to (xml_yytext, 20)); for (i = 1; uni_isspace (xml_yytext[i]); i++); xml_yylval.wstr = uni_add_string (&xml_yytext[i]); return ELEMENT; } \<\/[\x09\x0d\x0a\x20]*(({Letter})|_|:)({NameChar})* { size_t i; yy_push_state (MARKUP); starting_points[markup] = this_file->lineno; if (uni_isspace (xml_yytext[2])) add_xml_error (this_file, 1115, uni_truncate_to (xml_yytext, 20)); for (i = 2; uni_isspace (xml_yytext[i]); i++); xml_yylval.wstr = uni_add_string (&xml_yytext[i]); return ETAG; } \<\/[^\x09\x0d\x0a\x20>?!]+({NameChar})+ { size_t i; /* element name (GI) begins with illegal char(s) */ add_xml_error (this_file, 1123, xml_yytext); yy_push_state (MARKUP); starting_points[markup] = this_file->lineno; if (uni_isspace (xml_yytext[2])) add_xml_error (this_file, 1115, uni_truncate_to (xml_yytext, 20)); for (i = 2; uni_isspace (xml_yytext[i]); i++); xml_yylval.wstr = uni_add_string (&xml_yytext[i]); return ETAG; } #[Cc][Oo][Nn][Rr][Ee][Ff] { xml_yylval.wstr = uni_add_string (xml_yytext); return CONREF; } #[Cc][Uu][Rr][Rr][Ee][Nn][Tt] { xml_yylval.wstr = uni_add_string (xml_yytext); return CURRENT; } \/[\x09\x0d\x0a\x20]+> { yy_pop_state (); xml_yytext[1] = 0; /* spurious whitespace after / in "/ >" */ add_xml_error (this_file, 1114, xml_yytext); return EMPTY_ELEMENT_END_DELIM; } \/> { yy_pop_state (); return EMPTY_ELEMENT_END_DELIM; } > { yy_pop_state (); return ELEMENT_OR_ETAG_END_DELIM; } \<[\x09\x0d\x0a\x20]+\/ { my_wchar_t *wp; xml_yytext[1] = 0; /* spurious whitespace after < in "< /" */ add_xml_error (this_file, 1114, xml_yytext); wp = uni_strdup (utf_8_to_utf_16 ("<[\x09\x0d\x0a\x20]+\? { my_wchar_t *wp; xml_yytext[1] = 0; /* spurious whitespace after < in "< ?" 
*/ add_xml_error (this_file, 1114, xml_yytext); wp = uni_strdup (utf_8_to_utf_16 ("<\?(({Letter})|_|:)({NameChar})* { size_t i; xml_yylval.wstr = uni_add_string (&xml_yytext[2]); if (uni_utf_strcasecmp (xml_yylval.wstr, "xml") != 0) { /* not an lineno; return PI_TARGET; } else { if (this_file->lineno <= 1) { for (i = 0; xml_yytext[i] != 0; i++) { if (uni_utf_any (&xml_yytext[i], "XML")) { /* "XML" is supposed to be lower case */ add_xml_error (this_file, 365, xml_yylval.wstr); uni_downcase (xml_yytext); break; } } /* if we're on line one, lineno; return XMLSTART_DELIM; } if (what_are_we_scanning () != file) /* XML decl should only occur literally (== file) */ add_xml_error (this_file, 367, xml_yylval.wstr); /* lineno; yy_push_state (PISTUFF); return PI_TARGET; } } \<\?[\x09\x0d\x0a\x20]+ { my_wchar_t *wp; /* superfluous (and in fact invalid) whitespace */ add_xml_error (this_file, 501, uni_truncate_to (xml_yytext, 20)); xml_yytext[2] = 0; wp = uni_strdup (xml_yytext); pushback (wp); free (wp); } VERSION { xml_yylval.wstr = uni_add_string (xml_yytext); add_xml_error (this_file, 365, xml_yytext); return VERSION; } version { xml_yylval.wstr = uni_add_string (xml_yytext); return VERSION; } ENCODING { xml_yylval.wstr = uni_add_string (xml_yytext); add_xml_error (this_file, 365, xml_yytext); return ENCODING; } encoding { xml_yylval.wstr = uni_add_string (xml_yytext); return ENCODING; } STANDALONE { xml_yylval.wstr = uni_add_string (xml_yytext); add_xml_error (this_file, 365, xml_yytext); return STANDALONE; } standalone { xml_yylval.wstr = uni_add_string (xml_yytext); return STANDALONE; } \?[\x09\x0d\x0a\x20]+> { yy_pop_state (); xml_yytext[1] = 0; /* spurious whitespace after ? in "? >" */ add_xml_error (this_file, 1114, xml_yytext); return XML_DECL_END_DELIM; } \?+> { yy_pop_state (); if (uni_utf_strncmp (xml_yytext, "??", 2) == 0) /* XML decl shouldn't end with "??>", etc. 
*/ add_xml_error (this_file, 373, xml_yytext); return XML_DECL_END_DELIM; } <\?[\x09\x0d\x0a\x20]+ { my_wchar_t *wp; add_xml_error (this_file, 501, xml_yytext); xml_yytext[2] = 0; wp = uni_strdup (xml_yytext); pushback (wp); free (wp); } <\?[^?>\x09\x0d\x0a\x20]* { xml_yylval.wstr = uni_add_string (&xml_yytext[2]); starting_points[pistuff] = this_file->lineno; add_xml_error (this_file, 354, xml_yylval.wstr); yy_push_state (PISTUFF); return PI_TARGET; } (({PiCharNoGtOrLt})|\?+({PiCharNoGtOrLt}))* { my_wchar_t *wp, *tmp; yy_pop_state (); if (! starting_points[pistuff]) { tmp = utf_8_to_utf_16 ("lineno = starting_points[pistuff]; tmp = utf_8_to_utf_16 ("lineno++; } } xml_yytext[xml_yyleng - 1] = 0; xml_yylval.wstr = uni_add_string (xml_yytext); return PIDATA; } (({PiChar})|\?+({PiCharNoGt}))*\?+> { yy_pop_state (); xml_yytext[xml_yyleng - 2] = 0; xml_yylval.wstr = uni_add_string (xml_yytext); return PIDATA; } ")); xml_yylval.wstr = uni_add_string (xml_yytext); return COMMDATA; } lineno, this_file->cond_levels, this_file->cond_levels + 1)); this_file->cond_levels++; /* INCLUDE/IGNORE marked sections are illegal in content */ add_xml_error (this_file, 860, uni_truncate_to (xml_yytext, 20)); yy_push_state (IGNORE); } lineno, this_file->cond_levels, this_file->cond_levels + 1)); this_file->cond_levels++; /* ignore this for now */ starting_points[ignore] = this_file->lineno; yy_push_state (IGNORE); } } { int i = 9; if (YY_START != INITIAL) { /* CDATA can't occur in DTD; only in text */ add_xml_error (this_file, 1301, uni_truncate_to (xml_yytext, 20)); } else { if (xml_yytext[3] == 'R') { i++; /* RCDATA not allowed anywhere in XML */ add_xml_error (this_file, 1310, uni_truncate_to (xml_yytext, 20)); } xml_yytext[xml_yyleng - 3] = 0; /* add this CDATA string to string pool */ xml_yylval.wstr = uni_add_string (&xml_yytext[i]); return CDATA; } } \child != no) /* we're in an external XML file */ add_xml_error (this_file, 410, xml_yytext); if (YY_START == DTD) /* we're already 
inside a DOCTYPE decl */ add_xml_error (this_file, 412, xml_yytext); if (seen_doctype_decl) /* naive users sometimes put in two DOCTYPE decls */ add_xml_error (this_file, 863, xml_yytext); yy_push_state (DECL); add_xml_error (this_file, 368, &xml_yytext[2]); starting_points[decl] = this_file->lineno; /* NB: DocType must match doc's root element type */ return DOCTYPEDECL; } \child != no) /* we're in an external XML file */ add_xml_error (this_file, 410, xml_yytext); if (YY_START == DTD) /* we're already inside a DOCTYPE decl */ add_xml_error (this_file, 412, xml_yytext); if (seen_doctype_decl) /* naive users sometimes put in two DOCTYPE decls */ add_xml_error (this_file, 863, xml_yytext); yy_push_state (DECL); starting_points[decl] = this_file->lineno; /* NB: DocType must match doc's root element type */ return DOCTYPEDECL; } \[ { /* switch input streams if there was a pub or sys * identifier; switch back when we hit <> */ yy_push_state (DTD); starting_points[dtd] = this_file->lineno; return BEGIN_DTD_DELIM; } lineno; return ELEMENTDECL_DELIM; } lineno; return ELEMENTDECL_DELIM; } lineno; return ELEMENTDECL_DELIM; } lineno; yy_push_state (DECL); return ATTLISTDECL_DELIM; } lineno; /* Emit warning if element isn't declared elsewhere */ return ATTLISTDECL_DELIM; } lineno; /* Emit warning if element isn't declared elsewhere */ return ATTLISTDECL_DELIM; } lineno; yy_push_state (DECL); return PARAMETER_ENTITY_DECL_DELIM; } lineno; return PARAMETER_ENTITY_DECL_DELIM; } lineno; yy_push_state (DECL); return ENTITYDECL_DELIM; } lineno; return ENTITYDECL_DELIM; } lineno; return ENTITYDECL_DELIM; } lineno; yy_push_state (DECL); return NOTATIONDECL_DELIM; } lineno; return NOTATIONDECL_DELIM; } lineno; return NOTATIONDECL_DELIM; } <\/?(({Letter})|_|:)({NameChar})*([\"\'][^\"\']*[\'\"]|[^>])*> { int errnum; /* oops; appears in DTD or ext PEnt */ errnum = validating (this_file) ? 
464 : 465; add_xml_error (this_file, errnum, xml_yytext); } lineno, this_file->cond_levels, this_file->cond_levels + 1)); this_file->cond_levels++; /* ignore this for now */ starting_points[ignore] = this_file->lineno; yy_push_state (IGNORE); } else { /* recursively expand entity; don't pad here */ expansion = map_entities (this_file, xml_yytext, MAP_PARAMETER_ENTITIES | ABORT_ON_FAILURE, 0); if (expansion == NULL) add_xml_error (this_file, 450, uni_truncate_to (xml_yytext, 20)); else { uni_utf_strip (expansion, "\x09\x0d\x0a\x20"); if (uni_utf_strncmp (expansion, "lineno, this_file->cond_levels, this_file->cond_levels + 1)); this_file->cond_levels++; xwrap (errdebug (5, "lineno; yy_push_state (IGNORE); } else if (uni_utf_strncmp (expansion, "lineno, this_file->cond_levels, this_file->cond_levels + 1)); this_file->cond_levels++; xwrap (errdebug (5, "lineno; yy_push_state (YY_START); if (uni_strlen (expansion) > 11) push_xml_string (this_file, &expansion[11]); } else { /* rescan expansion */ xwrap (errdebug (5, " "); uni_strcat (tmp, uni_truncate_to (expansion, 20)); uni_utf_strcat (tmp, ")"); add_xml_error (this_file, 862, tmp); free (tmp); } if (uni_strlen (expansion) <= xml_yyleng) pushback (expansion); else /* necessitated here by an apparent Flex bug */ push_xml_string (this_file, expansion); } /* map_entities() returns a malloc'd string */ free (expansion); } } } lineno, this_file->cond_levels, this_file->cond_levels + 1)); this_file->cond_levels++; if (! in_external_dtd_subset (this_file)) /* Dunno why INCLUDE/IGNORE are not OK in internal subset. */ add_xml_error (this_file, 861, uni_truncate_to (xml_yytext, 14)); starting_points[include] = this_file->lineno; yy_push_state (YY_START); } lineno, this_file->cond_levels, this_file->cond_levels + 1)); this_file->cond_levels++; if (! in_external_dtd_subset (this_file)) /* Dunno why INCLUDE/IGNORE are not OK in internal subset. 
*/ add_xml_error (this_file, 861, uni_truncate_to (xml_yytext, 14)); add_xml_error (this_file, 852, uni_truncate_to (xml_yytext, 20)); starting_points[include] = this_file->lineno; yy_push_state (YY_START); } lineno, this_file->cond_levels, this_file->cond_levels + 1)); this_file->cond_levels++; if (! in_external_dtd_subset (this_file)) /* Dunno why INCLUDE/IGNORE are not OK in internal subset. */ add_xml_error (this_file, 861, uni_truncate_to (xml_yytext, 14)); starting_points[ignore] = this_file->lineno; yy_push_state (IGNORE); } lineno, this_file->cond_levels, this_file->cond_levels + 1)); this_file->cond_levels++; if (! in_external_dtd_subset (this_file)) /* Dunno why INCLUDE/IGNORE are not OK in internal subset. */ add_xml_error (this_file, 861, uni_truncate_to (xml_yytext, 14)); add_xml_error (this_file, 852, uni_truncate_to (xml_yytext, 20)); starting_points[ignore] = this_file->lineno; yy_push_state (IGNORE); } ]]> { xwrap (errdebug (7, "cond section end, line %d; level was %d; now = %d\n", this_file->lineno, this_file->cond_levels, this_file->cond_levels - 1)); if (--this_file->cond_levels >= 0) yy_pop_state (); else { this_file->cond_levels = 0; add_xml_error (this_file, 851, xml_yytext); } } <\?(({Letter})|_|:)({NameChar})*({PiChar}|\?{CharNoGt})+\?> { xwrap (errdebug (7, "bypassed PI ending at line %d\n", this_file->lineno)); }