/*
 *****************************************************************************
 *
 * $RCSfile: dtdutil.c,v $
 * $Date: 1999/05/18 18:36:20 $
 * $Source: /home/richard/Xml/RCS/dtdutil.c,v $
 * $Revision: 1.50 $
 * $Author: richard $
 *
 *****************************************************************************
 *
 * Copyright 1998, Brown University and Richard Goerwitz
 *
 *****************************************************************************
 *
 * Routines for resolving public and/or system identifiers into an
 * xml_file structure pointer.  To view this file effectively, you'll
 * need to set your editor to display at least 100 columns across.
 *
 * Main entry point is:
 *
 *   resolve_pub_or_sysid (xml_file *xf, my_wchar_t *pubid, my_wchar_t *sysid)
 *
 * Where xf points to the resource where pubid and/or sysid were
 * found, where pubid is a public identifier string (UTF-16) taken
 * from an XML file, and where sysid is a similar system identifier
 * string taken from an XML file.  Remember that, internally, all XML
 * files are converted to UTF-16.
 *
 * Either pubid (arg 2) or sysid (arg 3) may be NULL.  If neither is
 * NULL, they should be alternatives (that is, different designations
 * for the same resource).  The public identifier will be resolved
 * first, and if that fails to resolve to a readable file, then the
 * system identifier will be tried.
 *
 * If neither resolves, resolve_pub_or_sysid() returns NULL.  If one
 * or the other resolves, the return value is a pointer to an
 * xml_file struct, which may be turned into a child xml_file struct
 * via add_xml_file(), or read in via read_entire_xml_file().
 *
 * There is also an entry point for resolving public identifiers as
 * URIs (used when parsing NOTATION decls):
 *
 *   resolve_pubid_as_uri (xml_file *xf, my_wchar_t *pubid)
 *
 * All this routine does is convert (if possible) pubid into a system
 * identifier (i.e., a URI).  If it can't, it returns NULL.
 * Otherwise it returns a pointer to a my_wchar_t string.
 *
 *****************************************************************************
 *
 * Note that one must either define an external FPI resolution cmd
 * (see the sample configuration file on how to do this), or else
 * provide the names of catalog files in one of the following ways:
 * 1) by setting one's SGML_CATALOG_FILES environment variable, 2) by
 * the -C command-line switch, or 3) through the configuration file
 * (see the sample xmlparse.cfg file).
 *
 * The external FPI resolution command must read an FPI from its
 * stdin, and write a corresponding system identifier (URI) to its
 * stdout.  It must exit with zero status on success and with
 * non-zero status on failure.
 *
 * The SGML_CATALOG_FILES environment variable is a colon-separated
 * list of paths.  The -C option uses the same format.
 *
 * Catalog files are essentially legacy SGML auxiliary files.  The
 * routines contained in this file ignore all lines in them that do
 * not begin with the keyword "PUBLIC".
 *
 * The format of PUBLIC lines in the catalog file is basically:
 *
 *   PUBLIC "quoted public identifier" "quoted system identifier"
 *
 * Quotation marks may be either single ('') or double (""), and
 * must be correctly paired (e.g., 'this' or "that").
 *
 *  Note 1:
 *
 *    Comments may be freely interspersed with everything else.  They
 *    are bounded by a double dash:
 *
 *      PUBLIC -- this keyword is mandatory --
 *        "-//W3C//DTD HTML 3.2//EN" -- must be quoted --
 *        /usr/local/lib/sgml/html/html32.dtd
 *
 *  Note 2:
 *
 *    System identifiers should be quoted, but need not be (as above).
 *    If one is found unquoted, it is considered terminated by the
 *    next newline.  Public identifiers, though, must be quoted.  The
 *    quotes do not protect whitespace inside, however.  According to
 *    the standard, all strings of whitespace get mapped to a single
 *    space.
 *
 *  Note 3:
 *
 *    Paths in system identifiers are relative to the directory in
 *    which the catalog file sits, so full (i.e., absolute) paths are
 *    probably to be discouraged, since they reduce the portability of
 *    the DTD hierarchy.
 *
 *  Note 4:
 *
 *    If for some bizarre reason you wish to place a double quotation
 *    mark (") inside a quoted identifier, just use single quotes on
 *    the outside - or backslash escape the quotation mark (this is
 *    the only backslash escape the parser recognizes; it's needed if
 *    you have a system identifier with both single and double quotes
 *    in it).
 *
 *****************************************************************************
 */

#include "dtdutil.h"
#include "bidipipe.h"
#include "errabort.h"
#include "fileutil.h"
#include "hashutil.h"
#include "lockutil.h"
#include "sigstuff.h"
#include "strutil.h"
#include "utfutil.h"
#include "xtrautil.h"
#include "setjmp.h"

/*
 * Jump buffer shared by read_line_from_external_program() (which sets
 * it) and the SIGALRM handler external_prog_timed_out() (which jumps
 * to it).  The sigjmp_buf flavor is used whenever sigsetjmp is known
 * to be available, either via HAVE_SIGSETJMP or because sigsetjmp is
 * itself a macro.
 */
#ifdef HAVE_SIGSETJMP
static sigjmp_buf jump_buffer;
#else
# ifdef sigsetjmp
static sigjmp_buf jump_buffer;
# else
static jmp_buf jump_buffer;
# endif
#endif

/*
 * Fallback for systems that lack ptrdiff_t.
 * NOTE(review): ptrdiff_t is normally a typedef, which #ifndef cannot
 * see, so this macro is defined even where <stddef.h> provides the
 * type - confirm that is intended.
 */
#ifndef ptrdiff_t
# define ptrdiff_t int
#endif

/* States of the catalog-file scanner in process_catalog_file(). */
enum states { initial, pubid, sysid, in_comment, discard };

/* Pipe to the currently running external helper; NULL when none is open. */
static FILE *external_prog = NULL;

/* State stack used by process_catalog_file(). */
static struct state_stack stack = { NULL, 0, 0 };

/* SIGALRM handler; installed by read_line_from_external_program(). */
RETSIGTYPE external_prog_timed_out (int);

static char *read_line_from_external_program (char *, char *);
static struct rg_htable *create_public_to_system_identifier_hashtable (char *);
static char *map_spaces_to_space (char *);
static char *see_if_its_local (char *, char *);
static struct rg_htable *process_catalog_file (char *, struct rg_htable *);

/*
 * resolve_pubid_as_uri:
 *
 *   Resolves pubid as a URI (returned here as a UTF-16 string).
 *   Returns NULL if there is no CATALOG file (or if, for some
 *   reason, the one we expect to find doesn't exist, or is
 *   unreadable), or if there is no entry for pubid in an existing
 *   CATALOG file.
*/

/*
 * Further notes on resolve_pubid_as_uri():
 *
 *   xf     file the identifier was found in; it is only handed on to
 *          add_xml_warning() and add_xml_error().
 *          NOTE(review): resolve_pub_or_sysid() can pass a NULL xf
 *          through to here - confirm those two routines accept NULL.
 *   pubid  UTF-16 public identifier; NULL yields a NULL result.
 *
 *   The external FPI resolution command (if configured) is tried
 *   first; the catalog hashtable is consulted only when there is no
 *   command or the command fails.
 *
 *   The result lives in a function-local static buffer that is
 *   overwritten (and possibly reallocated) by the next call, so the
 *   caller must not free it and must copy it to keep it.
 */
my_wchar_t *
resolve_pubid_as_uri (struct xml_file *xf, my_wchar_t *pubid)
{
  size_t len;
  char *tmp, *cmd;
  my_wchar_t *wp, *utf_16_sysid;
  static my_wchar_t *uribuf = NULL;
  static size_t uribuflen = 0;
  struct rg_htable_item it, *result;

  if (uribuf == NULL)
    {
      /* set up static buffer to store results in */
      uribuflen = 64U;
      if ((uribuf = malloc (uribuflen * sizeof (my_wchar_t))) == NULL)
        errabort (40, "malloc() error in %s\n", "resolve_pubid_as_uri()");
    }

  if (pubid == NULL)
    return NULL;                /* nothing to resolve */

  /* check for legal PubID chars; see 1.0 spec, sect. 13 */
  for (wp = pubid; *wp != 0; wp++)
    if (! (*wp < 0x80
           && (isspace (*wp) || isalnum (*wp)
               || uni_utf_any (wp, "-'()+,./:=?;!*#@$_%"))))
      add_xml_warning (xf, 569, uni_truncate_to (wp, 2));

  cmd = xmlparse_env.fpi_resolution_cmd_string;
  if (cmd)
    {
      /* If cmd is nonnull, the user has given us (via the -p option
       * or via the fpi_resolution_cmd_string configuration file
       * option) an external command for resolving FPIs.  Try to use
       * it; a failure simply falls through to the catalog lookup.
       */
      tmp = utf_16_to_utf_8 (pubid);
      if ((tmp = read_line_from_external_program (tmp, cmd)) != NULL)
        {
          trim (tmp, "\r\n");
          utf_16_sysid = utf_8_to_utf_16 (tmp);
          goto store_result;
        }
    }

  /* Either an fpi_resolution_cmd_string was not supplied, or the
   * command it gives failed.  In either case, now try the catalog
   * file(s).
   */

  /* read in all catalog files; initialize pub->sysid hashtable */
  if (xmlparse_env.public_to_system_identifier_hashtable == NULL)
    xmlparse_env.public_to_system_identifier_hashtable =
      create_public_to_system_identifier_hashtable (
        xmlparse_env.sgml_catalog_filenames);

  /* Now, finally, see if pubid (arg 2) is in the hashtable.  The key
   * is a private, whitespace-normalized UTF-8 copy; the copy is
   * checked because map_spaces_to_space() dereferences its argument.
   */
  it.uni_key = NULL;
  if ((it.key = strdup (utf_16_to_utf_8 (pubid))) == NULL)
    errabort (40, "malloc() error in %s\n", "resolve_pubid_as_uri()");
  map_spaces_to_space (it.key);
  xwrap (errdebug (3, "trying to resolve pubid %s\n", it.key));
  it.data = NULL;
  result = rg_find_item (
    xmlparse_env.public_to_system_identifier_hashtable, it);
  free (it.key);

  if (result == NULL)
    return NULL;                /* can't resolve pubid */

  if (*(char *) result->data == '\0')
    /* empty, "", system identifier; I hate it when people do this */
    add_xml_error (xf, 564, NULL);
  utf_16_sysid = utf_8_to_utf_16 (result->data);

 store_result:
  /* copy sysid into the static holding area, growing it if needed */
  len = uni_strlen (utf_16_sysid) + 1;
  if (len > uribuflen)
    {
      uribuflen = len;
      if ((uribuf = realloc (uribuf, uribuflen * sizeof (my_wchar_t))) == NULL)
        errabort (41, "realloc() error in %s\n", "resolve_pubid_as_uri()");
    }
  memcpy (uribuf, utf_16_sysid, len * sizeof (my_wchar_t));
  return uribuf;
}

/*
 * resolve_pub_or_sysid:
 *
 *   Resolves pubid or, if pubid is NULL or unresolvable, sysid, into
 *   an xml_file structure.  Pubid is a public identifier string
 *   (my_wchar_t *) taken from an XML file; sysid is a similar system
 *   identifier string taken from an XML file.  Note that these
 *   (my_wchar_t *) strings are converted to UTF-8 before being passed
 *   to external "helper" programs or to system calls that attempt
 *   to resolve them into local files.
*/

/*
 * Further notes on resolve_pub_or_sysid():
 *
 *   xf     file the identifiers were found in, or NULL when called
 *          from outside the parser (e.g. for a command-line argument).
 *   pubid  UTF-16 public identifier, or NULL.
 *   sysid  UTF-16 system identifier, or NULL.
 *
 *   Returns the xml_file produced by create_xml_file() or
 *   create_xml_tmpfile(), or NULL if nothing resolved.  In
 *   no-local-files mode a blocked URI aborts via errabort() when
 *   xf is NULL, and yields NULL otherwise.
 *
 *   NOTE(review): the temporary file name comes from tmpnam(), which
 *   is open to races; the name is handed to the external URL
 *   resolution command, which is expected to create that file.
 */
struct xml_file *
resolve_pub_or_sysid (xml_file *xf, my_wchar_t *pubid, my_wchar_t *sysid)
{
  size_t i, j, len;
  struct xml_file *new_xf;
  my_wchar_t *wp, *utf_16_sysid;
  char *tmp, *tmpfname, *utf_8_sysid, *real_url;
  static size_t buflen;
  static char *cmdbuf = NULL;

  if (pubid != NULL)
    {
      /* first try to resolve_pubid_as_uri; if it resolves to a
       * nonempty URI, resolve that URI as a system identifier
       */
      utf_16_sysid = resolve_pubid_as_uri (xf, pubid);
      if (utf_16_sysid != NULL && *utf_16_sysid != 0)
        {
          new_xf = resolve_pub_or_sysid (xf, NULL, utf_16_sysid);
          if (new_xf != NULL)
            return new_xf;
        }
    }

  if (sysid == NULL)
    return NULL;                /* can't resolve pubid; no sysid to try */

  if (*sysid == 0)
    {
      /* flag empty sysids; people do this to work around the spec */
      wp = utf_8_to_utf_16 ("\"\"");
      if (xf)
        add_xml_error (xf, 565, wp);
      return NULL;
    }

  if (cmdbuf == NULL)
    if ((cmdbuf = malloc (buflen = 256)) == NULL)
      errabort (40, "malloc() error in %s\n", "resolve_pub_or_sysid()");

  /* XML 1.0 standard, sect. 4.2.2 doesn't specify whitespace
   * normalization for system identifiers (cf. public ones).
   */
  if ((tmp = strdup (utf_16_to_utf_8 (sysid))) == NULL)
    errabort (40, "malloc() error in %s\n", "resolve_pub_or_sysid()");
  xwrap (errdebug (3, "trying to resolve sysid %s\n", tmp));

  /* strip file: prefix; turn relative path/URL into absolute path/URL */
  if (strncasecmp (tmp, "file:", 5) == 0)
    {
      xwrap (errdebug (5, "stripping file: prefix from sysid %s", tmp));
      for (i = 0, j = 5; tmp[j]; j++)
        tmp[i++] = tmp[j];
      tmp[i] = 0;
    }
  utf_8_sysid = xf ? absolutize_relative_to (tmp, xf->filename) : tmp;

  if (! xf || xmlparse_env.no_local_files == no)
    {
      /* Check for a local file (check only if first char is '/' if
       * we are called from inside the parser.  If we're not called
       * from the parser, i.e., if xf is NULL, try to open the file
       * as-is (we're probably, in this case, being called from main()
       * to open a top-level XML file).
       */
      if (! xf || *utf_8_sysid == '/')
        {
          /* First, see if it's a straight-up filename. */
          if (access (utf_8_sysid, R_OK) == 0)
            {
              new_xf = create_xml_file (utf_8_sysid);
              free (tmp);
              return new_xf;
            }
          /* Now try destructively unescaping utf_8_sysid and
           * seeing if it's a local filename
           */
          unescape_uri (utf_8_sysid);
          if (access (utf_8_sysid, R_OK) == 0)
            {
              new_xf = create_xml_file (utf_8_sysid);
              free (tmp);
              return new_xf;
            }
        }
      /* if ! xf, then utf_8_sysid was supplied as a cmd-line arg */
      if (! xf
          && *utf_8_sysid != '/'
          && strchr (utf_8_sysid, ':') == NULL
          && strncasecmp (utf_8_sysid, "www/", 4) != 0
          && strncasecmp (utf_8_sysid, "localhost/", 10) != 0)
        {
          /* don't be aggressive about trying to resolve plain
           * filenames as URIs when they're given as an argument
           * on the command line
           */
          free (tmp);
          return NULL;
        }
    }

  /* Okay, it's not a local file (or we aren't allowed to look at
   * local files, for security reasons).  So try now to interpret
   * utf_8_sysid as a URI.
   */
  if (xmlparse_env.no_local_files == yes)
    {
      /* if we're running in no-local-file mode, only accept
       * sysids with external URL-ish prefixes; no-local-file mode
       * is really a kind of stepped-up security mode, and this is
       * just an added security measure
       */
      if (! (strncasecmp (utf_8_sysid, "http://", 7) == 0
             || strncasecmp (utf_8_sysid, "urn:", 4) == 0
             || strncasecmp (utf_8_sysid, "ftp://", 6) == 0))
        {
          if (xf)
            {
              /* this sort of thing can be an attempt at a security breach */
              xwrap (errdebug (1, "Blocked (non-http/urn/ftp) system ID resolution, %s\n",
                               utf_8_sysid));
              free (tmp);
              return NULL;
            }
          /* called from main(); see docs on -n option in man page */
          errabort (60, "non-http/urn/ftp URI resolution is blocked, %s\n", utf_8_sysid);
        }
    }

  /* escape the system ID, and expand the hostname (NB:
   * escape_uri() calls malloc(); expand_hostname_in_uri() calls
   * realloc()).  From here on utf_8_sysid is heap storage separate
   * from tmp, and must be freed on every exit path.
   */
  utf_8_sysid = escape_uri (utf_8_sysid);
  /* e.g., goon.stg -> goon.stg.brown.edu */
  utf_8_sysid = expand_hostname_in_uri (utf_8_sysid);

  if (xmlparse_env.no_local_files == yes)
    if (refers_to_localhost (utf_8_sysid))
      {
        if (xf)
          {
            /* this sort of thing can be an attempt at a security breach */
            xwrap (errdebug (1, "Localhost URI resolution is blocked, %s\n", utf_8_sysid));
            free (utf_8_sysid);         /* was leaked on this path */
            free (tmp);
            return NULL;
          }
        /* called from main(); see docs on -n option in man page */
        errabort (62, "localhost URI resolution is blocked, %s\n", utf_8_sysid);
      }

  free (tmp);

  if (utf_8_sysid == NULL)
    return NULL;

  /* now try to use our url_resolution_cmd_string to resolve it; with
   * no command configured, or no temporary name available, the URI
   * simply cannot be fetched
   */
  if (xmlparse_env.url_resolution_cmd_string == NULL
      || (tmpfname = tmpnam (NULL)) == NULL)
    {
      free (utf_8_sysid);
      return NULL;
    }

  len = strlen (utf_8_sysid) + strlen (xmlparse_env.url_resolution_cmd_string)
    + strlen (tmpfname) + 1;
  if (len > buflen)
    if ((cmdbuf = realloc (cmdbuf, buflen = len)) == NULL)
      errabort (41, "realloc() error in %s\n", "resolve_pub_or_sysid()");
  /* the configured command string serves as the format; the buffer
   * size above assumes it consumes exactly these two strings
   */
  sprintf (cmdbuf, xmlparse_env.url_resolution_cmd_string, utf_8_sysid, tmpfname);

  /* NULL here means we have no data to send the external program */
  if ((tmp = read_line_from_external_program (NULL, cmdbuf)) != NULL)
    {
      /* external_prog is supposed to send the real URL to stdout */
      len = strlen (tmp);
      if (*tmp != '\0' && *tmp != '\n' && tmp[len - 1] == '\n')
        {
          trim (tmp, "\r\n");
          if ((real_url = strdup (tmp)) == NULL)
            errabort (40, "malloc() error in %s\n", "resolve_pub_or_sysid()");
          free (utf_8_sysid);
          utf_8_sysid = real_url;
        }
      /* external_prog is supposed to put the URL's text in tmpfname */
      if (access (tmpfname, R_OK) != 0)
        /* Darn; external_prog didn't create tmpfname */
        errwarn (161, "%s produced no readable file, %s\n", cmdbuf, tmpfname);
      else
        {
          new_xf = create_xml_tmpfile (utf_8_sysid, tmpfname);
          free (utf_8_sysid);
          return new_xf;
        }
    }
  /* else: the external resolution command failed; it has already
   * been reported by read_line_from_external_program()
   */

  free (utf_8_sysid);

  /* Can't resolve pubid or utf_8_sysid */
  return NULL;
}

/*
 * read_line_from_external_program
 *
 *   Reads a line of text from external program, cmd's, stdout.
If * data (arg 1) is a nonnull string, read_line_from_external_program * will send that data to cmd's stdin, in effect creating a * bidirectional pipe. * * A NULL return value indicates some failure (e.g., an exec * failure, a timeout, abnormal termination of the external * command). A nonnull return value indicates success (normal * termination of the external program within the allotted * time-period). * * Note: This routine is messy. It calls setjmp, and uses a file- * level static variable, external_prog, which is used to store the * file descriptor returned by popen. Since there's only one such * variable, this routine is not re-entrant. */ static char * read_line_from_external_program (char *data, char *command) { char *tmp; int lineno, status; #ifdef HAVE_SIGSETJMP if (sigsetjmp (jump_buffer, 1)) #else # ifdef sigsetjmp if (sigsetjmp (jump_buffer, 1)) # else if (setjmp (jump_buffer)) # endif #endif { /* alarm should be off anyway, but I'm paranoid */ alarm (0); sig_default (SIGALRM); /* There was a timeout; signal handler executed longjmp */ errwarn (33, "external command timed out: %s\n", command); } else { /* SIGALRM handler executes longjmp on timeout */ external_prog = NULL; sig_catch_no_restart (SIGALRM, external_prog_timed_out); alarm (300); /* external_prog sends URI -> stdout */ xwrap (errdebug (5, "running external command: %s\n", command)); if ((external_prog = bidi_spopen (data, command)) == NULL) errwarn (30, "system error running %s\n", command); else { lineno = 0; /* read the URL from external_prog's stdout */ xwrap (errdebug (5, "attempting to read a URI back from external command\n")); tmp = getline (external_prog, command, &lineno); /* got what we need; now turn off the alarm */ alarm (0); /* close up the pipe; check external_prog's exit status */ status = bidi_pclose (external_prog); if (WIFEXITED (status) && WEXITSTATUS (status)) errwarn (31, "error running %s; status = %d\n", command, WEXITSTATUS (status)); else if (WIFSIGNALED (status)) 
errwarn (35, "extern cmd %s killed; signal = %d\n", command, WTERMSIG (status)); else if (! tmp) /* getline returns NULL on EOF */ errwarn (34, "extern cmd produced no output, %s\n", command); else return tmp; } } return NULL; } RETSIGTYPE external_prog_timed_out (int sig) { int status = 1; /* clean up open pipe, take subprocess's exit status */ if (external_prog) { status = bidi_pclose (external_prog); external_prog = NULL; } /* jump back into read_line_from_external_program */ #ifdef HAVE_SIGSETJMP siglongjmp (jump_buffer, status ? status : 1); #else # ifdef sigsetjmp siglongjmp (jump_buffer, status ? status : 1); # else longjmp (jump_buffer, status ? status : 1); # endif #endif } static struct rg_htable * create_public_to_system_identifier_hashtable (char *sgml_catalog_filenames) { char *p, *q; size_t i, filecount; char *filelist[128]; char filebuf[MAXPATHLEN + 1]; struct rg_htable *ht = NULL; if ((ht = rg_create_htable (500)) == NULL) errabort (40, "malloc() error in %s\n", "create_public_to_system_identifier_hashtable"); q = NULL; filecount = 0; for (p = sgml_catalog_filenames; *p != '\0'; p++) { /* colons separate file names */ if (*p == ':') { if (q != NULL) { if (filecount > 128) { errwarn (173, "> %d catalog files\n", 128); return ht; } else { /* check to see if we've already seen this file */ *q = '\0'; for (i = 0; i < filecount; i++) if (strcoll (filelist[i], filebuf) == 0) break; } if (i == filecount && *filebuf) /* nope, we haven't already seen this file */ process_catalog_file (filebuf, ht); filelist[filecount++] = strdup (filebuf); q = NULL; } } else { if (q == NULL) q = filebuf; /* anything may be backslash escaped */ if (*p == '\\') p++; else *(q++) = *p; if ((q - filebuf) > MAXPATHLEN) { *q = '\0'; errabort (176, "filename too long, %s...\n", filebuf); } } } if (q != NULL) { *q = '\0'; for (i = 0; i < filecount; i++) if (strcoll (filelist[i], filebuf) == 0) break; if (i == filecount && *filebuf) process_catalog_file (filebuf, ht); } for (i = 0; i 
< filecount; i++) free (filelist[i]); return ht; } #define skip_whitespace(p) { while (isspace (*(p))) (p)++; } static struct rg_htable * process_catalog_file (char *q, struct rg_htable *ht) { FILE *f; size_t len; char quote; ptrdiff_t pdiff; int lineno, errcount; char *p, *p2, *line, *pubid_str, *sysid_str; struct rg_htable_item it; static char *idbuf = NULL; static size_t idbuflen; /* shut up about uninitialized variables */ quote = len = 0; pubid_str = p2 = NULL; if ((f = fopen_and_readlock (q)) == NULL) { errwarn (170, "can't open %s\n", q); return ht; } if (idbuf == NULL) if ((idbuf = malloc (idbuflen = 32)) == NULL) errabort (40, "malloc() error in %s\n", "process_catalog_file"); errcount = lineno = 0; push_state (&stack, initial); while ((line = getline (f, q, &lineno)) != NULL) { p = line; while (*p != '\0') { switch (top_state (&stack)) { case initial: /* bypass leading blanks */ skip_whitespace (p); if (*p == '-' && *(p + 1) == '-') { /* comment starts */ push_state (&stack, in_comment); p += 2; } else if (strncmp (p, "PUBLIC", 6) == 0) { /* look for a public identifier */ push_state (&stack, pubid); /* p2 will hold the raw pubid string */ p2 = idbuf; len = 0; p += 6; skip_whitespace (p); } else /* discard lines that don't start with PUBLIC */ push_state (&stack, discard); break; case in_comment: if (*p == '-' && *(p + 1) == '-') /* comment ends; go back to previous state */ { pop_state (&stack); p += 2; skip_whitespace (p); } else p++; break; case pubid: case sysid: /* If p2 == idbuf, we're at the START of a pub/sysid... */ if (p2 == idbuf) { switch (*p) { case '\n': p++; case '\0': /* if we're at the end of a line, get another line */ continue; case '"': case '\'': /* save begin quote (later, match it with the end one) */ quote = *p++; break; case '-': /* oops; somebody stuck a comment in here */ if (*(p + 1) == '-') { push_state (&stack, in_comment); p += 2; continue; } default: /* Hmmm. This pub/sysid doesn't begin with a quote. 
*/ if (top_state (&stack) == sysid) /* the system identifier doesn't need one */ quote = '\0'; else { /* public identifiers DO need quotation marks */ errwarn (178, "malformed ident in %s, line %d\n", q, lineno); if (++errcount > 50) { errwarn (179, "too many errors in %s\n", q); goto done; } while (top_state (&stack) != initial) pop_state (&stack); } } } if (++len > idbuflen) { pdiff = p2 - idbuf; if ((idbuf = realloc (idbuf, idbuflen += 32)) == NULL) errwarn (41, "realloc() error in %s\n", "process_catalog_file"); p2 = idbuf + pdiff; } /* Okay, now let's see if we're at the END of a pub/sysid */ switch (*p) { case '\0': break; case '\\': if (quote && *(p + 1) == quote) /* backslashes "escape" quotes inside of quoted identifiers */ p++; *(p2++) = *(p++); break; case '\n': if (quote) { *(p2++) = *(p++); break; } case '"': case '\'': /* if we have either a matching quote, or an unquoted * sysid that's come to the end of a line... */ if (*p == quote || (*p == '\n' && ! quote && top_state (&stack) == sysid)) { *p2 = '\0'; if (top_state (&stack) == pubid) { pop_state (&stack); push_state (&stack, sysid); if ((pubid_str = strdup (idbuf)) == NULL) errabort (40, "malloc() error in %s\n", "process_catalog_file"); len = 0; p2 = idbuf; if (p) p++; skip_whitespace (p); } else { if ((sysid_str = see_if_its_local (q, idbuf)) == NULL) errabort (40, "malloc() error in %s\n", "process_catalog_file"); map_spaces_to_space (pubid_str); it.key = pubid_str; it.uni_key = NULL; it.data = sysid_str; if (! 
rg_find_item (ht, it)) rg_add_item (ht, it); else /* Overwrite previous mapping for this pubid */ errwarn (175, "pubid, %s (in %s), defined; skipped\n", it.key, q); #ifdef STANDALONE_DTDUTIL_TEST printf ("Number of items in hash table = %d\n", ht->no_items); #endif pop_state (&stack); if (p) p++; } break; } default: /* by default, just add *p to p2 */ *(p2++) = *(p++); break; } break; case discard: break; } /* Stop processing the current line */ if (top_state (&stack) == discard) { pop_state (&stack); break; } } } done: /* plug memory leak */ if (top_state (&stack) == sysid) free (pubid_str); /* We may not need the catalog, so report a non-fatal error */ if (ferror (f)) errwarn (172, "error reading %s\n", q); fclose_and_unlock (f); return ht; } /* * map_spaces_to_space * * Destructively maps sequences of spaces in string s to * a single space ('\x20'). */ static char * map_spaces_to_space (char *s) { char *p, *p2; p = s; /* skip over leading whitespace */ skip_whitespace (p); /* trim off trailing whitespace */ trim (p, "\r\n\t "); for (p2 = s; *p != '\0'; p++) { if (! isspace (*p)) *p2++ = *p; else if (! isspace (*(p + 1))) *p2++ = ' '; } *p2 = '\0'; return s; } /* * see_if_its_local * * If idbuf has a leading '/' or contains the string "://", then * returns strdup(idbuf). Otherwise, figures out the directory in * which the catalog file lives, and tries to interpret idbuf as a * relative path, starting from that directory, to a local file. If * such a file exists, returns an absolute path to that file. If * not, returns strdup(idbuf). * * The basic idea here is that if idbuf is a local path, we want it * to be absolute. But if it's not a local path, we want it to be * left as-is. 
*/

/*
 * Further notes on see_if_its_local():
 *
 *   catfile  name of the catalog file idbuf was read from.
 *   idbuf    system identifier as written in the catalog.
 *
 *   The result is always malloc'd storage owned by the caller; a
 *   NULL result means the allocation failed.
 */
static char *
see_if_its_local (char *catfile, char *idbuf)
{
  char *src, *dst, *last_slash;
  char *fullpath;
  size_t dirlen, idlen;

  /* absolute paths and URLs are used as they stand */
  if (*idbuf == '/' || strstr (idbuf, "://"))
    return strdup (idbuf);

  /* find catfile's path component: everything up to and including
   * the last '/'.  Without a '/' there is no directory to resolve
   * against, so idbuf is left as-is.
   */
  catfile = absolutize (catfile);
  last_slash = catfile ? strrchr (catfile, '/') : NULL;
  if (last_slash == NULL)
    return strdup (idbuf);

  dirlen = (size_t) (last_slash - catfile) + 1;
  idlen = strlen (idbuf) + 1;   /* includes the terminating NUL */

  /* concatenate catfile's path component and idbuf */
  if ((fullpath = malloc (dirlen + idlen)) == NULL)
    errabort (40, "malloc() error in %s\n", "see_if_its_local()");
  memcpy (fullpath, catfile, dirlen);
  memcpy (&fullpath[dirlen], idbuf, idlen);

  /* reduce //// multiple slashes to just one */
  for (src = dst = fullpath; *src != '\0'; src++)
    if (! (*src == '/' && *(src + 1) == '/'))
      *dst++ = *src;
  *dst = '\0';

  if (access (fullpath, F_OK) == 0)
    return fullpath;

  /* no such local file; hand back the identifier unchanged */
  free (fullpath);
  return strdup (idbuf);
}

#ifdef STANDALONE_DTDUTIL_TEST

#include "readcfg.h"

xmlparse_environment xmlparse_env;

/*
 * Stand-alone test driver: dumps the catalog hashtable built from
 * the configured catalog files, then runs a series of public and
 * system identifier resolutions and prints what each one yields.
 */
int
main (int argc, char **argv)
{
  my_wchar_t *wp, *utf_16_id;
  struct rg_htable *ht;
  struct xml_file *xf, *xf2, *xf3;
  struct rg_htable_item *result;

  readcfg (argc, argv);

  ht = create_public_to_system_identifier_hashtable (xmlparse_env.sgml_catalog_filenames);
  printf ("Grand total of items in hash table = %d\n", ht->no_items);

  result = rg_get_htable_items (ht);
  while (result != NULL)
    {
      printf ("pubid_str = \"%s\"; sysid_str = \"%s\"\n",
              (char *)result->key, (char *)result->data);
      result = rg_get_htable_items (NULL);
    }

  /* with neither identifier given, nothing can resolve */
  xf = resolve_pub_or_sysid (NULL, NULL, NULL);
  printf ("xf is %s\n", xf ? "nonnull" : "null");

  utf_16_id = uni_strdup (utf_8_to_utf_16 ("stuff"));
  if ((xf = resolve_pub_or_sysid (NULL, utf_16_id, NULL)) != NULL)
    printf ("xf->filename is %s\n", xf->filename);
  if ((wp = resolve_pubid_as_uri (xf, utf_16_id)) != NULL)
    printf ("stuff (PubID) resolves as URI: %s\n", utf_16_to_utf_8 (wp));
  free (utf_16_id);

  utf_16_id = uni_strdup (utf_8_to_utf_16 ("no_no_no_no"));
  printf ("Trying no_no_no_no (which shouldn't resolve)\n");
  if ((wp = resolve_pubid_as_uri (xf, utf_16_id)) != NULL)
    printf ("no_no_no_no (PubID) resolves as URI: %s\n", utf_16_to_utf_8 (wp));
  else
    printf ("It doesn't.\n");
  free (utf_16_id);

  if (xf)
    {
      /* system identifiers resolved relative to xf */
      utf_16_id = utf_8_to_utf_16 ("dtdutil.input");
      if ((xf2 = resolve_pub_or_sysid (xf, NULL, utf_16_id)) != NULL)
        {
          printf ("xf2->filename is %s\n", xf2->filename);
          free_xml_file (xf2);
        }

      utf_16_id = utf_8_to_utf_16 ("this.will.fail");
      if ((xf2 = resolve_pub_or_sysid (xf, NULL, utf_16_id)) != NULL)
        {
          printf ("eek; xf2->filename is %s (should have failed)\n", xf2->filename);
          free_xml_file (xf2);
        }

      utf_16_id = utf_8_to_utf_16 ("/this.will.fail");
      if ((xf2 = resolve_pub_or_sysid (xf, NULL, utf_16_id)) != NULL)
        {
          printf ("eek; xf2->filename is %s (should have failed)\n", xf2->filename);
          free_xml_file (xf2);
        }

      utf_16_id = utf_8_to_utf_16 ("http://www.brown.edu");
      if ((xf2 = resolve_pub_or_sysid (xf, NULL, utf_16_id)) != NULL)
        {
          printf ("xf2->filename is %s\n", xf2->filename);
          free_xml_file (xf2);
        }

      utf_16_id = utf_8_to_utf_16 ("http://www.brown.edu/");
      if ((xf2 = resolve_pub_or_sysid (xf, NULL, utf_16_id)) != NULL)
        {
          printf ("xf2->filename is %s\n", xf2->filename);
          /* a relative identifier resolved against a remote parent */
          utf_16_id = utf_8_to_utf_16 ("index.html");
          if ((xf3 = resolve_pub_or_sysid (xf2, NULL, utf_16_id)) != NULL)
            {
              printf ("xf3->filename is %s\n", xf3->filename);
              free_xml_file (xf3);
            }
          free_xml_file (xf2);
        }

      free_xml_file (xf);
    }

  rg_free_htable_and_data (ht);

  exit (0);
}

#endif /* STANDALONE_DTDUTIL_TEST */