/*
 *****************************************************************************
 *
 * $RCSfile: xml2utf.c,v $
 * $Date: 1999/05/18 18:36:20 $
 * $Source: /home/richard/Xml/RCS/xml2utf.c,v $
 * $Revision: 1.6 $
 * $Author: richard $
 *
 *****************************************************************************
 *
 * Copyright 1998, Brown University and Richard Goerwitz
 *
 *****************************************************************************
 *
 * Simple utility for converting XML to UTF-8 or UTF-16.
 *
 *****************************************************************************
 */

#include "general.h"
#include "errabort.h"
#include "readcfg.h"
#include "utfutil.h"

xmlparse_environment xmlparse_env;

/*
 * main - convert every XML file named on the command line and write the
 * result to stdout, as UTF-8 (the default, or -8) or as UTF-16 (-16).
 *
 * All other options are passed through to readcfg(), which sets up
 * xmlparse_env before the local option pass below runs.
 *
 * Returns 0 when every input file was converted and all output was
 * written; returns 1 when at least one input file could not be opened
 * (such files are skipped) or when writing to stdout failed.  Usage
 * requests (-h) exit with status 1; syntax errors, an empty file list
 * and allocation failure are reported through errabort().
 */
int
main (int argc, char **argv)
{
  u_int16_t uc;
  int output_utf_8 = 1;
  int i, j, c, were_errors;
  my_wchar_t *wp, charbuf[BUFSIZ];

  /* zero out the xmlparse_env structure */
  memset (&xmlparse_env, 0, sizeof (xmlparse_env));

  /* Set up xmlparse_env; parse command-line opts; read/parse config file */
  readcfg (argc, argv);

  xwrap (errdebug (3, "re-parsing command-line arguments (third pass)\n"));
  /* POSIX getopt() signals the end of the options with -1 */
  while ((c = getopt (argc, argv, "1:8c:C:d:E:hl:m:p:su:")) != -1)
    {
      switch (c)
        {
        case 'h':
          /* emit a usage message */
          fprintf (stderr, "Usage: %s [same arguments as for xmlparse] [-8] [-16] \n", argv[0]);
          exit (1);
          break;
        case '8':
          output_utf_8 = 1;
          break;
        case '1':
          /* "-16" reaches us as option '1' carrying the argument "6" */
          if (*optarg == '6')
            {
              output_utf_8 = 0;
              break;
            }
          /* fall through - "-1" with any other argument is a syntax error */
        case '?':
        case ':':
          errabort (10, "syntax error; invoke w/ -h option\n");
          break;
        default:
          /* other command-line arguments are handled in readcfg() */
          break;
        }
    }

  if ((xmlparse_env.filelen = argc - optind) == 0)
    errabort (11, "no files to process\n");
  else
    {
      /* Stuff all input files supplied on the cmd line into
       * xml_file structures within xmlparse_env.
       */
      xwrap (errdebug (5, "initializing %d xml_file structs\n", xmlparse_env.filelen));
      /* size the array from its own element type */
      xmlparse_env.xml_files =
        malloc (sizeof *xmlparse_env.xml_files * xmlparse_env.filelen);
      if (xmlparse_env.xml_files == NULL)
        errabort (40, "malloc error in %s\n", "main()");
      for (i = 0; xmlparse_env.filelen > i; i++)
        xmlparse_env.xml_files[i] = create_xml_file (argv[optind + i]);
    }

  /* process xml files */
  were_errors = 0;
  xwrap (errdebug (5, "converting files (qua xml_file structs)...\n"));
  for (i = 0; i < xmlparse_env.filelen; i++)
    {
      /* check for NULL here (indicates un-openable file); skip the
       * file, but remember the failure for the exit status */
      if (! xmlparse_env.xml_files[i])
        {
          were_errors = 1;
          continue;
        }

      while ((c = read_xml_file (xmlparse_env.xml_files[i], charbuf, BUFSIZ)))
        {
          if (! output_utf_8)
            {
              /* c (count) includes the trailing nil, 0; remove it */
              c--;
              /* my_wchar_t may be "int", so we have to downsize.
               * NOTE(review): a value above 0xFFFF would be truncated
               * here rather than written as a surrogate pair - confirm
               * whether read_xml_file() can deliver such values. */
              for (j = 0; j < c; j++)
                {
                  uc = charbuf[j];
                  /* UTF-16 chars are sixteen bits wide by definition;
                   * the bytes of uc are written as stored in memory */
                  fwrite (&uc, sizeof uc, 1, stdout);
                }
            }
          else
            {
              wp = charbuf;
              /* strip out byte-order marker */
              if (*wp == 0xFFFE || *wp == 0xFEFF)
                wp++;
              fputs (utf_16_to_utf_8 (wp), stdout);
            }

          /* stop as soon as stdout goes bad instead of converting the
           * rest of the input into a stream nobody can receive */
          if (ferror (stdout))
            {
              fprintf (stderr, "%s: error writing to standard output\n", argv[0]);
              return 1;
            }
        }
    }

  /* buffered output can still fail at flush time (full disk, closed pipe) */
  if (fflush (stdout) != 0)
    {
      fprintf (stderr, "%s: error writing to standard output\n", argv[0]);
      return 1;
    }

  /* We're done! */
  return were_errors ? 1 : 0;
}