/* ***************************************************************************** * * $RCSfile: strutil.c,v $ * $Date: 1998/05/18 13:38:37 $ * $Source: /home/richard/Xml/RCS/strutil.c,v $ * $Revision: 1.9 $ * $Author: richard $ * ***************************************************************************** * * Copyright 1998, Brown University and Richard Goerwitz * ***************************************************************************** * * General string-handling utilities: * * char *any(char *cs, char *s) true (s + 1) if a char in cs begins s * char *trim(char *s, char *cs) remove chars in cs that trail s * char *trimhash(char *s) strip #comments from s * char *get_string(char *) strdup() a string, possibly xlating C- * style escapes (a la snarf_string()) * char *snarf_string(char *) translate C-style \n, \r, etc. escapes * (sets global snarfed_string_endpoint) * char *strip(char *s, char *cs) strip chars in cs from s * char *downcase(char *s) map uppercase chars in s to lowercase * char *upcase(char *s) map lowercase chars in s to uppercase * char *truncate_to(char *s, int i) truncate string *s to i chars * char *replace(char *s, char *old, char *new) * return a new s in which old is new * Note also * * char *trimhash_no_quotes_or_backslash(char *s) * * which works just like trimhash, except that it ignores #-signs * that are enclosed in double/single quotes or are backslash * escaped. * * Most of the above functions are destructive, in the sense that * they alter s. They also return s (so they can be used in contexts * like fprintf(f, "%s\n", trim(s, " \t")). * ***************************************************************************** */ #include #include "strutil.h" /* holds a pointer to the end of a snarfed string */ char *snarfed_string_endpoint = NULL; /* * any: string x string -> string * (cs, s) -> s + 1 (NULL on fail) * * Where cs is a string and s is a string, and i is either s + 1 or * NULL, depending on whether some character in cs begins s or not. * The idea here is to return true for, say, any(" \t", " hello"), * but false for any(" \t", "hello"). */ char * any (char *cs, char *s) { char *p; for (p = cs; *p != '\0'; p++) if (*p == *s) return s + 1; return NULL; } /* * trimhash: string -> string * s -> s (s is altered) * * Where s is a null-terminated string, and the return value is s, * stripped of #hash-style comments at line's end. Does not check * to see if # is embedded in quotes, "this\"#is#\" a quote"; nor * does it check to see if it is backslashed. Simple example: * * trimhash("hello # comment") -> "hello " */ char * trimhash (char *s) { char *end, *last_end = NULL; for (end = s + strlen (s) - 1; end >= s; end--) if (*end == '#') last_end = end; if (last_end) *last_end = '\0'; return s; } /* * get_string: * * If line begins with a quotation mark, then un-quote it (and * unescape \backslash-escaped chars, like \n -> newline). Return a * malloc'd copy of the new string. If the string doesn't begin * with a quotation mark, just return a strdup'd copy of it. */ char * get_string (char *line) { char *p; while (isspace (*line)) line++; if (*line == '"') if ((p = snarf_string (line)) != NULL) return p; return strdup (line); } /* * snarf_string: * * Finds a C-style "string" with backslash escapes for newlines, * returns, quotation marks, etc., in arg 1 (line). That is, given * a line like * * "\"hello there,\" he said" * * snarf_string returns a pointer to - * * "hello there," he said * * The returned string is malloc()ed internally by snarf_string, so * it needs to be freed later if there is some need to clobber the * structure it gets incorporated into. * * Leaves a pointer to the last character of line that snarf_string * looked at in snarfed_string_endpoint (which, barring error, will * point to a trailing quotation mark). */ char * snarf_string (char *line) { size_t len; char c, *msg; static char *buf = NULL; static size_t buflen; len = strlen (line) + 1; if (buf == NULL) { buflen = len; if ((buf = malloc (buflen * sizeof (char))) == NULL) return NULL; } else if (len > buflen) { buflen = len; if ((buf = realloc (buf, buflen * sizeof (char))) == NULL) return NULL; } while (isspace (*line)) line++; /* move line past first quotation mark, then look for next one */ for (msg = buf, line++; *line != '"'; line++) { /* look for terminating quotation mark */ switch (*line) { case '\\': switch (c = *++line) { case 'a': { *msg++ = '\a'; break; } case 'b': { *msg++ = '\b'; break; } case 'f': { *msg++ = '\f'; break; } case 'n': { *msg++ = '\n'; break; } case 'r': { *msg++ = '\r'; break; } case 't': { *msg++ = '\t'; break; } case 'v': { *msg++ = '\v'; break; } case '\0': { *msg++ = '\\'; break; } default: { *msg++ = c; break; } } break; case '\0': /* snarfed_string_endpoint is externally visible */ snarfed_string_endpoint = line; return NULL; default: *msg++ = *line; break; } } *msg = '\0'; /* snarfed_string_endpoint is externally visible */ snarfed_string_endpoint = line; return strdup (buf); } /* * trimhash_no_quotes_or_backslash: string -> string * s -> s (s is altered) * * Where s is a null-terminated string, and the return value is s, * stripped of #hash-style comments at line's end. Checks to see * if # is embedded in quotes, "this\"#is#\" a quote" or '#', and * checks to be sure the # itself isn't backslashed \#. */ char * trimhash_no_quotes_or_backslash (char *s) { char *begin; int inside_dquote = 0, inside_squote = 0; for (begin = s; *begin != '\0'; begin++) { switch (*begin) { case '\\': begin++; continue; case '"': if (!inside_squote) inside_dquote = inside_dquote ? 0 : 1; break; case '\'': if (!inside_dquote) inside_squote = inside_squote ? 0 : 1; break; case '#': if (inside_squote || inside_dquote) break; else { *begin = '\0'; return s; } } } return s; } /* * trim: string x string -> string * (s, cs) -> s (s is altered) * * Where s is a string, and cs is a string, and the return value is * arg 1 (s) with a null replacing the last character in s found * also in cs, moving backwards from the end of s. In plainer terms, * trim() trims characters in cs off the end of string s: * * trim("hello, world ", " ") -> "hello, world" */ char * trim (char *s, char *cs) { char *end, *p; /* * For each character in s, moving backwards from the end, see if * that character is in cs. If it is, keep going. If not, stop, * tack on a null after that char., and return. */ for (end = s + strlen (s) - 1; end >= s; end--) { for (p = cs; *p != '\0'; p++) if (*p == *end) break; if (*p == '\0') { *(end + 1) = '\0'; return s; } } *s = '\0'; return s; } /* * strip: string x string -> string * (s, cs) -> s (s is altered) * * Where s is a string, and cs is a string, and the return value is * arg 1 (s) with all characters in it that also occur in arg 2 (cs) * stripped out, e.g.: * * strip("hello, world ", ", ") -> "helloworld" */ char * strip (char *s, char *cs) { char *sp1, *sp2, *csp; for (sp1 = sp2 = s; *sp2 != '\0'; sp2++) { for (csp = cs; *csp != '\0'; csp++) if (*csp == *sp2) break; if (*csp == '\0') *sp1++ = *sp2; } *sp1 = '\0'; return s; } /* * downcase: char * -> char * * s -> s (side effect: changes s) * * Convert all letters in string s to lowercase; return s. Note * that the elements of s are altered; then s is returned. */ char * downcase (char *s) { char *p; for (p = s; *p != '\0'; p++) *p = tolower (*p); return s; } /* * upcase: char * -> char * * s -> s (side effect: changes s) * * Convert all letters in string s to uppercase; return s. Note * that the elements of s are altered; then s is returned. */ char * upcase (char *s) { char *p; for (p = s; *p != '\0'; p++) *p = toupper (*p); return s; } /* * truncate_to: * * Truncate string s (arg 1) to maxlen characters, and place the * result in a static buffer. Trailing null counts as a character - * because the point of this routine is to trim a string down so it * fits into a buffer of a given size. * * Returns a pointer into static storage space that may change on * subsequent calls. This routine is nondestructive. Arg 1 (s) is * not changed. */ char * truncate_to (char *s, size_t maxlen) { size_t i; static size_t buflen; static char *buf = NULL; if (buf == NULL) if ((buf = malloc ((buflen = maxlen) * sizeof (char))) == NULL) return NULL; if (maxlen > buflen) if ((buf = realloc (buf, (buflen = maxlen) * sizeof (char))) == NULL) return NULL; *buf = '\0'; if (maxlen > 0) { if ((i = strlen (s)) < maxlen) strcpy (buf, s); else { if (maxlen <= 4) { memcpy (buf, s, maxlen - 1); buf[maxlen - 1] = '\0'; } else { memcpy (buf, s, maxlen - 4); memcpy (buf + (maxlen - 4), "...", 4); } } } return buf; } /* * replace: * * Replace old_str with new_str in big_str. For example, replace * ("hello, goodbye", "hello", "goodbye") returns a new string, * "goodbye, goodbye". Note that the new string points into static * storage that changes on subsequent calls to replace(). * * Replace cannot fail. At worst, it will simply return a string * identical to big_str. To delete old_str (arg 2), use "" as the * third argument to replace(). */ char * replace (char *big_str, char *old_str, char *new_str) { int do_free = 0; size_t i, old_i; size_t j, k, l, diff = 0; char *p, *sp1, *sp2; static char *str_buf = NULL; static size_t str_buf_len = 0; /* * Check to see if big_str points to a value returned by a previous * call to replace(). If so, strdup it, then free storage later. */ if (str_buf == big_str) { big_str = strdup (big_str); do_free = 1; } old_i = i = strlen (big_str) + 1; j = strlen (old_str); k = strlen (new_str); /* * Make sure that str_buf is at least big enough to hold big_str; * we'll make it bigger later if new_str is also bigger than old_str. */ if (i > str_buf_len) { str_buf = realloc (str_buf, sizeof (char) * i); str_buf_len = i; } /* * Check if new_str is longer than old_str. Remember that size_t * might be unsigned, so be careful with subtractions. */ if (k > j) diff = k - j; p = str_buf; sp1 = sp2 = big_str; while ((sp2 = strstr (sp1, old_str))) { l = sp2 - sp1; memcpy (p, sp1, sizeof (char) * l); p += l; if (diff) if ((i += diff) > str_buf_len) { l = p - str_buf; str_buf = realloc (str_buf, sizeof (char) * i); str_buf_len = i; p = str_buf + l; } memcpy (p, new_str, sizeof (char) * k); sp1 = sp2 + j; p += k; } memcpy (p, sp1, sizeof (char) * ((big_str + old_i) - sp1)); if (do_free) free (big_str); return str_buf; }