From a9e0784cbcd32dbd6184b280accaafd1c9575ba5 Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Thu, 29 Oct 2009 00:44:27 -0400 Subject: Complete string parsing. --- dimension/tokenize.c | 75 +++++++++++++++++++++++++++++++++++++++++---- tests/dimension/strings.pov | 2 +- tests/dimension/strings.sh | 3 +- 3 files changed, 71 insertions(+), 9 deletions(-) diff --git a/dimension/tokenize.c b/dimension/tokenize.c index 9eeab45..e34564c 100644 --- a/dimension/tokenize.c +++ b/dimension/tokenize.c @@ -220,6 +220,8 @@ dmnsn_tokenize_string(const char *filename, char *map, size_t size, char **next, dmnsn_token *token) { unsigned int i = 0, alloc = 32; + char unicode[5] = { 0 }, *end; + unsigned long wchar; if (**next != '"') { return 1; @@ -240,18 +242,74 @@ dmnsn_tokenize_string(const char *filename, ++*next; switch (**next) { - case '\\': - token->value[i] = '\\'; + case 'a': + token->value[i] = '\a'; break; - case '"': - token->value[i] = '"'; + case 'b': + token->value[i] = '\b'; + break; + + case 'f': + token->value[i] = '\f'; break; case 'n': token->value[i] = '\n'; break; + case 'r': + token->value[i] = '\r'; + break; + + case 't': + token->value[i] = '\t'; + break; + + case 'u': + /* Escaped unicode character */ + strncpy(unicode, *next + 1, 4); + wchar = strtoul(unicode, &end, 16); + if (*next - map >= size - 4) { + dmnsn_diagnostic(filename, *line, *col, + "EOF before end of escape sequence"); + free(token->value); + return 1; + } + if (end != &unicode[4]) { + dmnsn_diagnostic(filename, *line, *col, + "WARNING: Invalid unicode character \"\\u%s\"", + unicode); + } else { + token->value[i] = wchar/256; + ++i; + if (i + 1 >= alloc) { + alloc *= 2; + token->value = realloc(token->value, alloc); + } + token->value[i] = wchar%256; + + *col += 4; + *next += 4; + } + break; + + case 'v': + token->value[i] = '\v'; + break; + + case '\\': + token->value[i] = '\\'; + break; + + case '\'': + token->value[i] = '\''; + break; + + case '"': + token->value[i] = '"'; + break; + default: dmnsn_diagnostic(filename, *line, *col, "WARNING: unrecognised escape sequence '\\%c'", @@ -267,10 +325,15 @@ dmnsn_tokenize_string(const char *filename, ++*col; ++*next; } - ++*next; - token->value[i] = '\0'; + if (**next != '"') { + dmnsn_diagnostic(filename, *line, *col, "Non-terminated string"); + free(token->value); + return 1; + } + ++*next; + token->value[i] = '\0'; return 0; } diff --git a/tests/dimension/strings.pov b/tests/dimension/strings.pov index 307b774..663afdd 100644 --- a/tests/dimension/strings.pov +++ b/tests/dimension/strings.pov @@ -18,4 +18,4 @@ *************************************************************************/ // Test string handling, including escape sequences -"This is a string with\n\"escape sequences\"\\" +"This is a string with escape sequences: \a\b\f\n\r\t\u2123\v\\\'\"" \ No newline at end of file diff --git a/tests/dimension/strings.sh b/tests/dimension/strings.sh index 7557100..58e0751 100755 --- a/tests/dimension/strings.sh +++ b/tests/dimension/strings.sh @@ -20,8 +20,7 @@ ######################################################################### strings=$(${top_builddir}/dimension/dimension --tokenize ${srcdir}/strings.pov) -strings_exp='((string "This is a string with -"escape sequences"\"))' +strings_exp=$(echo -e "((string \"This is a string with escape sequences: \a\b\f\n\r\t!#\v\\\'\"\"))") if [ "$strings" != "$strings_exp" ]; then echo "strings.pov tokenized as \"$strings\"" >&2 -- cgit v1.2.3