From a9e0784cbcd32dbd6184b280accaafd1c9575ba5 Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Thu, 29 Oct 2009 00:44:27 -0400 Subject: Complete string parsing. --- dimension/tokenize.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 69 insertions(+), 6 deletions(-) (limited to 'dimension/tokenize.c') diff --git a/dimension/tokenize.c b/dimension/tokenize.c index 9eeab45..e34564c 100644 --- a/dimension/tokenize.c +++ b/dimension/tokenize.c @@ -220,6 +220,8 @@ dmnsn_tokenize_string(const char *filename, char *map, size_t size, char **next, dmnsn_token *token) { unsigned int i = 0, alloc = 32; + char unicode[5] = { 0 }, *end; + unsigned long wchar; if (**next != '"') { return 1; @@ -240,18 +242,74 @@ dmnsn_tokenize_string(const char *filename, ++*next; switch (**next) { - case '\\': - token->value[i] = '\\'; + case 'a': + token->value[i] = '\a'; break; - case '"': - token->value[i] = '"'; + case 'b': + token->value[i] = '\b'; + break; + + case 'f': + token->value[i] = '\f'; break; case 'n': token->value[i] = '\n'; break; + case 'r': + token->value[i] = '\r'; + break; + + case 't': + token->value[i] = '\t'; + break; + + case 'u': + /* Escaped unicode character */ + strncpy(unicode, *next + 1, 4); + wchar = strtoul(unicode, &end, 16); + if (*next - map >= size - 4) { + dmnsn_diagnostic(filename, *line, *col, + "EOF before end of escape sequence"); + free(token->value); + return 1; + } + if (end != &unicode[4]) { + dmnsn_diagnostic(filename, *line, *col, + "WARNING: Invalid unicode character \"\\u%s\"", + unicode); + } else { + token->value[i] = wchar/256; + ++i; + if (i + 1 >= alloc) { + alloc *= 2; + token->value = realloc(token->value, alloc); + } + token->value[i] = wchar%256; + + *col += 4; + *next += 4; + } + break; + + case 'v': + token->value[i] = '\v'; + break; + + case '\\': + token->value[i] = '\\'; + break; + + case '\'': + token->value[i] = '\''; + break; + + case '"': + token->value[i] = '"'; + break; + default: dmnsn_diagnostic(filename, *line, *col, "WARNING: unrecognised escape sequence '\\%c'", @@ -267,10 +325,15 @@ dmnsn_tokenize_string(const char *filename, ++*col; ++*next; } - ++*next; - token->value[i] = '\0'; + if (**next != '"') { + dmnsn_diagnostic(filename, *line, *col, "Non-terminated string"); + free(token->value); + return 1; + } + ++*next; + token->value[i] = '\0'; return 0; } -- cgit v1.2.3