From 8ce6029a6a14c0ef003c88cb9fb29e522c022f9c Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Wed, 28 Oct 2009 18:57:31 -0400 Subject: Tokenize strings and language directives. --- dimension/tokenize.c | 143 ++++++++++++++++++++++++++++++++++++++++- dimension/tokenize.h | 7 ++ tests/dimension/directives.pov | 24 +++++++ tests/dimension/strings.pov | 21 ++++++ tests/dimension/tokenizer.sh | 19 ++++++ 5 files changed, 211 insertions(+), 3 deletions(-) create mode 100644 tests/dimension/directives.pov create mode 100644 tests/dimension/strings.pov diff --git a/dimension/tokenize.c b/dimension/tokenize.c index 8401dc7..97b26be 100644 --- a/dimension/tokenize.c +++ b/dimension/tokenize.c @@ -121,7 +121,7 @@ dmnsn_tokenize_label(char *map, size_t size, dmnsn_token *token, ++i; ++*col; ++*next; - } while (*next - map < size && (isalnum(**next) || **next == '_')); + } while (*next - map < size && (isalnum(**next) || **next == '_')); token->value[i] = '\0'; @@ -146,6 +146,113 @@ dmnsn_tokenize_label(char *map, size_t size, dmnsn_token *token, return 0; } +/* Tokenize a language directive (#include, #declare, etc.) 
*/ +static int +dmnsn_tokenize_directive(char *map, size_t size, dmnsn_token *token, + char **next, unsigned int *line, unsigned int *col) +{ + unsigned int i = 0, alloc = 32; + + if (**next != '#') { + return 1; + } + + char *directive = malloc(alloc); + + do { + if (i + 1 >= alloc) { + alloc *= 2; + directive = realloc(directive, alloc); + } + + directive[i] = **next; + + ++i; + ++*col; + ++*next; + } while (*next - map < size && (isalnum(**next) || **next == '_')); + + directive[i] = '\0'; + + /* Now check if we really found a directive */ + +#define dmnsn_directive(str, tp) \ + do { \ + if (strcmp(directive, str) == 0) { \ + free(directive); \ + token->type = tp; \ + return 0; \ + } \ + } while (0) + + dmnsn_directive("#include", DMNSN_INCLUDE); + dmnsn_directive("#declare", DMNSN_DECLARE); + + free(directive); + return 1; +} + +/* Tokenize a string */ +static int +dmnsn_tokenize_string(char *map, size_t size, dmnsn_token *token, + char **next, unsigned int *line, unsigned int *col) +{ + unsigned int i = 0, alloc = 32; + + if (**next != '"') { + return 1; + } + + token->type = DMNSN_STRING; + token->value = malloc(alloc); + + ++*next; + while (*next - map < size && **next != '"') { + if (i + 1 >= alloc) { + alloc *= 2; + token->value = realloc(token->value, alloc); + } + + if (**next == '\\') { + ++*col; + ++*next; + + switch (**next) { + case '\\': + token->value[i] = '\\'; + break; + + case '"': + token->value[i] = '"'; + break; + + case 'n': + token->value[i] = '\n'; + break; + + default: + fprintf(stderr, + "Warning: unrecognised escape sequence '\\%c'" + " on line %u, column %u\n", + (int)**next, *line, *col); + token->value[i] = **next; + break; + } + } else { + token->value[i] = **next; + } + + ++i; + ++*col; + ++*next; + } + ++*next; + + token->value[i] = '\0'; + + return 0; +} + dmnsn_array * dmnsn_tokenize(FILE *file) { @@ -250,14 +357,37 @@ dmnsn_tokenize(FILE *file) } break; + case '#': + /* Language directive */ + if (dmnsn_tokenize_directive(map, 
size, &token, + &next, &line, &col) == 0) { + if (token.type == DMNSN_INCLUDE) { + } + } else { + fprintf(stderr, "Invalid directive on line %u, column %u.\n", + line, col); + goto bailout; + } + break; + + case '"': + if (dmnsn_tokenize_string(map, size, &token, &next, &line, &col) != 0) { + fprintf(stderr, "Invalid string on line %u, column %u.\n", + line, col); + goto bailout; + } + break; + default: if (dmnsn_tokenize_label(map, size, &token, &next, &line, &col) != 0) { /* Unrecognised character */ fprintf(stderr, - "Unrecognized character 0x%X in input at line %u, column %u.\n", - (unsigned int)*next, line, col); + "Unrecognized character '%c' (0x%X) in input at line %u," + " column %u.\n", + (int)*next, (unsigned int)*next, line, col); goto bailout; } + break; } dmnsn_array_push(tokens, &token); @@ -351,6 +481,13 @@ dmnsn_token_name(dmnsn_token_type token_type) dmnsn_token_map(DMNSN_SPHERE, "sphere"); dmnsn_token_map(DMNSN_BOX, "box"); + /* Directives */ + dmnsn_token_map(DMNSN_INCLUDE, "#include"); + dmnsn_token_map(DMNSN_DECLARE, "#declare"); + + /* Strings */ + dmnsn_token_map(DMNSN_STRING, "string"); + /* Identifiers */ dmnsn_token_map(DMNSN_IDENTIFIER, "identifier"); diff --git a/dimension/tokenize.h b/dimension/tokenize.h index 2e82f98..cc8148f 100644 --- a/dimension/tokenize.h +++ b/dimension/tokenize.h @@ -45,8 +45,15 @@ typedef enum { DMNSN_SPHERE, DMNSN_BOX, + /* Directives (#declare, etc.) */ + DMNSN_INCLUDE, /* Only used internally */ + DMNSN_DECLARE, + /* Identifiers */ DMNSN_IDENTIFIER, + + /* Strings */ + DMNSN_STRING, } dmnsn_token_type; typedef struct dmnsn_token dmnsn_token; diff --git a/tests/dimension/directives.pov b/tests/dimension/directives.pov new file mode 100644 index 0000000..eebedaa --- /dev/null +++ b/tests/dimension/directives.pov @@ -0,0 +1,24 @@ +/************************************************************************* + * Copyright (C) 2009 Tavian Barnes * + * * + * This file is part of The Dimension Test Suite. 
* + * * + * The Dimension Test Suite is free software; you can redistribute it * + * and/or modify it under the terms of the GNU General Public License as * + * published by the Free Software Foundation; either version 3 of the * + * License, or (at your option) any later version. * + * * + * The Dimension Test Suite is distributed in the hope that it will be * + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty * + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * + * General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program. If not, see <http://www.gnu.org/licenses/>. * + *************************************************************************/ + +// Test the language directives + +#include "punctuation.pov" + +#declare x \ No newline at end of file diff --git a/tests/dimension/strings.pov b/tests/dimension/strings.pov new file mode 100644 index 0000000..307b774 --- /dev/null +++ b/tests/dimension/strings.pov @@ -0,0 +1,21 @@ +/************************************************************************* + * Copyright (C) 2009 Tavian Barnes * + * * + * This file is part of The Dimension Test Suite. * + * * + * The Dimension Test Suite is free software; you can redistribute it * + * and/or modify it under the terms of the GNU General Public License as * + * published by the Free Software Foundation; either version 3 of the * + * License, or (at your option) any later version. * + * * + * The Dimension Test Suite is distributed in the hope that it will be * + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty * + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * + * General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
* + *************************************************************************/ + +// Test string handling, including escape sequences +"This is a string with\n\"escape sequences\"\\" diff --git a/tests/dimension/tokenizer.sh b/tests/dimension/tokenizer.sh index 6d67da3..f1f2b6b 100755 --- a/tests/dimension/tokenizer.sh +++ b/tests/dimension/tokenizer.sh @@ -39,6 +39,16 @@ if [ "$numeric" != "$numeric_exp" ]; then exitstatus=1 fi +strings=$(${top_builddir}/dimension/dimension --tokenize ${srcdir}/strings.pov) +strings_exp='((string "This is a string with +"escape sequences"\"))' + +if [ "$strings" != "$strings_exp" ]; then + echo "strings.pov tokenized as \"$strings\"" >&2 + echo " -- expected \"$strings_exp\"" >&2 + exitstatus=1 +fi + labels=$(${top_builddir}/dimension/dimension --tokenize ${srcdir}/labels.pov) labels_exp='(camera { } sphere { color (identifier "new_identifier") } box { color (identifier "new_identifier") })'; @@ -48,4 +58,13 @@ if [ "$labels" != "$labels_exp" ]; then exitstatus=1 fi +directives=$(${top_builddir}/dimension/dimension --tokenize ${srcdir}/directives.pov) +directives_exp='(#include (string "punctuation.pov") #declare (identifier "x"))'; + +if [ "$directives" != "$directives_exp" ]; then + echo "directives.pov tokenized as \"$directives\"" >&2 + echo " -- expected \"$directives_exp\"" >&2 + exitstatus=1 +fi + exit $exitstatus -- cgit v1.2.3