summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Tavian Barnes <tavianator@gmail.com> 2009-10-29 01:34:28 -0400
committer Tavian Barnes <tavianator@gmail.com> 2009-10-29 01:34:28 -0400
commit 87cb2d172843e114a8640de3fde61db3a2bf0a6a (patch)
tree 759747df1cde1f8647ea34e7738b3d013197128b
parent 7e77347d6135a9cffe82f5b1384a34fe3cd4c5cc (diff)
download dimension-87cb2d172843e114a8640de3fde61db3a2bf0a6a.tar.xz
Even more tokenizer work.
The tokenizer can now tokenize all of POV-Ray 3.6's include files. It turns out that some #include directives may be dynamic (e.g., preceded by #write directives), so the tokenizer can't perform includes directly; the executor must do this instead.
-rw-r--r-- dimension/tokenize.c 175
-rw-r--r-- dimension/tokenize.h 36
-rwxr-xr-x tests/dimension/directives.sh 2
3 files changed, 96 insertions, 117 deletions
diff --git a/dimension/tokenize.c b/dimension/tokenize.c
index 7a38f59..9a091f7 100644
--- a/dimension/tokenize.c
+++ b/dimension/tokenize.c
@@ -178,9 +178,15 @@ dmnsn_tokenize_directive(const char *filename,
return 1;
}
+ ++*next;
+ /* Handle spaces between `#' and directive */
+ while (*next - map < size && (**next == ' ' || **next == '\t')) {
+ ++*next;
+ }
+
char *directive = malloc(alloc);
- do {
+ while (*next - map < size && (isalnum(**next) || **next == '_')) {
if (i + 1 >= alloc) {
alloc *= 2;
directive = realloc(directive, alloc);
@@ -191,7 +197,7 @@ dmnsn_tokenize_directive(const char *filename,
++i;
++*col;
++*next;
- } while (*next - map < size && (isalnum(**next) || **next == '_'));
+ }
directive[i] = '\0';
@@ -206,32 +212,32 @@ dmnsn_tokenize_directive(const char *filename,
} \
} while (0)
- dmnsn_directive("#break", DMNSN_T_BREAK);
- dmnsn_directive("#case", DMNSN_T_CASE);
- dmnsn_directive("#debug", DMNSN_T_DEBUG);
- dmnsn_directive("#declare", DMNSN_T_DECLARE);
- dmnsn_directive("#default", DMNSN_T_DEFAULT);
- dmnsn_directive("#else", DMNSN_T_ELSE);
- dmnsn_directive("#end", DMNSN_T_END);
- dmnsn_directive("#error", DMNSN_T_ERROR);
- dmnsn_directive("#fclose", DMNSN_T_FCLOSE);
- dmnsn_directive("#fopen", DMNSN_T_FOPEN);
- dmnsn_directive("#if", DMNSN_T_IF);
- dmnsn_directive("#ifdef", DMNSN_T_IFDEF);
- dmnsn_directive("#ifndef", DMNSN_T_IFNDEF);
- dmnsn_directive("#include", DMNSN_T_INCLUDE);
- dmnsn_directive("#local", DMNSN_T_LOCAL);
- dmnsn_directive("#macro", DMNSN_T_MACRO);
- dmnsn_directive("#range", DMNSN_T_RANGE);
- dmnsn_directive("#read", DMNSN_T_READ);
- dmnsn_directive("#render", DMNSN_T_RENDER);
- dmnsn_directive("#statistics", DMNSN_T_STATISTICS);
- dmnsn_directive("#switch", DMNSN_T_SWITCH);
- dmnsn_directive("#undef", DMNSN_T_UNDEF);
- dmnsn_directive("#version", DMNSN_T_VERSION);
- dmnsn_directive("#warning", DMNSN_T_WARNING);
- dmnsn_directive("#while", DMNSN_T_WHILE);
- dmnsn_directive("#write", DMNSN_T_WRITE);
+ dmnsn_directive("break", DMNSN_T_BREAK);
+ dmnsn_directive("case", DMNSN_T_CASE);
+ dmnsn_directive("debug", DMNSN_T_DEBUG);
+ dmnsn_directive("declare", DMNSN_T_DECLARE);
+ dmnsn_directive("default", DMNSN_T_DEFAULT);
+ dmnsn_directive("else", DMNSN_T_ELSE);
+ dmnsn_directive("end", DMNSN_T_END);
+ dmnsn_directive("error", DMNSN_T_ERROR);
+ dmnsn_directive("fclose", DMNSN_T_FCLOSE);
+ dmnsn_directive("fopen", DMNSN_T_FOPEN);
+ dmnsn_directive("if", DMNSN_T_IF);
+ dmnsn_directive("ifdef", DMNSN_T_IFDEF);
+ dmnsn_directive("ifndef", DMNSN_T_IFNDEF);
+ dmnsn_directive("include", DMNSN_T_INCLUDE);
+ dmnsn_directive("local", DMNSN_T_LOCAL);
+ dmnsn_directive("macro", DMNSN_T_MACRO);
+ dmnsn_directive("range", DMNSN_T_RANGE);
+ dmnsn_directive("read", DMNSN_T_READ);
+ dmnsn_directive("render", DMNSN_T_RENDER);
+ dmnsn_directive("statistics", DMNSN_T_STATISTICS);
+ dmnsn_directive("switch", DMNSN_T_SWITCH);
+ dmnsn_directive("undef", DMNSN_T_UNDEF);
+ dmnsn_directive("version", DMNSN_T_VERSION);
+ dmnsn_directive("warning", DMNSN_T_WARNING);
+ dmnsn_directive("while", DMNSN_T_WHILE);
+ dmnsn_directive("write", DMNSN_T_WRITE);
free(directive);
return 1;
@@ -441,6 +447,13 @@ dmnsn_tokenize(const char *filename, FILE *file)
dmnsn_simple_token('-', DMNSN_T_MINUS);
dmnsn_simple_token('*', DMNSN_T_STAR);
dmnsn_simple_token(',', DMNSN_T_COMMA);
+ dmnsn_simple_token('=', DMNSN_T_EQUALS);
+ dmnsn_simple_token(';', DMNSN_T_SEMICOLON);
+ dmnsn_simple_token('?', DMNSN_T_QUESTION);
+ dmnsn_simple_token(':', DMNSN_T_COLON);
+ dmnsn_simple_token('&', DMNSN_T_AND);
+ dmnsn_simple_token('!', DMNSN_T_EXCLAMATION);
+ dmnsn_simple_token('|', DMNSN_T_PIPE);
/* Possible comment */
case '/':
@@ -456,7 +469,6 @@ dmnsn_tokenize(const char *filename, FILE *file)
break;
/* Numeric values */
- case '.': /* Number begins with a decimal point, as in `.2' */
case '0':
case '1':
case '2':
@@ -474,68 +486,19 @@ dmnsn_tokenize(const char *filename, FILE *file)
}
break;
+ case '.': /* Number may begin with a decimal point, as in `.2' */
+ if (dmnsn_tokenize_number(filename, &line, &col,
+ map, size, &next, &token) != 0) {
+ token.type = DMNSN_T_DOT;
+ ++col;
+ ++next;
+ }
+ break;
+
case '#':
/* Language directive */
if (dmnsn_tokenize_directive(filename, &line, &col,
- map, size, &next, &token) == 0) {
- if (token.type == DMNSN_T_INCLUDE) {
- /* Skip whitespace */
- while (next - map < size && isspace(*next) && *next != '\n') {
- ++next;
- }
-
- if (dmnsn_tokenize_string(filename, &line, &col,
- map, size, &next, &token) != 0) {
- dmnsn_diagnostic(filename, line, col,
- "Expected string after #include");
- goto bailout;
- }
-
- /* Search in same directory as current file */
- char *filename_copy = strdup(filename);
- char *localdir = dirname(filename_copy);
- char *local_include = malloc(strlen(localdir)
- + strlen(token.value)
- + 2);
- strcpy(local_include, localdir);
- strcat(local_include, "/");
- strcat(local_include, token.value);
- free(filename_copy);
- free(token.value);
-
- /* Try to open the included file */
- FILE *include = fopen(local_include, "r");
- if (!include) {
- dmnsn_diagnostic(filename, line, col,
- "Couldn't open included file \"%s\"",
- local_include);
- free(local_include);
- goto bailout;
- }
-
- /* Parse it recursively */
- dmnsn_array *included_tokens = dmnsn_tokenize(local_include, include);
- if (!included_tokens) {
- dmnsn_diagnostic(filename, line, col,
- "Error tokenizing included file \"%s\"",
- local_include);
- free(local_include);
- goto bailout;
- }
-
- fclose(include);
- free(local_include);
-
- /* Append the tokens from the included file */
- unsigned int i;
- for (i = 0; i < dmnsn_array_size(included_tokens); ++i) {
- dmnsn_array_push(tokens, dmnsn_array_at(included_tokens, i));
- }
-
- dmnsn_delete_array(included_tokens);
- continue;
- }
- } else {
+ map, size, &next, &token) != 0) {
dmnsn_diagnostic(filename, line, col, "Invalid language directive");
goto bailout;
}
@@ -644,19 +607,27 @@ dmnsn_token_name(dmnsn_token_type token_type)
return str;
/* Punctuation */
- dmnsn_token_map(DMNSN_T_LBRACE, "{");
- dmnsn_token_map(DMNSN_T_RBRACE, "}")
- dmnsn_token_map(DMNSN_T_LPAREN, "\\(");
- dmnsn_token_map(DMNSN_T_RPAREN, "\\)");
- dmnsn_token_map(DMNSN_T_LBRACKET, "[");
- dmnsn_token_map(DMNSN_T_RBRACKET, "]");
- dmnsn_token_map(DMNSN_T_LT, "<");
- dmnsn_token_map(DMNSN_T_GT, ">");
- dmnsn_token_map(DMNSN_T_PLUS, "+");
- dmnsn_token_map(DMNSN_T_MINUS, "-");
- dmnsn_token_map(DMNSN_T_STAR, "*");
- dmnsn_token_map(DMNSN_T_SLASH, "/");
- dmnsn_token_map(DMNSN_T_COMMA, ",");
+ dmnsn_token_map(DMNSN_T_LBRACE, "{");
+ dmnsn_token_map(DMNSN_T_RBRACE, "}")
+ dmnsn_token_map(DMNSN_T_LPAREN, "\\(");
+ dmnsn_token_map(DMNSN_T_RPAREN, "\\)");
+ dmnsn_token_map(DMNSN_T_LBRACKET, "[");
+ dmnsn_token_map(DMNSN_T_RBRACKET, "]");
+ dmnsn_token_map(DMNSN_T_LT, "<");
+ dmnsn_token_map(DMNSN_T_GT, ">");
+ dmnsn_token_map(DMNSN_T_PLUS, "+");
+ dmnsn_token_map(DMNSN_T_MINUS, "-");
+ dmnsn_token_map(DMNSN_T_STAR, "*");
+ dmnsn_token_map(DMNSN_T_SLASH, "/");
+ dmnsn_token_map(DMNSN_T_COMMA, ",");
+ dmnsn_token_map(DMNSN_T_EQUALS, "=");
+ dmnsn_token_map(DMNSN_T_SEMICOLON, ";");
+ dmnsn_token_map(DMNSN_T_QUESTION, "?");
+ dmnsn_token_map(DMNSN_T_COLON, ":");
+ dmnsn_token_map(DMNSN_T_AND, "&");
+ dmnsn_token_map(DMNSN_T_EXCLAMATION, "!");
+ dmnsn_token_map(DMNSN_T_DOT, ".");
+ dmnsn_token_map(DMNSN_T_PIPE, "|");
/* Numeric values */
dmnsn_token_map(DMNSN_T_INT, "int");
@@ -703,7 +674,7 @@ dmnsn_token_name(dmnsn_token_type token_type)
dmnsn_token_map(DMNSN_T_IDENTIFIER, "identifier");
default:
- printf("Warning: unrecognised token %d.\n", (int)token_type);
+ fprintf(stderr, "Warning: unrecognised token %d.\n", (int)token_type);
return "unrecognized-token";
}
}
diff --git a/dimension/tokenize.h b/dimension/tokenize.h
index a90073b..7a36232 100644
--- a/dimension/tokenize.h
+++ b/dimension/tokenize.h
@@ -21,19 +21,27 @@
typedef enum {
/* Punctuation */
- DMNSN_T_LBRACE, /* { */
- DMNSN_T_RBRACE, /* } */
- DMNSN_T_LPAREN, /* ( */
- DMNSN_T_RPAREN, /* ) */
- DMNSN_T_LBRACKET, /* [ */
- DMNSN_T_RBRACKET, /* ] */
- DMNSN_T_LT, /* < */
- DMNSN_T_GT, /* > */
- DMNSN_T_PLUS, /* + */
- DMNSN_T_MINUS, /* - */
- DMNSN_T_STAR, /* * */
- DMNSN_T_SLASH, /* / */
- DMNSN_T_COMMA, /* , */
+ DMNSN_T_LBRACE, /* { */
+ DMNSN_T_RBRACE, /* } */
+ DMNSN_T_LPAREN, /* ( */
+ DMNSN_T_RPAREN, /* ) */
+ DMNSN_T_LBRACKET, /* [ */
+ DMNSN_T_RBRACKET, /* ] */
+ DMNSN_T_LT, /* < */
+ DMNSN_T_GT, /* > */
+ DMNSN_T_PLUS, /* + */
+ DMNSN_T_MINUS, /* - */
+ DMNSN_T_STAR, /* * */
+ DMNSN_T_SLASH, /* / */
+ DMNSN_T_COMMA, /* , */
+ DMNSN_T_EQUALS, /* = */
+ DMNSN_T_SEMICOLON, /* ; */
+ DMNSN_T_QUESTION, /* ? */
+ DMNSN_T_COLON, /* : */
+ DMNSN_T_AND, /* & */
+ DMNSN_T_EXCLAMATION, /* ! */
+ DMNSN_T_DOT, /* . */
+ DMNSN_T_PIPE, /* | */
/* Numeric values */
DMNSN_T_INT,
@@ -59,7 +67,7 @@ typedef enum {
DMNSN_T_IF,
DMNSN_T_IFDEF,
DMNSN_T_IFNDEF,
- DMNSN_T_INCLUDE, /* Only used internally */
+ DMNSN_T_INCLUDE,
DMNSN_T_LOCAL,
DMNSN_T_MACRO,
DMNSN_T_RANGE,
diff --git a/tests/dimension/directives.sh b/tests/dimension/directives.sh
index a4114a8..68c01e3 100755
--- a/tests/dimension/directives.sh
+++ b/tests/dimension/directives.sh
@@ -20,7 +20,7 @@
#########################################################################
directives=$(${top_builddir}/dimension/dimension --tokenize ${srcdir}/directives.pov)
-directives_exp='({ \( [ < + - * / , > ] \) } #declare (identifier "x"))';
+directives_exp='(#include (string "punctuation.pov") #declare (identifier "x"))';
if [ "$directives" != "$directives_exp" ]; then
echo "directives.pov tokenized as \"$directives\"" >&2