From f6ce73bd76ee9b07bb13a6df9a5663a38ccf4013 Mon Sep 17 00:00:00 2001
From: Tavian Barnes
Date: Tue, 27 Oct 2009 20:35:17 -0400
Subject: Tokenize numeric values.

---
 dimension/tokenize.c         | 61 ++++++++++++++++++++++++++++++++++++++++++--
 dimension/tokenize.h         |  5 ++++
 tests/dimension/Makefile.am  |  2 +-
 tests/dimension/numeric.pov  |  2 ++
 tests/dimension/tokenizer.sh | 11 +++++++-
 5 files changed, 77 insertions(+), 4 deletions(-)
 create mode 100644 tests/dimension/numeric.pov

diff --git a/dimension/tokenize.c b/dimension/tokenize.c
index 7019ad7..c34cfc6 100644
--- a/dimension/tokenize.c
+++ b/dimension/tokenize.c
@@ -40,7 +40,9 @@ dmnsn_tokenize(FILE *file)
     return NULL;
   }
 
-  char *map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0), *next = map;
+  char *map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0),
+       *next = map,
+       *endi, *endf;
   if (map == MAP_FAILED) {
     fprintf(stderr, "Couldn't mmap() input stream.\n");
     return NULL;
@@ -50,19 +52,28 @@ dmnsn_tokenize(FILE *file)
   dmnsn_token token;
   dmnsn_array *tokens = dmnsn_new_array(sizeof(dmnsn_token));
 
+  unsigned int line = 0, col = 0;
+  unsigned int i;
+
   while (next - map < size) {
     /* Saves us some code repetition in the vast majority of cases */
     token.value = NULL;
 
     switch (*next) {
     case ' ':
-    case '\n':
     case '\r':
     case '\t':
     case '\f':
     case '\v':
       /* Skip whitespace */
       ++next;
+      ++col;
+      continue;
+
+    case '\n':
+      ++next;
+      ++line;
+      col = 0;
       continue;
 
     /* Macro to make basic symbol tokens easier */
@@ -86,6 +97,46 @@
     dmnsn_simple_token('/', DMNSN_SLASH);
     dmnsn_simple_token(',', DMNSN_COMMA);
 
+    /* Numeric values */
+    case '.': /* Number begins with a decimal point, as in `.2' */
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+    case '8':
+    case '9':
+      strtoul(next, &endi, 0);
+      strtod(next, &endf);
+      if (endf > endi
+          /* These next conditions catch invalid octal integers being parsed as
+             floats, eg 08 */
+          && (*endi == '.' || *endi == 'e' || *endi == 'E' || *endi == 'p'
+              || *endi == 'P'))
+      {
+        token.type = DMNSN_FLOAT;
+        token.value = malloc(endf - next + 1);
+        strncpy(token.value, next, endf - next);
+        token.value[endf - next] = '\0';
+        next = endf;
+      } else if (endi > next) {
+        token.type = DMNSN_INT;
+        token.value = malloc(endi - next + 1);
+        strncpy(token.value, next, endi - next);
+        token.value[endi - next] = '\0';
+        next = endi;
+      } else {
+        fprintf(stderr, "Invalid numeric value on line %u, column %u.\n",
+                line, col);
+        dmnsn_delete_tokens(tokens);
+        munmap(map, size);
+        return NULL;
+      }
+      break;
+
     default:
       /* Unrecognised character */
       fprintf(stderr, "Unrecognized character 0x%X in input.\n",
@@ -97,6 +148,7 @@
 
     dmnsn_array_push(tokens, &token);
     ++next;
+    ++col;
   }
 
   munmap(map, size);
@@ -157,6 +209,7 @@ dmnsn_token_name(dmnsn_token_type token_type)
   case type: \
     return str;
 
+    /* Punctuation */
     dmnsn_token_map(DMNSN_LBRACE, "{");
     dmnsn_token_map(DMNSN_RBRACE, "}")
     dmnsn_token_map(DMNSN_LPAREN, "\\(");
@@ -171,6 +224,10 @@ dmnsn_token_name(dmnsn_token_type token_type)
     dmnsn_token_map(DMNSN_SLASH, "/");
     dmnsn_token_map(DMNSN_COMMA, ",");
 
+    /* Numeric values */
+    dmnsn_token_map(DMNSN_INT, "int");
+    dmnsn_token_map(DMNSN_FLOAT, "float");
+
   default:
     printf("Warning: unrecognised token %d.\n", (int)token_type);
     return "unrecognized-token";
diff --git a/dimension/tokenize.h b/dimension/tokenize.h
index e64b7eb..91d59f6 100644
--- a/dimension/tokenize.h
+++ b/dimension/tokenize.h
@@ -20,6 +20,7 @@
 #include "../libdimension/dimension.h"
 
 typedef enum {
+  /* Punctuation */
   DMNSN_LBRACE,   /* { */
   DMNSN_RBRACE,   /* } */
   DMNSN_LPAREN,   /* ( */
@@ -33,6 +34,10 @@ typedef enum {
   DMNSN_STAR,     /* * */
   DMNSN_SLASH,    /* / */
   DMNSN_COMMA,    /* , */
+
+  /* Numeric values */
+  DMNSN_INT,
+  DMNSN_FLOAT,
 } dmnsn_token_type;
 
 typedef struct dmnsn_token dmnsn_token;
diff --git a/tests/dimension/Makefile.am b/tests/dimension/Makefile.am
index 7bacab5..e9b7f7a 100644
--- a/tests/dimension/Makefile.am
+++ b/tests/dimension/Makefile.am
@@ -25,4 +25,4 @@ TESTS_ENVIRONMENT = top_builddir=$(top_builddir)
 tokenizer.sh:
 	cp $(srcdir)/tokenizer.sh .
 
-EXTRA_DIST = tokenizer.sh punctuation.pov
+EXTRA_DIST = tokenizer.sh punctuation.pov numeric.pov
diff --git a/tests/dimension/numeric.pov b/tests/dimension/numeric.pov
new file mode 100644
index 0000000..df850d6
--- /dev/null
+++ b/tests/dimension/numeric.pov
@@ -0,0 +1,2 @@
+1 123456789 01234567 0x123456789 -0x01
+.1 0.1 1.0 0.123456789 -0.123456789
diff --git a/tests/dimension/tokenizer.sh b/tests/dimension/tokenizer.sh
index d965741..edd3bb4 100755
--- a/tests/dimension/tokenizer.sh
+++ b/tests/dimension/tokenizer.sh
@@ -24,6 +24,15 @@ punctuation_exp='({ \( [ < + - * / , > ] \) })'
 
 if [ "$punctuation" != "$punctuation_exp" ]; then
   echo "punctuation.pov tokenized as \"$punctuation\"" >&2
-  echo " -- expected \"$punctuation_exp\"" >&2
+  echo " -- expected \"$punctuation_exp\"" >&2
+  exit 1;
+fi
+
+numeric=$(${top_builddir}/dimension/dimension --tokenize ${srcdir}/numeric.pov)
+numeric_exp='((int "1") (int "123456789") (int "01234567") (int "0x123456789") - (int "0x01") (float ".1") (float "0.1") (float "1.0") (float "0.123456789") - (float "0.123456789"))'
+
+if [ "$numeric" != "$numeric_exp" ]; then
+  echo "numeric.pov tokenized as \"$numeric\"" >&2
+  echo " -- expected \"$numeric_exp\"" >&2
   exit 1;
 fi
-- 
cgit v1.2.3
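
Editor's note: the integer/float disambiguation in the patch leans entirely on
where strtoul() and strtod() stop parsing.  The standalone sketch below is not
part of the patch; classify() and its sample inputs are illustrative only, and
it simply reproduces the same decision rule so it can be tried in isolation.

#include <stdio.h>
#include <stdlib.h>

/* Same rule as the tokenizer: parse the longest integer (base 0, so octal and
   hex literals work) and the longest float starting at s, and call the value a
   float only when the float parse got further *and* the integer parse stopped
   at a '.' or an exponent/hex-power marker.  This is what keeps "08" from
   becoming the float 8.0: strtoul() stops at the invalid octal digit '8',
   which is none of those characters, so only the leading "0" is consumed. */
static void
classify(const char *s)
{
  char *endi, *endf;
  strtoul(s, &endi, 0);
  strtod(s, &endf);

  if (endf > endi
      && (*endi == '.' || *endi == 'e' || *endi == 'E'
          || *endi == 'p' || *endi == 'P')) {
    printf("%-12s -> float, %ld chars consumed\n", s, (long)(endf - s));
  } else if (endi > s) {
    printf("%-12s -> int,   %ld chars consumed\n", s, (long)(endi - s));
  } else {
    printf("%-12s -> invalid numeric value\n", s);
  }
}

int
main(void)
{
  classify("123456789");   /* int */
  classify("01234567");    /* int (octal) */
  classify("0x01");        /* int (hex) */
  classify(".1");          /* float */
  classify("1.0");         /* float */
  classify("08");          /* int "0" only -- the octal check kicks in */
  return 0;
}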