diff options
Diffstat (limited to 'dimension/tokenize.c')
-rw-r--r-- | dimension/tokenize.c | 61 |
1 files changed, 59 insertions, 2 deletions
diff --git a/dimension/tokenize.c b/dimension/tokenize.c index 7019ad7..c34cfc6 100644 --- a/dimension/tokenize.c +++ b/dimension/tokenize.c @@ -40,7 +40,9 @@ dmnsn_tokenize(FILE *file) return NULL; } - char *map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0), *next = map; + char *map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0), + *next = map, + *endi, *endf; if (map == MAP_FAILED) { fprintf(stderr, "Couldn't mmap() input stream.\n"); @@ -50,19 +52,28 @@ dmnsn_tokenize(FILE *file) dmnsn_token token; dmnsn_array *tokens = dmnsn_new_array(sizeof(dmnsn_token)); + unsigned int line = 0, col = 0; + unsigned int i; + while (next - map < size) { /* Saves us some code repetition in the vast majority of cases */ token.value = NULL; switch (*next) { case ' ': - case '\n': case '\r': case '\t': case '\f': case '\v': /* Skip whitespace */ ++next; + ++col; + continue; + + case '\n': + ++next; + ++line; + col = 0; continue; /* Macro to make basic symbol tokens easier */ @@ -86,6 +97,46 @@ dmnsn_tokenize(FILE *file) dmnsn_simple_token('/', DMNSN_SLASH); dmnsn_simple_token(',', DMNSN_COMMA); + /* Numeric values */ + case '.': /* Number begins with a decimal point, as in `.2' */ + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + strtoul(next, &endi, 0); + strtod(next, &endf); + if (endf > endi + /* These next conditions catch invalid octal integers being parsed as + floats, eg 08 */ + && (*endi == '.' || *endi == 'e' || *endi == 'E' || *endi == 'p' + || *endi == 'P')) + { + token.type = DMNSN_FLOAT; + token.value = malloc(endf - next + 1); + strncpy(token.value, next, endf - next); + token.value[endf - next] = '\0'; + next = endf; + } else if (endi > next) { + token.type = DMNSN_INT; + token.value = malloc(endi - next + 1); + strncpy(token.value, next, endi - next); + token.value[endi - next] = '\0'; + next = endi; + } else { + fprintf(stderr, "Invalid numeric value on line %u, column %u.\n", + line, col); + dmnsn_delete_tokens(tokens); + munmap(map, size); + return NULL; + } + break; + default: /* Unrecognised character */ fprintf(stderr, "Unrecognized character 0x%X in input.\n", @@ -97,6 +148,7 @@ dmnsn_tokenize(FILE *file) dmnsn_array_push(tokens, &token); ++next; + ++col; } munmap(map, size); @@ -157,6 +209,7 @@ dmnsn_token_name(dmnsn_token_type token_type) case type: \ return str; + /* Punctuation */ dmnsn_token_map(DMNSN_LBRACE, "{"); dmnsn_token_map(DMNSN_RBRACE, "}") dmnsn_token_map(DMNSN_LPAREN, "\\("); @@ -171,6 +224,10 @@ dmnsn_token_name(dmnsn_token_type token_type) dmnsn_token_map(DMNSN_SLASH, "/"); dmnsn_token_map(DMNSN_COMMA, ","); + /* Numeric values */ + dmnsn_token_map(DMNSN_INT, "int"); + dmnsn_token_map(DMNSN_FLOAT, "float"); + default: printf("Warning: unrecognised token %d.\n", (int)token_type); return "unrecognized-token"; |