From 30754dcca0f8a4d553e9c865f7cd27f85d2919ee Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Tue, 27 Oct 2009 00:02:47 -0400 Subject: New mmap-based tokenizer. --- dimension/tokenize.c | 46 +++++++++++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 13 deletions(-) (limited to 'dimension/tokenize.c') diff --git a/dimension/tokenize.c b/dimension/tokenize.c index 6a9a723..efa7fce 100644 --- a/dimension/tokenize.c +++ b/dimension/tokenize.c @@ -21,35 +21,55 @@ #include #include #include +#include +#include dmnsn_array * dmnsn_tokenize(FILE *file) { - char c; + int fd = fileno(file); + off_t size = lseek(fd, 0, SEEK_END); + lseek(fd, 0, SEEK_SET); + char *map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0), *next = map; + dmnsn_token token; dmnsn_array *tokens = dmnsn_new_array(sizeof(dmnsn_token)); - while (!feof(file)) { - fread(&c, 1, 1, file); - - if (isspace(c)) - continue; - - if (c == '{') { - token.type = DMNSN_LBRACE; + while (next - map < size) { + switch (*next) { + case ' ': + case '\n': + case '\r': + case '\t': + case '\f': + case '\v': + /* Skip whitespace */ + break; + + case '{': + token.type = DMNSN_LBRACE; token.value = NULL; dmnsn_array_push(tokens, &token); - } else if (c == '}') { - token.type = DMNSN_RBRACE; + break; + + case '}': + token.type = DMNSN_LBRACE; token.value = NULL; dmnsn_array_push(tokens, &token); - } else { - /* Invalid character */ + break; + + default: + /* Unrecognised character */ + fprintf(stderr, "Unrecognized character 0x%X in input.\n", (unsigned int)*next); dmnsn_delete_tokens(tokens); + munmap(map, size); return NULL; } + + ++next; } + munmap(map, size); return tokens; } -- cgit v1.2.3