summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Tavian Barnes <tavianator@gmail.com> 2009-10-27 20:35:17 -0400
committer: Tavian Barnes <tavianator@gmail.com> 2009-10-27 20:35:17 -0400
commit: f6ce73bd76ee9b07bb13a6df9a5663a38ccf4013 (patch)
tree: f51f468548dae6ee78ff78bb0d43d9a914b1cdc3
parent: 50d844e91589a8f51cade42b732bcd1de889987b (diff)
download: dimension-f6ce73bd76ee9b07bb13a6df9a5663a38ccf4013.tar.xz
Tokenize numeric values.
-rw-r--r-- dimension/tokenize.c 61
-rw-r--r-- dimension/tokenize.h 5
-rw-r--r-- tests/dimension/Makefile.am 2
-rw-r--r-- tests/dimension/numeric.pov 2
-rwxr-xr-x tests/dimension/tokenizer.sh 11
5 files changed, 77 insertions, 4 deletions
diff --git a/dimension/tokenize.c b/dimension/tokenize.c
index 7019ad7..c34cfc6 100644
--- a/dimension/tokenize.c
+++ b/dimension/tokenize.c
@@ -40,7 +40,9 @@ dmnsn_tokenize(FILE *file)
return NULL;
}
- char *map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0), *next = map;
+ char *map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0),
+ *next = map,
+ *endi, *endf;
if (map == MAP_FAILED) {
fprintf(stderr, "Couldn't mmap() input stream.\n");
@@ -50,19 +52,28 @@ dmnsn_tokenize(FILE *file)
dmnsn_token token;
dmnsn_array *tokens = dmnsn_new_array(sizeof(dmnsn_token));
+ unsigned int line = 0, col = 0;
+ unsigned int i;
+
while (next - map < size) {
/* Saves us some code repetition in the vast majority of cases */
token.value = NULL;
switch (*next) {
case ' ':
- case '\n':
case '\r':
case '\t':
case '\f':
case '\v':
/* Skip whitespace */
++next;
+ ++col;
+ continue;
+
+ case '\n':
+ ++next;
+ ++line;
+ col = 0;
continue;
/* Macro to make basic symbol tokens easier */
@@ -86,6 +97,46 @@ dmnsn_tokenize(FILE *file)
dmnsn_simple_token('/', DMNSN_SLASH);
dmnsn_simple_token(',', DMNSN_COMMA);
+ /* Numeric values */
+ case '.': /* Number begins with a decimal point, as in `.2' */
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ strtoul(next, &endi, 0);
+ strtod(next, &endf);
+ if (endf > endi
+ /* These next conditions catch invalid octal integers being parsed as
+ floats, eg 08 */
+ && (*endi == '.' || *endi == 'e' || *endi == 'E' || *endi == 'p'
+ || *endi == 'P'))
+ {
+ token.type = DMNSN_FLOAT;
+ token.value = malloc(endf - next + 1);
+ strncpy(token.value, next, endf - next);
+ token.value[endf - next] = '\0';
+ next = endf;
+ } else if (endi > next) {
+ token.type = DMNSN_INT;
+ token.value = malloc(endi - next + 1);
+ strncpy(token.value, next, endi - next);
+ token.value[endi - next] = '\0';
+ next = endi;
+ } else {
+ fprintf(stderr, "Invalid numeric value on line %u, column %u.\n",
+ line, col);
+ dmnsn_delete_tokens(tokens);
+ munmap(map, size);
+ return NULL;
+ }
+ break;
+
default:
/* Unrecognised character */
fprintf(stderr, "Unrecognized character 0x%X in input.\n",
@@ -97,6 +148,7 @@ dmnsn_tokenize(FILE *file)
dmnsn_array_push(tokens, &token);
++next;
+ ++col;
}
munmap(map, size);
@@ -157,6 +209,7 @@ dmnsn_token_name(dmnsn_token_type token_type)
case type: \
return str;
+ /* Punctuation */
dmnsn_token_map(DMNSN_LBRACE, "{");
dmnsn_token_map(DMNSN_RBRACE, "}")
dmnsn_token_map(DMNSN_LPAREN, "\\(");
@@ -171,6 +224,10 @@ dmnsn_token_name(dmnsn_token_type token_type)
dmnsn_token_map(DMNSN_SLASH, "/");
dmnsn_token_map(DMNSN_COMMA, ",");
+ /* Numeric values */
+ dmnsn_token_map(DMNSN_INT, "int");
+ dmnsn_token_map(DMNSN_FLOAT, "float");
+
default:
printf("Warning: unrecognised token %d.\n", (int)token_type);
return "unrecognized-token";
diff --git a/dimension/tokenize.h b/dimension/tokenize.h
index e64b7eb..91d59f6 100644
--- a/dimension/tokenize.h
+++ b/dimension/tokenize.h
@@ -20,6 +20,7 @@
#include "../libdimension/dimension.h"
typedef enum {
+ /* Punctuation */
DMNSN_LBRACE, /* { */
DMNSN_RBRACE, /* } */
DMNSN_LPAREN, /* ( */
@@ -33,6 +34,10 @@ typedef enum {
DMNSN_STAR, /* * */
DMNSN_SLASH, /* / */
DMNSN_COMMA, /* , */
+
+ /* Numeric values */
+ DMNSN_INT,
+ DMNSN_FLOAT,
} dmnsn_token_type;
typedef struct dmnsn_token dmnsn_token;
diff --git a/tests/dimension/Makefile.am b/tests/dimension/Makefile.am
index 7bacab5..e9b7f7a 100644
--- a/tests/dimension/Makefile.am
+++ b/tests/dimension/Makefile.am
@@ -25,4 +25,4 @@ TESTS_ENVIRONMENT = top_builddir=$(top_builddir)
tokenizer.sh:
cp $(srcdir)/tokenizer.sh .
-EXTRA_DIST = tokenizer.sh punctuation.pov
+EXTRA_DIST = tokenizer.sh punctuation.pov numeric.pov
diff --git a/tests/dimension/numeric.pov b/tests/dimension/numeric.pov
new file mode 100644
index 0000000..df850d6
--- /dev/null
+++ b/tests/dimension/numeric.pov
@@ -0,0 +1,2 @@
+1 123456789 01234567 0x123456789 -0x01
+.1 0.1 1.0 0.123456789 -0.123456789
diff --git a/tests/dimension/tokenizer.sh b/tests/dimension/tokenizer.sh
index d965741..edd3bb4 100755
--- a/tests/dimension/tokenizer.sh
+++ b/tests/dimension/tokenizer.sh
@@ -24,6 +24,15 @@ punctuation_exp='({ \( [ < + - * / , > ] \) })'
if [ "$punctuation" != "$punctuation_exp" ]; then
echo "punctuation.pov tokenized as \"$punctuation\"" >&2
- echo " -- expected \"$punctuation_exp\"" >&2
+ echo " -- expected \"$punctuation_exp\"" >&2
+ exit 1;
+fi
+
+numeric=$(${top_builddir}/dimension/dimension --tokenize ${srcdir}/numeric.pov)
+numeric_exp='((int "1") (int "123456789") (int "01234567") (int "0x123456789") - (int "0x01") (float ".1") (float "0.1") (float "1.0") (float "0.123456789") - (float "0.123456789"))'
+
+if [ "$numeric" != "$numeric_exp" ]; then
+ echo "numeric.pov tokenized as \"$numeric\"" >&2
+ echo " -- expected \"$numeric_exp\"" >&2
exit 1;
fi