From c66148484cad66972348ffe850fd23bc9dca60f9 Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Thu, 17 Dec 2009 03:12:34 -0500 Subject: Tokenize on-the-fly. No more manual tokenization and parsing, giving a simpler interface. Also, this brings us closer to a dmnsn_realize_string() interface. --- dimension/lexer.l | 217 +++++++++++++++++++++++++++++------------------------- 1 file changed, 118 insertions(+), 99 deletions(-) (limited to 'dimension/lexer.l') diff --git a/dimension/lexer.l b/dimension/lexer.l index bfe5b8a..b8bfbb0 100644 --- a/dimension/lexer.l +++ b/dimension/lexer.l @@ -20,12 +20,15 @@ %option reentrant stack yylineno noyywrap prefix="dmnsn_yy" outfile="lex.yy.c" %{ -#define YY_DECL static int yylex(const char *filename, dmnsn_array *tokens, \ - yyscan_t yyscanner) +#include "parse.h" #include "tokenize.h" #include "utility.h" #include #include + +#define YY_DECL int yylex(dmnsn_parse_item *lvalp, \ + dmnsn_parse_location *llocp, \ + const char *filename, yyscan_t yyscanner) %} %x DMNSN_BLOCK_COMMENT @@ -38,34 +41,34 @@ %{ /* Some helpful macros that set fields of a token correctly, and other stuff */ -#define NEW_TOKEN(token_type) \ - do { \ - token.type = token_type; \ - token.filename = filename; \ - token.line = yylineno; \ - token.col = yycolumn; \ - token.value = NULL; \ +#define NEW_TOKEN(token_type) \ + do { \ + token = token_type; \ + lvalp->value = NULL; \ + llocp->first_filename = llocp->last_filename = filename; \ + llocp->first_line = llocp->last_line = yylineno; \ + llocp->first_column = llocp->last_column = yycolumn; \ } while (0) -#define CALCULATE_COLUMN() yycolumn += yyleng +#define CALCULATE_COLUMN() do { yycolumn += yyleng; } while (0) -#define PUSH() \ - do { \ - dmnsn_array_push(tokens, &token); \ - CALCULATE_COLUMN(); \ +#define RETURN() \ + do { \ + CALCULATE_COLUMN(); \ + return token; \ } while (0) -#define PUSH_TOKEN(token_type) \ +#define RETURN_TOKEN(token_type) \ do { \ NEW_TOKEN(token_type); \ - PUSH(); \ + RETURN(); 
\ } while (0) -#define PUSH_VALUE_TOKEN(token_type) \ +#define RETURN_VALUE_TOKEN(token_type) \ do { \ NEW_TOKEN(token_type); \ - token.value = strdup(yytext); \ - PUSH(); \ + lvalp->value = strdup(yytext); \ + RETURN(); \ } while (0) #define STRING_TOKEN() \ @@ -73,8 +76,8 @@ NEW_TOKEN(DMNSN_T_STRING); \ string_length = 0; \ string_extent = 8; \ - token.value = malloc(string_extent); \ - token.value[0] = '\0'; \ + lvalp->value = malloc(string_extent); \ + lvalp->value[0] = '\0'; \ CALCULATE_COLUMN(); \ } while (0) @@ -82,15 +85,15 @@ do { \ if (string_length + len + 1 >= string_length) { \ string_extent = 2*(string_length + len + 1); \ - token.value = realloc(token.value, string_extent); \ + lvalp->value = realloc(lvalp->value, string_extent); \ } \ \ - strncpy(token.value + string_length, str, len + 1); \ + strncpy(lvalp->value + string_length, str, len + 1); \ string_length += len; \ CALCULATE_COLUMN(); \ } while(0) -dmnsn_token token; +int token; size_t string_length, string_extent; unsigned long wchar; %} @@ -115,89 +118,89 @@ unsigned long wchar; [^\n]+ CALCULATE_COLUMN(); (?# Punctuation) -"{" PUSH_TOKEN(DMNSN_T_LBRACE); -"}" PUSH_TOKEN(DMNSN_T_RBRACE); -"(" PUSH_TOKEN(DMNSN_T_LPAREN); -")" PUSH_TOKEN(DMNSN_T_RPAREN); -"[" PUSH_TOKEN(DMNSN_T_LBRACKET); -"]" PUSH_TOKEN(DMNSN_T_RBRACKET); -"+" PUSH_TOKEN(DMNSN_T_PLUS); -"-" PUSH_TOKEN(DMNSN_T_MINUS); -"*" PUSH_TOKEN(DMNSN_T_STAR); -"/" PUSH_TOKEN(DMNSN_T_SLASH); -"," PUSH_TOKEN(DMNSN_T_COMMA); -";" PUSH_TOKEN(DMNSN_T_SEMICOLON); -"?" PUSH_TOKEN(DMNSN_T_QUESTION); -":" PUSH_TOKEN(DMNSN_T_COLON); -"&" PUSH_TOKEN(DMNSN_T_AND); -"." PUSH_TOKEN(DMNSN_T_DOT); -"|" PUSH_TOKEN(DMNSN_T_PIPE); -"<" PUSH_TOKEN(DMNSN_T_LESS); -">" PUSH_TOKEN(DMNSN_T_GREATER); -"!" 
PUSH_TOKEN(DMNSN_T_BANG); -"=" PUSH_TOKEN(DMNSN_T_EQUALS); -"<=" PUSH_TOKEN(DMNSN_T_LESS_EQUAL); -">=" PUSH_TOKEN(DMNSN_T_GREATER_EQUAL); -"!=" PUSH_TOKEN(DMNSN_T_NOT_EQUAL); +"{" RETURN_TOKEN(DMNSN_T_LBRACE); +"}" RETURN_TOKEN(DMNSN_T_RBRACE); +"(" RETURN_TOKEN(DMNSN_T_LPAREN); +")" RETURN_TOKEN(DMNSN_T_RPAREN); +"[" RETURN_TOKEN(DMNSN_T_LBRACKET); +"]" RETURN_TOKEN(DMNSN_T_RBRACKET); +"+" RETURN_TOKEN(DMNSN_T_PLUS); +"-" RETURN_TOKEN(DMNSN_T_MINUS); +"*" RETURN_TOKEN(DMNSN_T_STAR); +"/" RETURN_TOKEN(DMNSN_T_SLASH); +"," RETURN_TOKEN(DMNSN_T_COMMA); +";" RETURN_TOKEN(DMNSN_T_SEMICOLON); +"?" RETURN_TOKEN(DMNSN_T_QUESTION); +":" RETURN_TOKEN(DMNSN_T_COLON); +"&" RETURN_TOKEN(DMNSN_T_AND); +"." RETURN_TOKEN(DMNSN_T_DOT); +"|" RETURN_TOKEN(DMNSN_T_PIPE); +"<" RETURN_TOKEN(DMNSN_T_LESS); +">" RETURN_TOKEN(DMNSN_T_GREATER); +"!" RETURN_TOKEN(DMNSN_T_BANG); +"=" RETURN_TOKEN(DMNSN_T_EQUALS); +"<=" RETURN_TOKEN(DMNSN_T_LESS_EQUAL); +">=" RETURN_TOKEN(DMNSN_T_GREATER_EQUAL); +"!=" RETURN_TOKEN(DMNSN_T_NOT_EQUAL); (?# Integers) [[:digit:]]+ | -0(x|X)[[:digit:]aAbBcCdDeEfF]+ PUSH_VALUE_TOKEN(DMNSN_T_INTEGER); +0(x|X)[[:digit:]aAbBcCdDeEfF]+ RETURN_VALUE_TOKEN(DMNSN_T_INTEGER); (?# Floats) [[:digit:]]*\.?[[:digit:]]+((e|E)(\+|-)?[[:digit:]]+)? 
{ - PUSH_VALUE_TOKEN(DMNSN_T_FLOAT); + RETURN_VALUE_TOKEN(DMNSN_T_FLOAT); } (?# Keywords) -"angle" PUSH_TOKEN(DMNSN_T_ANGLE); -"background" PUSH_TOKEN(DMNSN_T_BACKGROUND); -"box" PUSH_TOKEN(DMNSN_T_BOX); -"blue" PUSH_TOKEN(DMNSN_T_BLUE); -"camera" PUSH_TOKEN(DMNSN_T_CAMERA); -"color" PUSH_TOKEN(DMNSN_T_COLOR); -"colour" PUSH_TOKEN(DMNSN_T_COLOR); -"direction" PUSH_TOKEN(DMNSN_T_DIRECTION); -"filter" PUSH_TOKEN(DMNSN_T_FILTER); -"gray" PUSH_TOKEN(DMNSN_T_GRAY); -"grey" PUSH_TOKEN(DMNSN_T_GRAY); -"green" PUSH_TOKEN(DMNSN_T_GREEN); -"location" PUSH_TOKEN(DMNSN_T_LOCATION); -"look_at" PUSH_TOKEN(DMNSN_T_LOOK_AT); -"light_source" PUSH_TOKEN(DMNSN_T_LIGHT_SOURCE); -"perspective" PUSH_TOKEN(DMNSN_T_PERSPECTIVE); -"pigment" PUSH_TOKEN(DMNSN_T_PIGMENT); -"red" PUSH_TOKEN(DMNSN_T_RED); -"rgb" PUSH_TOKEN(DMNSN_T_RGB); -"rgbf" PUSH_TOKEN(DMNSN_T_RGBF); -"rgbft" PUSH_TOKEN(DMNSN_T_RGBFT); -"rgbt" PUSH_TOKEN(DMNSN_T_RGBT); -"right" PUSH_TOKEN(DMNSN_T_RIGHT); -"rotate" PUSH_TOKEN(DMNSN_T_ROTATE); -"sphere" PUSH_TOKEN(DMNSN_T_SPHERE); -"sky" PUSH_TOKEN(DMNSN_T_SKY); -"t" PUSH_TOKEN(DMNSN_T_T); -"texture" PUSH_TOKEN(DMNSN_T_TEXTURE); -"transmit" PUSH_TOKEN(DMNSN_T_TRANSMIT); -"u" PUSH_TOKEN(DMNSN_T_U); -"up" PUSH_TOKEN(DMNSN_T_UP); -"v" PUSH_TOKEN(DMNSN_T_V); -"x" PUSH_TOKEN(DMNSN_T_X); -"y" PUSH_TOKEN(DMNSN_T_Y); -"z" PUSH_TOKEN(DMNSN_T_Z); +"angle" RETURN_TOKEN(DMNSN_T_ANGLE); +"background" RETURN_TOKEN(DMNSN_T_BACKGROUND); +"box" RETURN_TOKEN(DMNSN_T_BOX); +"blue" RETURN_TOKEN(DMNSN_T_BLUE); +"camera" RETURN_TOKEN(DMNSN_T_CAMERA); +"color" RETURN_TOKEN(DMNSN_T_COLOR); +"colour" RETURN_TOKEN(DMNSN_T_COLOR); +"direction" RETURN_TOKEN(DMNSN_T_DIRECTION); +"filter" RETURN_TOKEN(DMNSN_T_FILTER); +"gray" RETURN_TOKEN(DMNSN_T_GRAY); +"grey" RETURN_TOKEN(DMNSN_T_GRAY); +"green" RETURN_TOKEN(DMNSN_T_GREEN); +"location" RETURN_TOKEN(DMNSN_T_LOCATION); +"look_at" RETURN_TOKEN(DMNSN_T_LOOK_AT); +"light_source" RETURN_TOKEN(DMNSN_T_LIGHT_SOURCE); +"perspective" 
RETURN_TOKEN(DMNSN_T_PERSPECTIVE); +"pigment" RETURN_TOKEN(DMNSN_T_PIGMENT); +"red" RETURN_TOKEN(DMNSN_T_RED); +"rgb" RETURN_TOKEN(DMNSN_T_RGB); +"rgbf" RETURN_TOKEN(DMNSN_T_RGBF); +"rgbft" RETURN_TOKEN(DMNSN_T_RGBFT); +"rgbt" RETURN_TOKEN(DMNSN_T_RGBT); +"right" RETURN_TOKEN(DMNSN_T_RIGHT); +"rotate" RETURN_TOKEN(DMNSN_T_ROTATE); +"sphere" RETURN_TOKEN(DMNSN_T_SPHERE); +"sky" RETURN_TOKEN(DMNSN_T_SKY); +"t" RETURN_TOKEN(DMNSN_T_T); +"texture" RETURN_TOKEN(DMNSN_T_TEXTURE); +"transmit" RETURN_TOKEN(DMNSN_T_TRANSMIT); +"u" RETURN_TOKEN(DMNSN_T_U); +"up" RETURN_TOKEN(DMNSN_T_UP); +"v" RETURN_TOKEN(DMNSN_T_V); +"x" RETURN_TOKEN(DMNSN_T_X); +"y" RETURN_TOKEN(DMNSN_T_Y); +"z" RETURN_TOKEN(DMNSN_T_Z); (?# Directives) -"#include" PUSH_TOKEN(DMNSN_T_INCLUDE); -"#declare" PUSH_TOKEN(DMNSN_T_DECLARE); +"#include" RETURN_TOKEN(DMNSN_T_INCLUDE); +"#declare" RETURN_TOKEN(DMNSN_T_DECLARE); (?# Identifiers) -[[:alpha:]][[:alnum:]_]* PUSH_VALUE_TOKEN(DMNSN_T_IDENTIFIER); +[[:alpha:]][[:alnum:]_]* RETURN_VALUE_TOKEN(DMNSN_T_IDENTIFIER); (?# Strings) "\"" STRING_TOKEN(); yy_push_state(DMNSN_STRING, yyscanner); [^\\\"\n]* STRCAT(yytext, yyleng); -"\"" PUSH(); yy_pop_state(yyscanner); +"\"" yy_pop_state(yyscanner); RETURN(); (?# String escape sequences) @@ -218,8 +221,8 @@ unsigned long wchar; "u"[[:digit:]aAbBcCdDeEfF]{4} { wchar = strtoul(yytext + 1, NULL, 16); STRCAT("", 2); - token.value[string_length - 2] = wchar/256; - token.value[string_length - 1] = wchar%256; + lvalp->value[string_length - 2] = wchar/256; + lvalp->value[string_length - 1] = wchar%256; yy_pop_state(yyscanner); } . 
{
@@ -245,8 +248,11 @@ unsigned long wchar;
 %%
 
 dmnsn_array *
-dmnsn_tokenize(const char *filename, FILE *file)
+dmnsn_tokenize(FILE *file, const char *filename) /* returns the token array, or NULL if the lexer reports 1 or 2 */
 {
+  dmnsn_token token;               /* flat record stored in the array */
+  dmnsn_parse_item item;           /* value slot filled in by yylex() */
+  dmnsn_parse_location location;   /* source position filled in by yylex() */
   dmnsn_array *tokens = dmnsn_new_array(sizeof(dmnsn_token));
 
   yyscan_t scanner;
@@ -254,24 +260,37 @@ dmnsn_tokenize(const char *filename, FILE *file)
   yylex_init(&scanner);
   yyset_in(file, scanner);
 
-  if (yylex(filename, tokens, scanner) != 0) {
-    dmnsn_delete_tokens(tokens);
-    tokens = NULL;
+  while ((token.type = yylex(&item, &location, filename, scanner)) != 0) {
+    if (token.type == 1 || token.type == 2) { /* NOTE(review): presumably lexer error codes -- confirm */
+      dmnsn_delete_tokens(tokens);
+      tokens = NULL;
+      break; /* the array is gone: never push into, or delete, NULL on a later pass */
+    }
+    token.value    = item.value; /* the token now owns the string; dmnsn_delete_token() frees it */
+    token.filename = location.first_filename;
+    token.line     = location.first_line;
+    token.col      = location.first_column;
+    dmnsn_array_push(tokens, &token);
   }
 
   yylex_destroy(scanner);
-
   return tokens;
 }
 
+void
+dmnsn_delete_token(dmnsn_token token) /* releases the string held by one token */
+{
+  free(token.value); /* NULL for valueless tokens (NEW_TOKEN), so this is safe */
+}
+
 void
 dmnsn_delete_tokens(dmnsn_array *tokens)
 {
-  dmnsn_token *token;
+  dmnsn_token token; /* by-value copy of each element */
   unsigned int i;
   for (i = 0; i < dmnsn_array_size(tokens); ++i) {
-    token = dmnsn_array_at(tokens, i);
-    free(token->value);
+    dmnsn_array_get(tokens, i, &token);
+    dmnsn_delete_token(token);
   }
   dmnsn_delete_array(tokens);
 }
-- 
cgit v1.2.3