diff options
Diffstat (limited to 'dimension/lexer.l')
-rw-r--r-- | dimension/lexer.l | 291 |
1 files changed, 291 insertions, 0 deletions
/*************************************************************************
 * Copyright (C) 2009 Tavian Barnes <tavianator@gmail.com>               *
 *                                                                       *
 * This file is part of Dimension.                                       *
 *                                                                       *
 * Dimension is free software; you can redistribute it and/or modify it  *
 * under the terms of the GNU General Public License as published by the *
 * Free Software Foundation; either version 3 of the License, or (at     *
 * your option) any later version.                                       *
 *                                                                       *
 * Dimension is distributed in the hope that it will be useful, but      *
 * WITHOUT ANY WARRANTY; without even the implied warranty of            *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU     *
 * General Public License for more details.                              *
 *                                                                       *
 * You should have received a copy of the GNU General Public License     *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>. *
 *************************************************************************/

%option reentrant stack yylineno noyywrap

%{
/*
 * Tokenizer.  yylex() is private to this file: it scans the whole input in
 * a single call, appending one dmnsn_token per lexeme to `tokens', and
 * returns 0 at end of input or 1 after reporting an error.  The public entry
 * point is dmnsn_tokenize(), defined at the bottom of this file.
 */
#define YY_DECL static int yylex(const char *filename, dmnsn_array *tokens, \
                                 yyscan_t yyscanner)
#include "tokenize.h"
#include "utility.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h> /* strdup(), strncpy() */
%}

  /*
   * Exclusive start conditions.  They are entered with yy_push_state() and
   * left with yy_pop_state(), so block comments may nest and an escape
   * sequence returns to the string that contains it.
   */
%x DMNSN_BLOCK_COMMENT
%x DMNSN_LINE_COMMENT
%x DMNSN_STRING
%x DMNSN_STRING_ESCAPE

%%

%{
/*
 * Everything in this block is local to yylex().  The macros below fill in
 * and push tokens, and keep yycolumn up to date.
 *
 * NOTE(review): nothing in this file resets yycolumn at a newline;
 * presumably the reentrant flex skeleton does that when it bumps yylineno --
 * confirm against the flex version in use.
 */

/* Start a new token of the given type at the current source position */
#define NEW_TOKEN(token_type)                                                  \
  do {                                                                         \
    token.type     = token_type;                                               \
    token.filename = filename;                                                 \
    token.line     = yylineno;                                                 \
    token.col      = yycolumn;                                                 \
    token.value    = NULL;                                                     \
  } while (0)

/* Advance the column past the text that was just matched */
#define CALCULATE_COLUMN() yycolumn += yyleng

/* Report an allocation failure and abandon the scan */
#define OUT_OF_MEMORY()                                                        \
  do {                                                                         \
    dmnsn_diagnostic(filename, yylineno, yycolumn, "Out of memory");           \
    return 1;                                                                  \
  } while (0)

/*
 * Report a string literal that hits a newline or the end of the input before
 * its closing quote.  The half-built token was never pushed, so its buffer is
 * released here.  The diagnostic points at the opening quote.
 */
#define UNTERMINATED_STRING()                                                  \
  do {                                                                         \
    dmnsn_diagnostic(filename, token.line, token.col,                          \
                     "Unterminated string");                                   \
    free(token.value);                                                         \
    token.value = NULL;                                                        \
    return 1;                                                                  \
  } while (0)

/* Append the current token to the array (the array stores a copy) */
#define PUSH()                                                                 \
  do {                                                                         \
    dmnsn_array_push(tokens, &token);                                          \
    CALCULATE_COLUMN();                                                        \
  } while (0)

/* Push a token that carries no text, such as punctuation or a keyword */
#define PUSH_TOKEN(token_type)                                                 \
  do {                                                                         \
    NEW_TOKEN(token_type);                                                     \
    PUSH();                                                                    \
  } while (0)

/* Push a token whose value is a heap-allocated copy of the matched text */
#define PUSH_VALUE_TOKEN(token_type)                                           \
  do {                                                                         \
    NEW_TOKEN(token_type);                                                     \
    token.value = strdup(yytext);                                              \
    if (!token.value) {                                                        \
      OUT_OF_MEMORY();                                                         \
    }                                                                          \
    PUSH();                                                                    \
  } while (0)

/*
 * Begin a string token with an empty, growable value.  The token is only
 * pushed once the closing quote is seen.
 */
#define STRING_TOKEN()                                                         \
  do {                                                                         \
    NEW_TOKEN(DMNSN_T_STRING);                                                 \
    string_length = 0;                                                         \
    string_extent = 8;                                                         \
    token.value   = malloc(string_extent);                                     \
    if (!token.value) {                                                        \
      OUT_OF_MEMORY();                                                         \
    }                                                                          \
    token.value[0] = '\0';                                                     \
    CALCULATE_COLUMN();                                                        \
  } while (0)

/*
 * Append `len' bytes of `str' plus a terminating NUL to the string token,
 * growing the buffer when it is too small.  strncpy() is used on purpose:
 * when `str' is shorter than `len' (the \u rule passes "" with a length of 2)
 * the remainder is zero-filled and the caller overwrites it afterwards.
 */
#define STRCAT(str, len)                                                       \
  do {                                                                         \
    if (string_length + (len) + 1 > string_extent) {                           \
      char *grown;                                                             \
      string_extent = 2*(string_length + (len) + 1);                           \
      grown = realloc(token.value, string_extent);                             \
      if (!grown) {                                                            \
        free(token.value);                                                     \
        token.value = NULL;                                                    \
        OUT_OF_MEMORY();                                                       \
      }                                                                        \
      token.value = grown;                                                     \
    }                                                                          \
                                                                               \
    strncpy(token.value + string_length, (str), (len) + 1);                    \
    string_length += (len);                                                    \
    CALCULATE_COLUMN();                                                        \
  } while (0)

dmnsn_token token;                    /* The token currently being built */
size_t string_length, string_extent;  /* Used/allocated bytes of token.value */
unsigned long wchar;                  /* Code point of a \uXXXX escape */
%}

  /* Comments */

<INITIAL,DMNSN_BLOCK_COMMENT>"/*" {
  yy_push_state(DMNSN_BLOCK_COMMENT, yyscanner);
  CALCULATE_COLUMN();
}
<DMNSN_BLOCK_COMMENT>"*/"       CALCULATE_COLUMN(); yy_pop_state(yyscanner);
<DMNSN_BLOCK_COMMENT>[^*/\n]*   CALCULATE_COLUMN();
<DMNSN_BLOCK_COMMENT>"/"        CALCULATE_COLUMN();
<DMNSN_BLOCK_COMMENT>"*"        CALCULATE_COLUMN();
<DMNSN_BLOCK_COMMENT>\n         ;

"//" {
  yy_push_state(DMNSN_LINE_COMMENT, yyscanner);
  CALCULATE_COLUMN();
}
<DMNSN_LINE_COMMENT>\n          ; yy_pop_state(yyscanner);
<DMNSN_LINE_COMMENT>[^\n]+      CALCULATE_COLUMN();

  /* Punctuation */
"{"     PUSH_TOKEN(DMNSN_T_LBRACE);
"}"     PUSH_TOKEN(DMNSN_T_RBRACE);
"("     PUSH_TOKEN(DMNSN_T_LPAREN);
")"     PUSH_TOKEN(DMNSN_T_RPAREN);
"["     PUSH_TOKEN(DMNSN_T_LBRACKET);
"]"     PUSH_TOKEN(DMNSN_T_RBRACKET);
"+"     PUSH_TOKEN(DMNSN_T_PLUS);
"-"     PUSH_TOKEN(DMNSN_T_MINUS);
"*"     PUSH_TOKEN(DMNSN_T_STAR);
"/"     PUSH_TOKEN(DMNSN_T_SLASH);
","     PUSH_TOKEN(DMNSN_T_COMMA);
";"     PUSH_TOKEN(DMNSN_T_SEMICOLON);
"?"     PUSH_TOKEN(DMNSN_T_QUESTION);
":"     PUSH_TOKEN(DMNSN_T_COLON);
"&"     PUSH_TOKEN(DMNSN_T_AND);
"."     PUSH_TOKEN(DMNSN_T_DOT);
"|"     PUSH_TOKEN(DMNSN_T_PIPE);
"<"     PUSH_TOKEN(DMNSN_T_LESS);
">"     PUSH_TOKEN(DMNSN_T_GREATER);
"!"     PUSH_TOKEN(DMNSN_T_BANG);
"="     PUSH_TOKEN(DMNSN_T_EQUALS);
"<="    PUSH_TOKEN(DMNSN_T_LESS_EQUAL);
">="    PUSH_TOKEN(DMNSN_T_GREATER_EQUAL);
"!="    PUSH_TOKEN(DMNSN_T_NOT_EQUAL);

  /* Integers: decimal, or hexadecimal with a 0x/0X prefix */
[[:digit:]]+                    |
0(x|X)[[:digit:]aAbBcCdDeEfF]+  PUSH_VALUE_TOKEN(DMNSN_T_INTEGER);

  /*
   * Floats.  A bare run of digits also matches this pattern, but the integer
   * rule above is listed first and therefore wins the tie.
   */
[[:digit:]]*\.?[[:digit:]]+((e|E)(\+|-)?[[:digit:]]+)? {
  PUSH_VALUE_TOKEN(DMNSN_T_FLOAT);
}

  /* Keywords */
"box"           PUSH_TOKEN(DMNSN_T_BOX);
"camera"        PUSH_TOKEN(DMNSN_T_CAMERA);
"color"         PUSH_TOKEN(DMNSN_T_COLOR);
"colour"        PUSH_TOKEN(DMNSN_T_COLOR);
"sphere"        PUSH_TOKEN(DMNSN_T_SPHERE);

  /* Directives */
"#include"      PUSH_TOKEN(DMNSN_T_INCLUDE);
"#declare"      PUSH_TOKEN(DMNSN_T_DECLARE);

  /* Identifiers */
[[:alpha:]][[:alnum:]_]*        PUSH_VALUE_TOKEN(DMNSN_T_IDENTIFIER);

  /* Strings */

"\""                            STRING_TOKEN(); yy_push_state(DMNSN_STRING, yyscanner);
<DMNSN_STRING>[^\\\"\n]*        STRCAT(yytext, yyleng);
<DMNSN_STRING>"\""              PUSH(); yy_pop_state(yyscanner);

  /* String escape sequences */

<DMNSN_STRING>"\\" {
  yy_push_state(DMNSN_STRING_ESCAPE, yyscanner);
  CALCULATE_COLUMN();
}
<DMNSN_STRING_ESCAPE>"a"        STRCAT("\a", 1); yy_pop_state(yyscanner);
<DMNSN_STRING_ESCAPE>"b"        STRCAT("\b", 1); yy_pop_state(yyscanner);
<DMNSN_STRING_ESCAPE>"f"        STRCAT("\f", 1); yy_pop_state(yyscanner);
<DMNSN_STRING_ESCAPE>"n"        STRCAT("\n", 1); yy_pop_state(yyscanner);
<DMNSN_STRING_ESCAPE>"r"        STRCAT("\r", 1); yy_pop_state(yyscanner);
<DMNSN_STRING_ESCAPE>"t"        STRCAT("\t", 1); yy_pop_state(yyscanner);
<DMNSN_STRING_ESCAPE>"v"        STRCAT("\v", 1); yy_pop_state(yyscanner);
<DMNSN_STRING_ESCAPE>"\\"       STRCAT("\\", 1); yy_pop_state(yyscanner);
<DMNSN_STRING_ESCAPE>"'"        STRCAT("'", 1);  yy_pop_state(yyscanner);
<DMNSN_STRING_ESCAPE>"\""       STRCAT("\"", 1); yy_pop_state(yyscanner);
<DMNSN_STRING_ESCAPE>"u"[[:digit:]aAbBcCdDeEfF]{4} {
  /*
   * Reserve two zeroed bytes, then store the code point high byte first.
   * NOTE(review): a code point below 0x100 stores a zero high byte, which
   * ends the value early for any consumer that treats it as a C string --
   * confirm whether that is intended.
   */
  wchar = strtoul(yytext + 1, NULL, 16);
  STRCAT("", 2);
  token.value[string_length - 2] = wchar/256;
  token.value[string_length - 1] = wchar%256;
  yy_pop_state(yyscanner);
}
<DMNSN_STRING_ESCAPE>. {
  /* Unknown escape: warn, then keep the escaped character itself */
  dmnsn_diagnostic(filename, yylineno, yycolumn,
                   "WARNING: unrecognised escape sequence '\\%c'",
                   (int)*yytext);
  STRCAT(yytext, yyleng);
  yy_pop_state(yyscanner);
}

  /*
   * No other rule matches a newline or the end of the input inside a string.
   * Without these rules flex's default rule would echo the newline to the
   * output stream, and a string cut off by the end of the input would be
   * dropped silently with its buffer leaked.
   */
<DMNSN_STRING,DMNSN_STRING_ESCAPE>\n            UNTERMINATED_STRING();
<DMNSN_STRING,DMNSN_STRING_ESCAPE><<EOF>>       UNTERMINATED_STRING();

  /* Ignore whitespace */
[\b\r\t\v ]+    CALCULATE_COLUMN();
\n              ;

  /* Fall-through: anything else is an error and ends the scan */
. {
  /* Go through unsigned char so bytes above 0x7F are not sign-extended */
  dmnsn_diagnostic(filename, yylineno, yycolumn,
                   "Unrecognized character '%c' (0x%X)",
                   (int)*yytext, (unsigned int)(unsigned char)*yytext);
  return 1;
}

%%

/*
 * Tokenize `file'.  `filename' is stored in every token and used in
 * diagnostics; the pointer is not copied, so it must outlive the tokens.
 *
 * Returns a new array of dmnsn_token, to be released with
 * dmnsn_delete_tokens(), or NULL if the scanner could not be created or the
 * input could not be tokenized.
 */
dmnsn_array *
dmnsn_tokenize(const char *filename, FILE *file)
{
  dmnsn_array *tokens = dmnsn_new_array(sizeof(dmnsn_token));
  yyscan_t scanner;

  /* yylex_init() returns non-zero when it cannot set up the scanner */
  if (yylex_init(&scanner) != 0) {
    dmnsn_delete_tokens(tokens);
    return NULL;
  }
  yyset_in(file, scanner);

  if (yylex(filename, tokens, scanner) != 0) {
    /* Discard whatever was pushed before the error */
    dmnsn_delete_tokens(tokens);
    tokens = NULL;
  }

  yylex_destroy(scanner);

  return tokens;
}

/*
 * Free every token's value string, then the token array itself.
 */
void
dmnsn_delete_tokens(dmnsn_array *tokens)
{
  dmnsn_token *token;
  unsigned int i;

  for (i = 0; i < dmnsn_array_size(tokens); ++i) {
    token = dmnsn_array_at(tokens, i);
    free(token->value);
  }
  dmnsn_delete_array(tokens);
}

/*
 * Print one token: `(name "value")' when the token carries a value, just
 * `name' otherwise.  Parenthesis tokens are printed as `\(' and `\)' so they
 * cannot be mistaken for the S-expression's own delimiters.
 */
static void
dmnsn_print_token(FILE *file, dmnsn_token token)
{
  const char *tname;

  if (token.type == DMNSN_T_LPAREN) {
    tname = "\\(";
  } else if (token.type == DMNSN_T_RPAREN) {
    tname = "\\)";
  } else {
    tname = dmnsn_token_string(token.type);
  }

  if (token.value) {
    fprintf(file, "(%s \"%s\")", tname, token.value);
  } else {
    fprintf(file, "%s", tname);
  }
}

/*
 * Print the whole token array as one parenthesized, space-separated list
 * followed by a newline.  An empty array prints as `()'.
 */
void
dmnsn_print_token_sexpr(FILE *file, const dmnsn_array *tokens)
{
  dmnsn_token token;
  unsigned int i;

  fprintf(file, "(");
  for (i = 0; i < dmnsn_array_size(tokens); ++i) {
    if (i > 0) {
      fprintf(file, " ");
    }
    dmnsn_array_get(tokens, i, &token);
    dmnsn_print_token(file, token);
  }
  fprintf(file, ")\n");
}