824 lines
		
	
	
		
			26 KiB
		
	
	
	
		
			C++
		
	
	
			
		
		
	
	
			824 lines
		
	
	
		
			26 KiB
		
	
	
	
		
			C++
		
	
	
/*
 * FSM table generator:
 *  Generate FSM tables as ".c" files from FSM functions.
 *
 * 23.03.2016 (dd.mm.yyyy)
 */
 | |
| 
 | |
| // TOP
 | |
| 
 | |
| #include <stdlib.h>
 | |
| #include <stdio.h>
 | |
| #include <stdint.h>
 | |
| 
 | |
// Crash on a failed check. The store goes through a volatile pointer so the
// optimizer cannot treat the null write as undefined behavior and delete it.
#define Assert(n) do{ if (!(n)) { *(volatile int*)0 = 0xA11E; } }while(0)

// Number of elements in a true array (not a pointer). The argument is
// parenthesized so that expression arguments expand correctly.
#define ArrayCount(a) (sizeof(a)/sizeof((a)[0]))
 | |
| 
 | |
| #include "4cpp_lexer_types.h"
 | |
| #include "4cpp_lexer_fsms.h"
 | |
| #include "4ed_mem_ansi.c"
 | |
| 
 | |
// State carried by the whitespace-skipping FSM.
typedef struct Whitespace_FSM{
    // Current preprocessor state (one of the LSPP_* values).
    unsigned char pp_state;
    // Set to 1 once a non-whitespace character has been seen.
    unsigned char white_done;
} Whitespace_FSM;
 | |
| 
 | |
| Whitespace_FSM
 | |
| whitespace_skip_fsm(Whitespace_FSM wfsm, char c){
 | |
|     if (wfsm.pp_state != LSPP_default){
 | |
|         if (c == '\n') wfsm.pp_state = LSPP_default;
 | |
|     }
 | |
|     if (!(c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\f' || c == '\v')){
 | |
|         wfsm.white_done = 1;
 | |
|     }
 | |
|     return(wfsm);
 | |
| }
 | |
| 
 | |
| Lex_FSM
 | |
| int_fsm(Lex_FSM fsm, char c){
 | |
|     switch (fsm.int_state){
 | |
|         case LSINT_default:
 | |
|         switch (c){
 | |
|             case 'u': case 'U': fsm.int_state = LSINT_u; break;
 | |
|             case 'l': fsm.int_state = LSINT_l; break;
 | |
|             case 'L': fsm.int_state = LSINT_L; break;
 | |
|             default: fsm.emit_token = 1; break;
 | |
|         }
 | |
|         break;
 | |
|         
 | |
|         case LSINT_u:
 | |
|         switch (c){
 | |
|             case 'l': fsm.int_state = LSINT_ul; break;
 | |
|             case 'L': fsm.int_state = LSINT_uL; break;
 | |
|             default: fsm.emit_token = 1; break;
 | |
|         }
 | |
|         break;
 | |
|         
 | |
|         case LSINT_l:
 | |
|         switch (c){
 | |
|             case 'l': fsm.int_state = LSINT_ll; break;
 | |
|             case 'U': case 'u': fsm.int_state = LSINT_extra; break;
 | |
|             default: fsm.emit_token = 1; break;
 | |
|         }
 | |
|         break;
 | |
|         
 | |
|         case LSINT_L:
 | |
|         switch (c){
 | |
|             case 'L': fsm.int_state = LSINT_ll; break;
 | |
|             case 'U': case 'u': fsm.int_state = LSINT_extra; break;
 | |
|             default: fsm.emit_token = 1; break;
 | |
|         }
 | |
|         break;
 | |
|         
 | |
|         case LSINT_ul:
 | |
|         switch (c){
 | |
|             case 'l': fsm.int_state = LSINT_extra; break;
 | |
|             default: fsm.emit_token = 1; break;
 | |
|         }
 | |
|         break;
 | |
|         
 | |
|         case LSINT_uL:
 | |
|         switch (c){
 | |
|             case 'L': fsm.int_state = LSINT_extra; break;
 | |
|             default: fsm.emit_token = 1; break;
 | |
|         }
 | |
|         break;
 | |
|         
 | |
|         case LSINT_ll:
 | |
|         switch (c){
 | |
|             case 'u': case 'U': fsm.int_state = LSINT_extra; break;
 | |
|             default: fsm.emit_token = 1; break;
 | |
|         }
 | |
|         break;
 | |
|         
 | |
|         case LSINT_extra:
 | |
|         fsm.emit_token = 1;
 | |
|         break;
 | |
|     }
 | |
|     return(fsm);
 | |
| }
 | |
| 
 | |
| Lex_FSM
 | |
| main_fsm(Lex_FSM fsm, unsigned char pp_state, unsigned char c){
 | |
|     if (c == 0){
 | |
|         fsm.emit_token = 1;
 | |
|     }
 | |
|     else{
 | |
|         switch (pp_state){
 | |
|             case LSPP_error:
 | |
|             fsm.state = LS_error_message;
 | |
|             if (c == '\n') fsm.emit_token = 1;
 | |
|             break;
 | |
|             
 | |
| #if 0
 | |
|             case LSPP_include:
 | |
|             switch (fsm.state){
 | |
|                 case LSINC_default:
 | |
|                 switch (c){
 | |
|                     case '"': fsm.state = LSINC_quotes; break;
 | |
|                     case '<': fsm.state = LSINC_pointy; break;
 | |
|                     case '/': fsm.state = LSINC_def_comment_pre; break;
 | |
|                     default: fsm.state = LSINC_junk; break;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LSINC_quotes:
 | |
|                 if (c == '"') fsm.emit_token = 1;
 | |
|                 else if (c == '\n'){
 | |
|                     fsm.emit_token = 1;
 | |
|                     fsm.state = LSINC_junk;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LSINC_pointy:
 | |
|                 if (c == '>') fsm.emit_token = 1;
 | |
|                 else if (c == '\n'){
 | |
|                     fsm.emit_token = 1;
 | |
|                     fsm.state = LSINC_junk;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LSINC_junk:
 | |
|                 switch (c){
 | |
|                     case '/': fsm.state = LSINC_junk_comment_pre; break;
 | |
|                     case '\n': fsm.emit_token = 1; break;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LSINC_def_comment_pre:
 | |
|                 switch (c){
 | |
|                     case '/': fsm.state = LSINC_def_comment; break;
 | |
|                     case '*': fsm.state = LSINC_def_comment_block; break;
 | |
|                     case '\n': fsm.state = LSINC_junk; fsm.emit_token = 1; break;
 | |
|                     default: fsm.state = LSINC_junk; break;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LSINC_junk_comment_pre:
 | |
|                 switch (c){
 | |
|                     case '/': fsm.state = LSINC_junk_comment; break;
 | |
|                     case '*': fsm.state = LSINC_junk_comment_block; break;
 | |
|                     case '\n': fsm.state = LSINC_junk; fsm.emit_token = 1; break;
 | |
|                     default: fsm.state = LSINC_junk; break;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LSINC_def_comment:
 | |
|                 case LSINC_junk_comment:
 | |
|                 switch (c){
 | |
|                     
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LSINC_def_comment_slashed:
 | |
|                 case LSINC_junk_comment_slashed:
 | |
|                 break;
 | |
|                 
 | |
|                 case LSINC_def_comment_block:
 | |
|                 case LSINC_junk_comment_block:
 | |
|                 break;
 | |
|                 
 | |
|                 case LSINC_def_comment_block_ending:
 | |
|                 case LSINC_junk_comment_block_ending:
 | |
|                 break;
 | |
|             }
 | |
|             break;
 | |
| #endif
 | |
|             
 | |
|             default:
 | |
|             switch (fsm.state){
 | |
|                 case LS_default:
 | |
|                 if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == '$'){
 | |
|                     fsm.state = LS_identifier;
 | |
|                 }
 | |
|                 else if (c >= '1' && c <= '9'){
 | |
|                     fsm.state = LS_number;
 | |
|                 }
 | |
|                 else if (c == '0'){
 | |
|                     fsm.state = LS_number0;
 | |
|                 }
 | |
|                 else{
 | |
|                     switch (c){
 | |
|                         case '\'': fsm.state = LS_char; break;
 | |
|                         case '"': fsm.state = LS_string; break;
 | |
|                         
 | |
|                         case '/': fsm.state = LS_comment_pre; break;
 | |
|                         
 | |
|                         case '.': fsm.state = LS_dot; break;
 | |
|                         
 | |
|                         case '<':
 | |
|                         if (pp_state == LSPP_include){
 | |
|                             fsm.state = LS_string;
 | |
|                         }
 | |
|                         else{
 | |
|                             fsm.state = LS_less;
 | |
|                         }
 | |
|                         break;
 | |
|                         
 | |
|                         case '>': fsm.state = LS_more; break;
 | |
|                         
 | |
|                         case '-': fsm.state = LS_minus; break;
 | |
|                         
 | |
|                         case '&': fsm.state = LS_and; break;
 | |
|                         case '|': fsm.state = LS_or; break;
 | |
|                         
 | |
|                         case '+': fsm.state = LS_plus; break;
 | |
|                         
 | |
|                         case ':': fsm.state = LS_colon; break;
 | |
|                         
 | |
|                         case '*': fsm.state = LS_star; break;
 | |
|                         
 | |
|                         case '%': fsm.state = LS_modulo; break;
 | |
|                         case '^': fsm.state = LS_caret; break;
 | |
|                         
 | |
|                         case '=': fsm.state = LS_eq; break;
 | |
|                         case '!': fsm.state = LS_bang; break;
 | |
|                         
 | |
|                         case '#':
 | |
|                         if (pp_state == LSPP_default){
 | |
|                             fsm.state = LS_pp;
 | |
|                         }
 | |
|                         else{
 | |
|                             fsm.state = LS_pound;
 | |
|                         }
 | |
|                         break;
 | |
|                         
 | |
| #define OperCase(op,type) case op: fsm.emit_token = 1; break;
 | |
|                         OperCase('{', CPP_TOKEN_BRACE_OPEN);
 | |
|                         OperCase('}', CPP_TOKEN_BRACE_CLOSE);
 | |
|                         
 | |
|                         OperCase('[', CPP_TOKEN_BRACKET_OPEN);
 | |
|                         OperCase(']', CPP_TOKEN_BRACKET_CLOSE);
 | |
|                         
 | |
|                         OperCase('(', CPP_TOKEN_PARENTHESE_OPEN);
 | |
|                         OperCase(')', CPP_TOKEN_PARENTHESE_CLOSE);
 | |
|                         
 | |
|                         OperCase('~', CPP_TOKEN_TILDE);
 | |
|                         OperCase(',', CPP_TOKEN_COMMA);
 | |
|                         OperCase(';', CPP_TOKEN_SEMICOLON);
 | |
|                         OperCase('?', CPP_TOKEN_TERNARY_QMARK);
 | |
|                         
 | |
|                         OperCase('@', CPP_TOKEN_JUNK);
 | |
|                         OperCase('\\', CPP_TOKEN_JUNK);
 | |
| #undef OperCase
 | |
|                     }
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_identifier:
 | |
|                 if (!((c >= '0' && c <= '9') ||
 | |
|                       (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
 | |
|                       c == '_' || c == '$')){
 | |
|                     fsm.emit_token = 1;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_pound:
 | |
|                 switch (c){
 | |
|                     case '#': fsm.emit_token = 1; break;
 | |
|                     default: fsm.emit_token = 1; break;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_pp:
 | |
|                 if (c == ' ' || c == '\r' || c == '\v' || c == '\f'){
 | |
|                     // NOTE(allen): do nothing
 | |
|                 }
 | |
|                 else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')){
 | |
|                     fsm.state = LS_ppdef;
 | |
|                 }
 | |
|                 else{
 | |
|                     fsm.emit_token = 1;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_ppdef:
 | |
|                 if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))){
 | |
|                     fsm.emit_token = 1;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_char:
 | |
|                 case LS_char_multiline:
 | |
|                 switch(c){
 | |
|                     case '\n': case '\'': fsm.emit_token = 1; break;
 | |
|                     case '\\': fsm.state = LS_char_slashed; break;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_char_slashed:
 | |
|                 switch (c){
 | |
|                     case '\r': case '\f': case '\v': break;
 | |
|                     case '\n': fsm.state = LS_char_multiline; break;
 | |
|                     default: fsm.state = LS_char; break;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_string:
 | |
|                 case LS_string_multiline:
 | |
|                 switch(c){
 | |
|                     case '\n': case '\"': fsm.emit_token = 1; break;
 | |
|                     case '>':
 | |
|                     if (pp_state == LSPP_include){
 | |
|                         fsm.emit_token = 1;
 | |
|                     }
 | |
|                     else{
 | |
|                         fsm.state = LS_string_slashed;
 | |
|                     }
 | |
|                     break;
 | |
|                     case '\\': fsm.state = LS_string_slashed; break;
 | |
|                 } 
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_string_slashed:
 | |
|                 switch (c){
 | |
|                     case '\r': case '\f': case '\v': break;
 | |
|                     case '\n': fsm.state = LS_string_multiline; break;
 | |
|                     default: fsm.state = LS_string; break;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_number:
 | |
|                 if (c >= '0' && c <= '9'){
 | |
|                     fsm.state = LS_number;
 | |
|                 }
 | |
|                 else{
 | |
|                     switch (c){
 | |
|                         case '.': fsm.state = LS_float; break;
 | |
|                         default: fsm.emit_token = 1; break;
 | |
|                     }
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_number0:
 | |
|                 if (c >= '0' && c <= '9'){
 | |
|                     fsm.state = LS_number;
 | |
|                 }
 | |
|                 else if (c == 'x'){
 | |
|                     fsm.state = LS_hex;
 | |
|                 }
 | |
|                 else if (c == '.'){
 | |
|                     fsm.state = LS_float;
 | |
|                 }
 | |
|                 else{
 | |
|                     fsm.emit_token = 1;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_float:
 | |
|                 if (!(c >= '0' && c <= '9')){
 | |
|                     switch (c){
 | |
|                         case 'e': fsm.state = LS_crazy_float0; break;
 | |
|                         default: fsm.emit_token = 1; break;
 | |
|                     }
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_crazy_float0:
 | |
|                 {
 | |
|                     if ((c >= '0' && c <= '9') || c == '-'){
 | |
|                         fsm.state = LS_crazy_float1;
 | |
|                     }
 | |
|                     else{
 | |
|                         fsm.emit_token = 1;
 | |
|                     }
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_crazy_float1:
 | |
|                 {
 | |
|                     if (!(c >= '0' && c <= '9')){
 | |
|                         fsm.emit_token = 1;
 | |
|                     }
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_hex:
 | |
|                 if (!(c >= '0' && c <= '9' || c >= 'a' && c <= 'f' || c >= 'A' && c <= 'F')){
 | |
|                     fsm.emit_token = 1;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_dot:
 | |
|                 if (c >= '0' && c <= '9'){
 | |
|                     fsm.state = LS_float;
 | |
|                 }
 | |
|                 else
 | |
|                     switch (c){
 | |
|                     case '.': fsm.state = LS_ellipsis; break;
 | |
|                     case '*': fsm.emit_token = 1; break;
 | |
|                     default: fsm.emit_token = 1; break;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_ellipsis: fsm.emit_token = 1; break;
 | |
|                 
 | |
|                 case LS_less:
 | |
|                 switch (c){
 | |
|                     case '<': fsm.state = LS_less_less; break;
 | |
|                     case '=': fsm.emit_token = 1; break;
 | |
|                     default: fsm.emit_token = 1; break;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_less_less:
 | |
|                 switch (c){
 | |
|                     case '=': fsm.emit_token = 1; break;
 | |
|                     default: fsm.emit_token = 1; break;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_more:
 | |
|                 switch (c){
 | |
|                     case '>': fsm.state = LS_more_more; break;
 | |
|                     case '=': fsm.emit_token = 1; break;
 | |
|                     default: fsm.emit_token = 1; break;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_more_more:
 | |
|                 switch (c){
 | |
|                     case '=': fsm.emit_token = 1; break;
 | |
|                     default: fsm.emit_token = 1; break;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_comment_pre:
 | |
|                 switch (c){
 | |
|                     case '/': fsm.state = LS_comment; break;
 | |
|                     case '*': fsm.state = LS_comment_block; break;
 | |
|                     case '=': fsm.emit_token = 1; break;
 | |
|                     default: fsm.emit_token = 1; break;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_comment:
 | |
|                 switch (c){
 | |
|                     case '\\': fsm.state = LS_comment_slashed; break;
 | |
|                     case '\n': fsm.emit_token = 1; break;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_comment_slashed:
 | |
|                 switch (c){
 | |
|                     case '\r': case '\f': case '\v': break;
 | |
|                     default: fsm.state = LS_comment; break;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_comment_block:
 | |
|                 switch (c){
 | |
|                     case '*': fsm.state = LS_comment_block_ending; break;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_comment_block_ending:
 | |
|                 switch (c){
 | |
|                     case '*': fsm.state = LS_comment_block_ending; break;
 | |
|                     case '/': fsm.emit_token = 1; break;
 | |
|                     default: fsm.state = LS_comment_block; break;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_minus:
 | |
|                 switch (c){
 | |
|                     case '>': fsm.state = LS_arrow; break;
 | |
|                     case '-': fsm.emit_token = 1; break;
 | |
|                     case '=': fsm.emit_token = 1; break;
 | |
|                     default: fsm.emit_token = 1; break;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_arrow:
 | |
|                 switch (c){
 | |
|                     case '*': fsm.emit_token = 1; break;
 | |
|                     default: fsm.emit_token = 1; break;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_and:
 | |
|                 switch (c){
 | |
|                     case '&': fsm.emit_token = 1; break;
 | |
|                     case '=': fsm.emit_token = 1; break;
 | |
|                     default: fsm.emit_token = 1; break;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_or:
 | |
|                 switch (c){
 | |
|                     case '|': fsm.emit_token = 1; break;
 | |
|                     case '=': fsm.emit_token = 1; break;
 | |
|                     default: fsm.emit_token = 1; break;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_plus:
 | |
|                 switch (c){
 | |
|                     case '+': fsm.emit_token = 1; break;
 | |
|                     case '=': fsm.emit_token = 1; break;
 | |
|                     default: fsm.emit_token = 1; break;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_colon:
 | |
|                 switch (c){
 | |
|                     case ':': fsm.emit_token = 1; break;
 | |
|                     default: fsm.emit_token = 1; break;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_star:
 | |
|                 switch (c){
 | |
|                     case '=': fsm.emit_token = 1; break;
 | |
|                     default: fsm.emit_token = 1; break;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_modulo:
 | |
|                 switch (c){
 | |
|                     case '=': fsm.emit_token = 1; break;
 | |
|                     default: fsm.emit_token = 1; break;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_caret:
 | |
|                 switch (c){
 | |
|                     case '=': fsm.emit_token = 1; break;
 | |
|                     default: fsm.emit_token = 1; break;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_eq:
 | |
|                 switch (c){
 | |
|                     case '=': fsm.emit_token = 1; break;
 | |
|                     default: fsm.emit_token = 1; break;
 | |
|                 }
 | |
|                 break;
 | |
|                 
 | |
|                 case LS_bang:
 | |
|                 switch (c){
 | |
|                     case '=': fsm.emit_token = 1; break;
 | |
|                     default: fsm.emit_token = 1; break;
 | |
|                 }
 | |
|                 break;
 | |
|             }
 | |
|             break;
 | |
|         }
 | |
|     }
 | |
|     return(fsm);
 | |
| }
 | |
| 
 | |
// Write the opening line of an array definition named "<group_name>_<table_name>"
// with element type 'type':   <type> <group_name>_<table_name>[] = {
// The strings are only read, so they are taken as const (callers pass literals).
static void
begin_table(FILE *file, const char *type, const char *group_name, const char *table_name){
    fprintf(file, "%s %s_%s[] = {\n", type, group_name, table_name);
}
 | |
| 
 | |
// Write the opening line of an array definition named 'table_name' with
// element type 'type':   <type> <table_name>[] = {
// The strings are only read, so they are taken as const (callers pass literals).
static void
begin_table(FILE *file, const char *type, const char *table_name){
    fprintf(file, "%s %s[] = {\n", type, table_name);
}
 | |
| 
 | |
// Write the opening line of an array-of-pointers definition:
//   <type> * <table_name>[] = {
// The strings are only read, so they are taken as const (callers pass literals).
static void
begin_ptr_table(FILE *file, const char *type, const char *table_name){
    fprintf(file, "%s * %s[] = {\n", type, table_name);
}
 | |
| 
 | |
// Write one numeric table entry followed by a comma. The value is printed
// right-aligned in a field at least two characters wide.
static void
do_table_item(FILE *file, uint16_t item){
    int32_t value = (int32_t)item;
    fprintf(file, "%2d,", value);
}
 | |
| 
 | |
// Write one table entry given as text: 'item' immediately followed by
// 'tail' and a comma.
// The strings are only read, so they are taken as const (callers pass literals).
static void
do_table_item_direct(FILE *file, const char *item, const char *tail){
    fprintf(file, "%s%s,", item, tail);
}
 | |
| 
 | |
// Finish the current row of table items with a newline.
static void
end_row(FILE *file){
    fputc('\n', file);
}
 | |
| 
 | |
// Close an array definition: writes "};" followed by a blank line.
static void
end_table(FILE *file){
    fputs("};\n\n", file);
}
 | |
| 
 | |
// Working data for one FSM: the full per-character transition table and
// the reduced table produced by merging characters with identical rows.
typedef struct FSM_Tables{
    // 256 rows of state_count entries, one row per input character.
    uint8_t *full_transition_table;
    // marks[c] != 0 when character c duplicates an earlier character's row.
    uint8_t *marks;
    // eq_class[c] is the equivalence class index of character c.
    uint8_t *eq_class;
    // eq_class_rep[k] is the first character that belongs to class k.
    uint8_t *eq_class_rep;
    // eq_class_counter rows of state_count entries, one row per class.
    uint8_t *reduced_transition_table;
    
    // Number of equivalence classes found by the reduction.
    uint8_t eq_class_counter;
    // Number of FSM states, i.e. the length of each table row.
    uint16_t state_count;
} FSM_Tables;
 | |
| 
 | |
| static void
 | |
| allocate_full_tables(FSM_Tables *table, uint8_t state_count){
 | |
|     table->full_transition_table = (uint8_t*)malloc(state_count * 256);
 | |
|     table->marks = (uint8_t*)malloc(state_count * 256);
 | |
|     table->eq_class = (uint8_t*)malloc(state_count * 256);
 | |
|     table->eq_class_rep = (uint8_t*)malloc(state_count * 256);
 | |
|     table->state_count = state_count;
 | |
|     block_zero(table->marks, 256);
 | |
| }
 | |
| 
 | |
| static void
 | |
| do_table_reduction(FSM_Tables *table, uint16_t state_count){
 | |
|     {
 | |
|         table->eq_class_counter = 0;
 | |
|         uint8_t *c_line = table->full_transition_table;
 | |
|         for (uint16_t c = 0; c < 256; ++c){
 | |
|             if (table->marks[c] == 0){
 | |
|                 table->eq_class[c] = table->eq_class_counter;
 | |
|                 table->eq_class_rep[table->eq_class_counter] = (uint8_t)c;
 | |
|                 uint8_t *c2_line = c_line + state_count;
 | |
|                 for (uint16_t c2 = c + 1; c2 < 256; ++c2){
 | |
|                     if (block_compare(c_line, c2_line, state_count) == 0){
 | |
|                         table->marks[c2] = 1;
 | |
|                         table->eq_class[c2] = table->eq_class_counter;
 | |
|                     }
 | |
|                     c2_line += state_count;
 | |
|                 }
 | |
|                 ++table->eq_class_counter;
 | |
|             }
 | |
|             c_line += state_count;
 | |
|         }
 | |
|     }
 | |
|     
 | |
|     table->reduced_transition_table = (uint8_t*)malloc(state_count * table->eq_class_counter);
 | |
|     {
 | |
|         uint8_t *r_line = table->reduced_transition_table;
 | |
|         for (uint16_t eq = 0; eq < table->eq_class_counter; ++eq){
 | |
|             uint8_t *u_line = table->full_transition_table + state_count * table->eq_class_rep[eq];
 | |
|             block_copy(r_line, u_line, state_count);
 | |
|             r_line += state_count;
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| static FSM_Tables
 | |
| generate_whitespace_skip_table(){
 | |
|     uint8_t state_count = LSPP_count;
 | |
|     FSM_Tables table;
 | |
|     allocate_full_tables(&table, state_count);
 | |
|     
 | |
|     int32_t i = 0;
 | |
|     Whitespace_FSM wfsm = {0};
 | |
|     Whitespace_FSM new_wfsm;
 | |
|     for (uint16_t c = 0; c < 256; ++c){
 | |
|         for (uint8_t state = 0; state < state_count; ++state){
 | |
|             wfsm.pp_state = state;
 | |
|             wfsm.white_done = 0;
 | |
|             new_wfsm = whitespace_skip_fsm(wfsm, (uint8_t)c);
 | |
|             table.full_transition_table[i++] = new_wfsm.pp_state + state_count*new_wfsm.white_done;
 | |
|         }
 | |
|     }
 | |
|     
 | |
|     do_table_reduction(&table, state_count);
 | |
|     
 | |
|     return(table);
 | |
| }
 | |
| 
 | |
| static FSM_Tables
 | |
| generate_int_table(){
 | |
|     uint8_t state_count = LSINT_count;
 | |
|     FSM_Tables table;
 | |
|     allocate_full_tables(&table, state_count);
 | |
|     
 | |
|     int32_t i = 0;
 | |
|     Lex_FSM fsm = {0};
 | |
|     Lex_FSM new_fsm;
 | |
|     for (uint16_t c = 0; c < 256; ++c){
 | |
|         for (uint8_t state = 0; state < state_count; ++state){
 | |
|             fsm.int_state = state;
 | |
|             fsm.emit_token = 0;
 | |
|             new_fsm = int_fsm(fsm, (uint8_t)c);
 | |
|             table.full_transition_table[i++] = new_fsm.int_state + state_count*new_fsm.emit_token;
 | |
|         }
 | |
|     }
 | |
|     
 | |
|     do_table_reduction(&table, state_count);
 | |
|     
 | |
|     return(table);
 | |
| }
 | |
| 
 | |
| static FSM_Tables
 | |
| generate_fsm_table(uint8_t pp_state){
 | |
|     uint8_t state_count = LS_count;
 | |
|     FSM_Tables table;
 | |
|     allocate_full_tables(&table, state_count);
 | |
|     
 | |
|     int32_t i = 0;
 | |
|     Lex_FSM fsm = {0};
 | |
|     Lex_FSM new_fsm;
 | |
|     for (uint16_t c = 0; c < 256; ++c){
 | |
|         for (uint8_t state = 0; state < state_count; ++state){
 | |
|             fsm.state = state;
 | |
|             fsm.emit_token = 0;
 | |
|             new_fsm = main_fsm(fsm, pp_state, (uint8_t)c);
 | |
|             table.full_transition_table[i++] = new_fsm.state + state_count*new_fsm.emit_token;
 | |
|         }
 | |
|     }
 | |
|     
 | |
|     do_table_reduction(&table, state_count);
 | |
|     
 | |
|     return(table);
 | |
| }
 | |
| 
 | |
| static void
 | |
| render_fsm_table(FILE *file, FSM_Tables tables, char *group_name){
 | |
|     begin_table(file, "uint16_t", group_name, "eq_classes");
 | |
|     for (uint16_t c = 0; c < 256; ++c){
 | |
|         do_table_item(file, tables.eq_class[c]*tables.state_count);
 | |
|     }
 | |
|     end_row(file);
 | |
|     end_table(file);
 | |
|     
 | |
|     fprintf(file, "const int32_t num_%s_eq_classes = %d;\n\n", group_name, tables.eq_class_counter);
 | |
|     
 | |
|     int32_t i = 0;
 | |
|     begin_table(file, "uint8_t", group_name, "table");
 | |
|     for (uint16_t c = 0; c < tables.eq_class_counter; ++c){
 | |
|         for (uint8_t state = 0; state < tables.state_count; ++state){
 | |
|             do_table_item(file, tables.reduced_transition_table[i++]);
 | |
|         }
 | |
|         end_row(file);
 | |
|     }
 | |
|     end_table(file);
 | |
| }
 | |
| 
 | |
// Write a variable definition followed by a blank line:
//   <type> <variable> = <x>;
// 'x' is unsigned, so it is printed with %u; %d would misprint values above
// INT_MAX. The strings are only read, so they are taken as const.
static void
render_variable(FILE *file, const char *type, const char *variable, uint32_t x){
    fprintf(file, "%s %s = %u;\n\n", type, variable, (unsigned int)x);
}
 | |
| 
 | |
// Write 'comment' wrapped in a C block comment. The opening marker sits on
// its own line; the closing marker directly follows the comment text, so the
// text is expected to end with a newline.
// The string is only read, so it is taken as const (callers pass literals).
static void
render_comment(FILE *file, const char *comment){
    fprintf(file, "/*\n%s*/\n", comment);
}
 | |
| 
 | |
| typedef struct PP_Names{
 | |
|     uint8_t pp_state;
 | |
|     char *name;
 | |
| }  PP_Names;
 | |
| 
 | |
| static PP_Names pp_names[] = {
 | |
|     {LSPP_default,          "main_fsm"},
 | |
|     {LSPP_include,          "pp_include_fsm"},
 | |
|     {LSPP_macro_identifier, "pp_macro_fsm"},
 | |
|     {LSPP_identifier,       "pp_identifier_fsm"},
 | |
|     {LSPP_body_if,          "pp_body_if_fsm"},
 | |
|     {LSPP_body,             "pp_body_fsm"},
 | |
|     {LSPP_number,           "pp_number_fsm"},
 | |
|     {LSPP_error,            "pp_error_fsm"},
 | |
|     {LSPP_junk,             "pp_junk_fsm"},
 | |
| };
 | |
| 
 | |
| int
 | |
| main(){
 | |
|     FILE *file;
 | |
|     file = fopen("4cpp_lexer_tables.c", "wb");
 | |
|     
 | |
|     FSM_Tables wtables = generate_whitespace_skip_table();
 | |
|     render_fsm_table(file, wtables, "whitespace_fsm");
 | |
|     
 | |
|     FSM_Tables itables = generate_int_table();
 | |
|     render_fsm_table(file, itables, "int_fsm");
 | |
|     
 | |
|     begin_table(file, "uint8_t", "multiline_state_table");
 | |
|     for (uint8_t state = 0; state < LS_count*2; ++state){
 | |
|         do_table_item(file, (state == LS_string_multiline || state == LS_char_multiline));
 | |
|     }
 | |
|     end_row(file);
 | |
|     end_table(file);
 | |
|     
 | |
|     for (int32_t i = 0; i < ArrayCount(pp_names); ++i){
 | |
|         Assert(i == pp_names[i].pp_state);
 | |
|         FSM_Tables tables = generate_fsm_table(pp_names[i].pp_state);
 | |
|         render_fsm_table(file, tables, pp_names[i].name);
 | |
|     }
 | |
|     
 | |
|     begin_ptr_table(file, "uint16_t", "get_eq_classes");
 | |
|     for (int32_t i = 0; i < ArrayCount(pp_names); ++i){
 | |
|         do_table_item_direct(file, pp_names[i].name, "_eq_classes");
 | |
|         end_row(file);
 | |
|     }
 | |
|     end_table(file);
 | |
|     
 | |
|     begin_ptr_table(file, "uint8_t", "get_table");
 | |
|     for (int32_t i = 0; i < ArrayCount(pp_names); ++i){
 | |
|         do_table_item_direct(file, pp_names[i].name, "_table");
 | |
|         end_row(file);
 | |
|     }
 | |
|     end_table(file);
 | |
|     
 | |
|     fclose(file);
 | |
|     return(0);
 | |
| }
 | |
| 
 | |
| // BOTTOM
 | |
| 
 | |
| 
 |