2016-03-24 01:05:28 +00:00
|
|
|
/*
|
|
|
|
* FSM table generator:
|
|
|
|
* Generate FSM tables as ".c" files from FSM functions.
|
|
|
|
*
|
|
|
|
* 23.03.2016 (dd.mm.yyyy)
|
|
|
|
*/
|
|
|
|
|
|
|
|
// TOP
|
|
|
|
|
2016-03-31 04:05:47 +00:00
|
|
|
/* TODO(allen):
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
1. Eliminate the complicated preprocessor directive parsing tables
|
|
|
|
2. Establish a clean systematic way of maintaining whatever is left
|
2016-03-31 04:05:47 +00:00
|
|
|
|
|
|
|
*/
|
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
#include <stdlib.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <string.h>
|
2016-03-24 14:01:53 +00:00
|
|
|
#include <assert.h>
|
2016-08-29 01:03:26 +00:00
|
|
|
#include <stdint.h>
|
2016-03-24 14:01:53 +00:00
|
|
|
|
|
|
|
// Element count of a true array (not a pointer). The argument is fully
// parenthesized so that any array-valued expression expands correctly.
#define ArrayCount(a) (sizeof(a)/sizeof(*(a)))
|
2016-03-24 01:05:28 +00:00
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
#include "4cpp_lexer_types.h"
|
2016-03-24 01:05:28 +00:00
|
|
|
#include "4cpp_lexer_fsms.h"
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
// One entry of a spelling table: a literal string paired with the value
// that the generated tables associate with it.
struct String_And_Flag{
    // Spelling to match, NUL terminated.
    char *str;
    
    // Value recorded for this spelling (CPP_PP_* or CPP_TOKEN_* in this file).
    uint32_t flags;
};
|
|
|
|
|
2016-03-31 04:05:47 +00:00
|
|
|
// Preprocessor directive spellings. Every directive appears in lower case
// immediately followed by its upper case twin. Entry order is significant:
// entries are referred to by index while the match tree is built.
static String_And_Flag preprop_strings[] = {
    {"include", CPP_PP_INCLUDE}, {"INCLUDE", CPP_PP_INCLUDE},
    {"ifndef", CPP_PP_IFNDEF},   {"IFNDEF", CPP_PP_IFNDEF},
    {"define", CPP_PP_DEFINE},   {"DEFINE", CPP_PP_DEFINE},
    {"import", CPP_PP_IMPORT},   {"IMPORT", CPP_PP_IMPORT},
    {"pragma", CPP_PP_PRAGMA},   {"PRAGMA", CPP_PP_PRAGMA},
    {"undef", CPP_PP_UNDEF},     {"UNDEF", CPP_PP_UNDEF},
    {"endif", CPP_PP_ENDIF},     {"ENDIF", CPP_PP_ENDIF},
    {"error", CPP_PP_ERROR},     {"ERROR", CPP_PP_ERROR},
    {"ifdef", CPP_PP_IFDEF},     {"IFDEF", CPP_PP_IFDEF},
    {"using", CPP_PP_USING},     {"USING", CPP_PP_USING},
    {"else", CPP_PP_ELSE},       {"ELSE", CPP_PP_ELSE},
    {"elif", CPP_PP_ELIF},       {"ELIF", CPP_PP_ELIF},
    {"line", CPP_PP_LINE},       {"LINE", CPP_PP_LINE},
    {"if", CPP_PP_IF},           {"IF", CPP_PP_IF},
};
|
|
|
|
// Keyword spellings and the token type each one lexes to, grouped by token
// type. Entry order is kept exactly as it was; entries are referred to by
// index while match trees are built.
static String_And_Flag keyword_strings[] = {
    // boolean constants
    {"true",  CPP_TOKEN_BOOLEAN_CONSTANT},
    {"false", CPP_TOKEN_BOOLEAN_CONSTANT},
    
    // operator-like keywords
    {"and",      CPP_TOKEN_AND},
    {"and_eq",   CPP_TOKEN_ANDEQ},
    {"bitand",   CPP_TOKEN_BIT_AND},
    {"bitor",    CPP_TOKEN_BIT_OR},
    {"or",       CPP_TOKEN_OR},
    {"or_eq",    CPP_TOKEN_OREQ},
    {"sizeof",   CPP_TOKEN_SIZEOF},
    {"alignof",  CPP_TOKEN_ALIGNOF},
    {"decltype", CPP_TOKEN_DECLTYPE},
    {"throw",    CPP_TOKEN_THROW},
    {"new",      CPP_TOKEN_NEW},
    {"delete",   CPP_TOKEN_DELETE},
    {"xor",      CPP_TOKEN_BIT_XOR},
    {"xor_eq",   CPP_TOKEN_XOREQ},
    {"not",      CPP_TOKEN_NOT},
    {"not_eq",   CPP_TOKEN_NOTEQ},
    {"typeid",   CPP_TOKEN_TYPEID},
    {"compl",    CPP_TOKEN_BIT_NOT},
    
    // built-in types
    {"void",   CPP_TOKEN_KEY_TYPE},
    {"bool",   CPP_TOKEN_KEY_TYPE},
    {"char",   CPP_TOKEN_KEY_TYPE},
    {"int",    CPP_TOKEN_KEY_TYPE},
    {"float",  CPP_TOKEN_KEY_TYPE},
    {"double", CPP_TOKEN_KEY_TYPE},
    
    // type modifiers
    {"long",     CPP_TOKEN_KEY_MODIFIER},
    {"short",    CPP_TOKEN_KEY_MODIFIER},
    {"unsigned", CPP_TOKEN_KEY_MODIFIER},
    
    // qualifiers
    {"const",    CPP_TOKEN_KEY_QUALIFIER},
    {"volatile", CPP_TOKEN_KEY_QUALIFIER},
    
    // control flow
    {"asm",           CPP_TOKEN_KEY_CONTROL_FLOW},
    {"break",         CPP_TOKEN_KEY_CONTROL_FLOW},
    {"case",          CPP_TOKEN_KEY_CONTROL_FLOW},
    {"catch",         CPP_TOKEN_KEY_CONTROL_FLOW},
    {"continue",      CPP_TOKEN_KEY_CONTROL_FLOW},
    {"default",       CPP_TOKEN_KEY_CONTROL_FLOW},
    {"do",            CPP_TOKEN_KEY_CONTROL_FLOW},
    {"else",          CPP_TOKEN_KEY_CONTROL_FLOW},
    {"for",           CPP_TOKEN_KEY_CONTROL_FLOW},
    {"goto",          CPP_TOKEN_KEY_CONTROL_FLOW},
    {"if",            CPP_TOKEN_KEY_CONTROL_FLOW},
    {"return",        CPP_TOKEN_KEY_CONTROL_FLOW},
    {"switch",        CPP_TOKEN_KEY_CONTROL_FLOW},
    {"try",           CPP_TOKEN_KEY_CONTROL_FLOW},
    {"while",         CPP_TOKEN_KEY_CONTROL_FLOW},
    {"static_assert", CPP_TOKEN_KEY_CONTROL_FLOW},
    
    // casts
    {"const_cast",       CPP_TOKEN_KEY_CAST},
    {"dynamic_cast",     CPP_TOKEN_KEY_CAST},
    {"reinterpret_cast", CPP_TOKEN_KEY_CAST},
    {"static_cast",      CPP_TOKEN_KEY_CAST},
    
    // type declarations
    {"class",    CPP_TOKEN_KEY_TYPE_DECLARATION},
    {"enum",     CPP_TOKEN_KEY_TYPE_DECLARATION},
    {"struct",   CPP_TOKEN_KEY_TYPE_DECLARATION},
    {"typedef",  CPP_TOKEN_KEY_TYPE_DECLARATION},
    {"union",    CPP_TOKEN_KEY_TYPE_DECLARATION},
    {"template", CPP_TOKEN_KEY_TYPE_DECLARATION},
    {"typename", CPP_TOKEN_KEY_TYPE_DECLARATION},
    
    // access
    {"friend",    CPP_TOKEN_KEY_ACCESS},
    {"namespace", CPP_TOKEN_KEY_ACCESS},
    {"private",   CPP_TOKEN_KEY_ACCESS},
    {"protected", CPP_TOKEN_KEY_ACCESS},
    {"public",    CPP_TOKEN_KEY_ACCESS},
    {"using",     CPP_TOKEN_KEY_ACCESS},
    
    // linkage
    {"extern",  CPP_TOKEN_KEY_LINKAGE},
    {"export",  CPP_TOKEN_KEY_LINKAGE},
    {"inline",  CPP_TOKEN_KEY_LINKAGE},
    {"static",  CPP_TOKEN_KEY_LINKAGE},
    {"virtual", CPP_TOKEN_KEY_LINKAGE},
    
    // everything else
    {"alignas",      CPP_TOKEN_KEY_OTHER},
    {"explicit",     CPP_TOKEN_KEY_OTHER},
    {"noexcept",     CPP_TOKEN_KEY_OTHER},
    {"nullptr",      CPP_TOKEN_KEY_OTHER},
    {"operator",     CPP_TOKEN_KEY_OTHER},
    {"register",     CPP_TOKEN_KEY_OTHER},
    {"this",         CPP_TOKEN_KEY_OTHER},
    {"thread_local", CPP_TOKEN_KEY_OTHER},
};
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
// A single state of a generated machine.
typedef struct FSM_State{
    // Destination for each of the 256 possible input bytes.
    uint32_t transition_rule[256];
    
    // When non-zero, fsm_index reports terminal_base + override for this
    // state instead of its position in the state array.
    uint8_t override;
} FSM_State;
|
2016-03-31 04:05:47 +00:00
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
typedef struct FSM{
|
2016-03-31 04:05:47 +00:00
|
|
|
FSM_State *states;
|
|
|
|
unsigned short count, max;
|
2016-04-08 00:21:24 +00:00
|
|
|
|
|
|
|
FSM_State *term_states;
|
|
|
|
unsigned short term_count, term_max;
|
|
|
|
|
|
|
|
unsigned char terminal_base;
|
|
|
|
|
|
|
|
char *comment;
|
2016-08-29 01:03:26 +00:00
|
|
|
} FSM;
|
2016-04-08 00:21:24 +00:00
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
typedef struct FSM_Stack{
|
2016-04-08 00:21:24 +00:00
|
|
|
FSM *fsms;
|
2016-08-29 01:03:26 +00:00
|
|
|
int32_t count, max;
|
2016-04-22 00:50:16 +00:00
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
uint8_t table_transition_state;
|
|
|
|
uint8_t final_state;
|
|
|
|
} FSM_Stack;
|
2016-03-31 04:05:47 +00:00
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
typedef struct Match_Node{
|
2016-03-31 04:05:47 +00:00
|
|
|
Match_Node *first_child;
|
|
|
|
Match_Node *next_sibling;
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
int32_t *words;
|
|
|
|
int32_t count, max;
|
|
|
|
int32_t index;
|
2016-03-31 04:05:47 +00:00
|
|
|
|
|
|
|
FSM_State *state;
|
2016-08-29 01:03:26 +00:00
|
|
|
} Match_Node;
|
2016-03-31 04:05:47 +00:00
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
typedef struct Match_Tree{
|
2016-03-31 04:05:47 +00:00
|
|
|
Match_Node *nodes;
|
2016-08-29 01:03:26 +00:00
|
|
|
int32_t count, max;
|
|
|
|
} Match_Tree;
|
2016-03-31 04:05:47 +00:00
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
typedef struct Match_Tree_Stack{
|
2016-04-08 00:21:24 +00:00
|
|
|
Match_Tree *trees;
|
2016-08-29 01:03:26 +00:00
|
|
|
int32_t count, max;
|
|
|
|
} Match_Tree_Stack;
|
2016-04-08 00:21:24 +00:00
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
typedef struct Future_FSM{
|
2016-04-08 00:21:24 +00:00
|
|
|
Match_Node *source;
|
2016-08-29 01:03:26 +00:00
|
|
|
} Future_FSM;
|
2016-04-08 00:21:24 +00:00
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
typedef struct Future_FSM_Stack{
|
2016-04-08 00:21:24 +00:00
|
|
|
Future_FSM *futures;
|
2016-08-29 01:03:26 +00:00
|
|
|
int32_t count, max;
|
|
|
|
} Future_FSM_Stack;
|
2016-04-08 00:21:24 +00:00
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
// Claims the next unused FSM slot of the stack and returns a pointer to it.
// Asserts that the stack is not already full. The slot is not initialized.
static FSM*
get_fsm(FSM_Stack *stack){
    assert(stack->count < stack->max);
    FSM *slot = stack->fsms + stack->count;
    stack->count += 1;
    return(slot);
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
// Claims the next unused tree slot of the stack and returns a pointer to it.
// Asserts that the stack is not already full. The slot is not initialized.
static Match_Tree*
get_tree(Match_Tree_Stack *stack){
    assert(stack->count < stack->max);
    int32_t slot = stack->count;
    stack->count = slot + 1;
    return(stack->trees + slot);
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
// Builds an empty FSM whose regular and terminal state pools each have room
// for `max` states.
//
//  max           - capacity of both the regular and the terminal pool
//  terminal_base - stored in the FSM; used as the default transition of
//                  fresh states and as the base for overridden indices
//
// Returns the FSM by value. Both pools are heap allocated and owned by the
// returned FSM; the states themselves are left uninitialized. Allocation
// failure trips an assert, like the other invariants in this file.
static FSM
fsm_init(uint16_t max, uint8_t terminal_base){
    FSM fsm;
    size_t pool_bytes = sizeof(FSM_State)*max;
    
    fsm.max = max;
    fsm.count = 0;
    fsm.states = (FSM_State*)malloc(pool_bytes);
    // malloc(0) may legitimately return 0, so only a non-empty pool is checked.
    assert(fsm.states != 0 || pool_bytes == 0);
    
    fsm.term_max = max;
    fsm.term_count = 0;
    fsm.term_states = (FSM_State*)malloc(pool_bytes);
    assert(fsm.term_states != 0 || pool_bytes == 0);
    
    fsm.comment = 0;
    fsm.terminal_base = terminal_base;
    
    return(fsm);
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
static void
|
2016-04-08 00:21:24 +00:00
|
|
|
fsm_add_comment(FSM *fsm, char *str){
|
2016-08-29 01:03:26 +00:00
|
|
|
int32_t comment_len;
|
|
|
|
int32_t str_len;
|
2016-04-08 00:21:24 +00:00
|
|
|
char *new_comment;
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
str_len = (int32_t)strlen(str);
|
2016-04-08 00:21:24 +00:00
|
|
|
|
|
|
|
if (fsm->comment != 0){
|
2016-08-29 01:03:26 +00:00
|
|
|
comment_len = (int32_t)strlen(fsm->comment);
|
2016-04-08 00:21:24 +00:00
|
|
|
new_comment = (char*)malloc(str_len + comment_len + 1);
|
2016-08-29 01:03:26 +00:00
|
|
|
|
2016-04-08 00:21:24 +00:00
|
|
|
memcpy(new_comment, fsm->comment, comment_len);
|
|
|
|
memcpy(new_comment + comment_len, str, str_len);
|
|
|
|
new_comment[comment_len + str_len] = 0;
|
|
|
|
|
|
|
|
free(fsm->comment);
|
|
|
|
fsm->comment = new_comment;
|
|
|
|
}
|
|
|
|
else{
|
|
|
|
fsm->comment = (char*)malloc(str_len + 1);
|
|
|
|
memcpy(fsm->comment, str, str_len);
|
|
|
|
fsm->comment[str_len] = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
// Builds an empty match tree with room for `max` nodes.
//
// Returns the tree by value; its node pool is heap allocated, owned by the
// returned tree, and left uninitialized. Allocation failure trips an assert.
static Match_Tree
tree_init(uint16_t max){
    Match_Tree tree;
    size_t pool_bytes = sizeof(Match_Node)*max;
    
    tree.max = max;
    tree.count = 0;
    tree.nodes = (Match_Node*)malloc(pool_bytes);
    // malloc(0) may legitimately return 0, so only a non-empty pool is checked.
    assert(tree.nodes != 0 || pool_bytes == 0);
    
    return(tree);
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
static uint8_t
|
2016-04-08 00:21:24 +00:00
|
|
|
push_future_fsm(Future_FSM_Stack *stack, Match_Node *node){
|
2016-08-29 01:03:26 +00:00
|
|
|
uint8_t index = 0;
|
2016-05-18 23:22:58 +00:00
|
|
|
Future_FSM *future = 0;
|
2016-04-08 00:21:24 +00:00
|
|
|
assert(stack->count < stack->max);
|
2016-05-18 23:22:58 +00:00
|
|
|
assert(stack->max < 256);
|
2016-08-29 01:03:26 +00:00
|
|
|
index = (uint8_t)(stack->count++);
|
2016-05-18 23:22:58 +00:00
|
|
|
future = &stack->futures[index];
|
2016-04-08 00:21:24 +00:00
|
|
|
future->source = node;
|
2016-05-18 23:22:58 +00:00
|
|
|
return(index);
|
2016-04-08 00:21:24 +00:00
|
|
|
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
// Claims the next unused node of the tree's pool and returns it.
// Asserts that the pool is not exhausted. The node is not initialized.
static Match_Node*
match_get_node(Match_Tree *tree){
    assert(tree->count < tree->max);
    Match_Node *fresh = tree->nodes + tree->count;
    tree->count += 1;
    return(fresh);
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
static void
|
|
|
|
match_init_node(Match_Node *node, int32_t match_count){
|
2016-03-31 04:05:47 +00:00
|
|
|
*node = {};
|
2016-08-29 01:03:26 +00:00
|
|
|
node->words = (int32_t*)malloc(sizeof(int32_t)*match_count);
|
2016-03-31 04:05:47 +00:00
|
|
|
node->max = match_count;
|
|
|
|
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
static void
|
2016-04-08 00:21:24 +00:00
|
|
|
match_copy_init_node(Match_Node *node, Match_Node *source){
|
|
|
|
*node = {};
|
|
|
|
node->max = source->count;
|
|
|
|
node->count = source->count;
|
2016-08-29 01:03:26 +00:00
|
|
|
node->words = (int32_t*)malloc(sizeof(int32_t)*source->count);
|
2016-04-08 00:21:24 +00:00
|
|
|
node->index = source->index;
|
2016-08-29 01:03:26 +00:00
|
|
|
memcpy(node->words, source->words, sizeof(int32_t)*source->count);
|
2016-04-08 00:21:24 +00:00
|
|
|
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
static void
|
|
|
|
match_add_word(Match_Node *node, int32_t word){
|
2016-03-31 04:05:47 +00:00
|
|
|
assert(node->count < node->max);
|
|
|
|
node->words[node->count++] = word;
|
|
|
|
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
// Claims the next regular state of the FSM and returns it.
//
// All 256 transitions of the new state are set to `terminal_base` and its
// override is cleared. Asserts that the regular pool is not exhausted.
static FSM_State*
fsm_get_state(FSM *fsm, uint32_t terminal_base){
    assert(fsm->count < fsm->max);
    FSM_State *fresh = fsm->states + fsm->count;
    ++fsm->count;
    
    uint32_t *rule = fresh->transition_rule;
    uint32_t *rule_end = rule + 256;
    while (rule != rule_end){
        *rule = terminal_base;
        ++rule;
    }
    
    fresh->override = 0;
    return(fresh);
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
// Convenience overload: claims a regular state whose default transition is
// the FSM's own terminal_base.
static FSM_State*
fsm_get_state(FSM *fsm){
    return(fsm_get_state(fsm, fsm->terminal_base));
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
// Claims the next terminal state of the FSM, stores `override` in it and
// returns it. The state's transition rules are not touched here.
// Asserts that the terminal pool is not exhausted.
static FSM_State*
fsm_get_term_state(FSM *fsm, unsigned char override){
    assert(fsm->term_count < fsm->term_max);
    FSM_State *term = fsm->term_states + fsm->term_count;
    ++fsm->term_count;
    
    term->override = override;
    return(term);
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
static uint8_t
|
2016-03-31 04:05:47 +00:00
|
|
|
fsm_index(FSM *fsm, FSM_State *s){
|
2016-08-29 01:03:26 +00:00
|
|
|
uint8_t result;
|
|
|
|
result = (uint8_t)(uint64_t)(s - fsm->states);
|
2016-04-08 00:21:24 +00:00
|
|
|
if (s->override){
|
|
|
|
result = fsm->terminal_base + s->override;
|
|
|
|
}
|
2016-03-31 04:05:47 +00:00
|
|
|
return(result);
|
|
|
|
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
static void
|
2016-03-31 04:05:47 +00:00
|
|
|
fsm_add_transition(FSM_State *state, char c, unsigned char dest){
|
|
|
|
state->transition_rule[c] = dest;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct Terminal_Lookup_Table{
|
2016-08-29 01:03:26 +00:00
|
|
|
uint32_t state_to_type[60];
|
|
|
|
uint8_t type_to_state[CPP_TOKEN_TYPE_COUNT];
|
|
|
|
uint8_t state_count;
|
2016-03-31 04:05:47 +00:00
|
|
|
};
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
static void
|
2016-05-18 23:22:58 +00:00
|
|
|
process_match_node(String_And_Flag *input, Match_Node *node, Match_Tree *tree, FSM *fsm){
|
2016-08-29 01:03:26 +00:00
|
|
|
int32_t next_index = node->index + 1;
|
|
|
|
int32_t match_count = node->count;
|
2016-03-31 04:05:47 +00:00
|
|
|
FSM_State *this_state = node->state;
|
2016-08-29 01:03:26 +00:00
|
|
|
uint8_t terminal_base = fsm->terminal_base;
|
2016-03-31 04:05:47 +00:00
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
int32_t i, j, *words = node->words;
|
2016-03-31 04:05:47 +00:00
|
|
|
|
|
|
|
String_And_Flag saf;
|
2016-08-29 01:03:26 +00:00
|
|
|
int32_t l;
|
2016-03-31 04:05:47 +00:00
|
|
|
|
|
|
|
char c;
|
|
|
|
Match_Node *next_nodes[256];
|
|
|
|
Match_Node *newest_child = 0;
|
|
|
|
Match_Node *n;
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
uint8_t unjunkify = 0;
|
2016-03-31 04:05:47 +00:00
|
|
|
|
|
|
|
memset(next_nodes, 0, sizeof(next_nodes));
|
|
|
|
|
|
|
|
for (i = 0; i < match_count; ++i){
|
|
|
|
j = words[i];
|
|
|
|
saf = input[j];
|
2016-08-29 01:03:26 +00:00
|
|
|
l = (int32_t)strlen(saf.str);
|
2016-03-31 04:05:47 +00:00
|
|
|
|
|
|
|
if (next_index < l){
|
|
|
|
c = saf.str[next_index];
|
|
|
|
|
|
|
|
if (next_nodes[c] == 0){
|
|
|
|
next_nodes[c] = match_get_node(tree);
|
|
|
|
match_init_node(next_nodes[c], match_count);
|
2016-04-08 00:21:24 +00:00
|
|
|
|
2016-03-31 04:05:47 +00:00
|
|
|
next_nodes[c]->index = next_index;
|
2016-05-18 23:22:58 +00:00
|
|
|
next_nodes[c]->state = fsm_get_state(fsm);
|
2016-04-08 00:21:24 +00:00
|
|
|
|
2016-03-31 04:05:47 +00:00
|
|
|
if (newest_child == 0){
|
|
|
|
assert(node->first_child == 0);
|
|
|
|
node->first_child = next_nodes[c];
|
2016-04-08 00:21:24 +00:00
|
|
|
}
|
2016-03-31 04:05:47 +00:00
|
|
|
else{
|
|
|
|
assert(newest_child->next_sibling == 0);
|
|
|
|
newest_child->next_sibling = next_nodes[c];
|
2016-04-08 00:21:24 +00:00
|
|
|
}
|
2016-03-31 04:05:47 +00:00
|
|
|
newest_child = next_nodes[c];
|
2016-04-08 00:21:24 +00:00
|
|
|
}
|
2016-03-31 04:05:47 +00:00
|
|
|
|
|
|
|
match_add_word(next_nodes[c], j);
|
|
|
|
fsm_add_transition(this_state, c, fsm_index(fsm, next_nodes[c]->state));
|
2016-04-08 00:21:24 +00:00
|
|
|
}
|
2016-03-31 04:05:47 +00:00
|
|
|
else if (next_index == l){
|
2016-05-18 23:22:58 +00:00
|
|
|
assert(unjunkify == 0);
|
|
|
|
unjunkify = (unsigned char)saf.flags;
|
2016-03-31 04:05:47 +00:00
|
|
|
}
|
2016-04-08 00:21:24 +00:00
|
|
|
}
|
2016-03-31 04:05:47 +00:00
|
|
|
|
|
|
|
if (unjunkify){
|
|
|
|
for (i = 0; i < 256; ++i){
|
2016-04-08 00:21:24 +00:00
|
|
|
if (this_state->transition_rule[i] == terminal_base){
|
|
|
|
this_state->transition_rule[i] = terminal_base + unjunkify;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2016-05-18 23:22:58 +00:00
|
|
|
|
|
|
|
for (n = node->first_child; n; n = n->next_sibling){
|
|
|
|
process_match_node(input, n, tree, fsm);
|
2016-04-08 00:21:24 +00:00
|
|
|
}
|
2016-03-31 04:05:47 +00:00
|
|
|
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
// Builds the machine that recognizes the preprocessor directive spellings
// in preprop_strings.
//
// A root node holding every table entry is expanded with process_match_node;
// afterwards the root state is made to loop on itself (state 0) for space,
// tab, carriage return, vertical tab and form feed. Returns the finished FSM.
static FSM
generate_pp_directive_fsm(){
    FSM fsm = fsm_init(200, 200);
    Match_Tree tree = tree_init(200);
    
    FSM_State *root_state = fsm_get_state(&fsm);
    Match_Node *root_node = match_get_node(&tree);
    
    // The root starts out matching every directive.
    int32_t word_count = (int32_t)ArrayCount(preprop_strings);
    match_init_node(root_node, word_count);
    for (int32_t w = 0; w < word_count; ++w){
        root_node->words[w] = w;
    }
    root_node->count = word_count;
    root_node->state = root_state;
    // -1 so that the first character examined is at position 0.
    root_node->index = -1;
    
    process_match_node(preprop_strings, root_node, &tree, &fsm);
    
    // Whitespace in the root state stays in the root state.
    static const unsigned char skipped[] = {' ', '\t', '\r', '\v', '\f'};
    for (int32_t k = 0; k < (int32_t)sizeof(skipped); ++k){
        root_state->transition_rule[skipped[k]] = 0;
    }
    
    return(fsm);
}
|
|
|
|
|
2016-05-18 23:22:58 +00:00
|
|
|
/*
|
|
|
|
|
|
|
|
Each state needs a full set of transition rules. Most transitions should go into a
|
|
|
|
"not-a-keyword-state". The exceptions are:
|
|
|
|
1. When we see an alphanumeric character that is the next character of an actual keyword
|
|
|
|
i. May need to transition to a new table at this point.
|
|
|
|
2. When we have just seen an entire valid keyword, and the next thing we see is not alphanumeric.
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
// Base value (65536) for transitions that end keyword matching; chosen
// above the 8-bit range used for ordinary state indices.
#define RealTerminateBase 0x10000
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
// Returns 1 when `x` is an ASCII digit, an ASCII letter or an underscore,
// and 0 for every other character.
static int32_t
char_is_alphanumeric(char x){
    if (x == '_'){
        return(1);
    }
    if ('0' <= x && x <= '9'){
        return(1);
    }
    if ('A' <= x && x <= 'Z'){
        return(1);
    }
    if ('a' <= x && x <= 'z'){
        return(1);
    }
    return(0);
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
static void
|
2016-05-18 23:22:58 +00:00
|
|
|
process_match_node(String_And_Flag *input, Match_Node *node, Match_Tree *tree, FSM *fsm,
|
2016-08-29 01:03:26 +00:00
|
|
|
Terminal_Lookup_Table *terminal_table, int32_t levels_to_go,
|
2016-05-18 23:22:58 +00:00
|
|
|
Future_FSM_Stack *unfinished_fsms){
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
int32_t next_index = node->index + 1;
|
|
|
|
int32_t match_count = node->count;
|
|
|
|
int32_t *words = node->words;
|
2016-05-18 23:22:58 +00:00
|
|
|
FSM_State *this_state = node->state;
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
int32_t word_index = 0;
|
|
|
|
int32_t good_transition = 0;
|
|
|
|
int32_t len = 0;
|
|
|
|
int32_t i = 0;
|
2016-05-18 23:22:58 +00:00
|
|
|
|
|
|
|
String_And_Flag saf = {0};
|
|
|
|
|
|
|
|
Match_Node *next_nodes[256];
|
|
|
|
Match_Node *newest_child = 0;
|
|
|
|
Match_Node *n = 0;
|
|
|
|
char c = 0;
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
uint8_t override = 0;
|
2016-05-18 23:22:58 +00:00
|
|
|
|
|
|
|
memset(next_nodes, 0, sizeof(next_nodes));
|
2016-08-29 01:03:26 +00:00
|
|
|
|
2016-05-18 23:22:58 +00:00
|
|
|
for (i = 0; i < match_count; ++i){
|
|
|
|
word_index = words[i];
|
|
|
|
saf = input[word_index];
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
len = (int32_t)strlen(saf.str);
|
2016-05-18 23:22:58 +00:00
|
|
|
if (next_index < len){
|
|
|
|
c = saf.str[next_index];
|
|
|
|
|
|
|
|
if (next_nodes[c] == 0){
|
|
|
|
next_nodes[c] = match_get_node(tree);
|
|
|
|
match_init_node(next_nodes[c], match_count);
|
|
|
|
|
|
|
|
next_nodes[c]->index = next_index;
|
|
|
|
|
|
|
|
if (levels_to_go == 1){
|
|
|
|
override = push_future_fsm(unfinished_fsms, next_nodes[c]);
|
|
|
|
next_nodes[c]->state = fsm_get_term_state(fsm, override);
|
|
|
|
}
|
|
|
|
else{
|
|
|
|
next_nodes[c]->state = fsm_get_state(fsm, RealTerminateBase);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (newest_child == 0){
|
|
|
|
assert(node->first_child == 0);
|
|
|
|
node->first_child = next_nodes[c];
|
|
|
|
}
|
|
|
|
else{
|
|
|
|
assert(newest_child->next_sibling == 0);
|
|
|
|
newest_child->next_sibling = next_nodes[c];
|
|
|
|
}
|
|
|
|
newest_child = next_nodes[c];
|
|
|
|
}
|
|
|
|
|
|
|
|
match_add_word(next_nodes[c], word_index);
|
|
|
|
fsm_add_transition(this_state, c, fsm_index(fsm, next_nodes[c]->state));
|
|
|
|
}
|
|
|
|
else{
|
|
|
|
assert(next_index == len);
|
|
|
|
assert(good_transition == 0);
|
|
|
|
good_transition = terminal_table->type_to_state[saf.flags] + RealTerminateBase;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (good_transition){
|
|
|
|
for (i = 0; i < 256; ++i){
|
|
|
|
if (!char_is_alphanumeric((char)i)){
|
|
|
|
this_state->transition_rule[i] = good_transition;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (levels_to_go != 1){
|
|
|
|
for (n = node->first_child; n; n = n->next_sibling){
|
|
|
|
process_match_node(input, n, tree, fsm, terminal_table, levels_to_go-1, unfinished_fsms);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
// Advances the whitespace-skipping machine by one character.
//
// A newline resets a non-default preprocessor state to LSPP_default. Any
// character other than space, newline, tab, carriage return, form feed or
// vertical tab sets white_done. Returns the updated machine by value.
Whitespace_FSM
whitespace_skip_fsm(Whitespace_FSM wfsm, char c){
    if (c == '\n' && wfsm.pp_state != LSPP_default){
        wfsm.pp_state = LSPP_default;
    }
    
    switch (c){
        case ' ': case '\n': case '\t':
        case '\r': case '\f': case '\v':
        break;
        
        default:
        wfsm.white_done = 1;
        break;
    }
    
    return(wfsm);
}
|
|
|
|
|
|
|
|
// Advances the integer-suffix machine by one character.
//
// Each state either moves to the next suffix state for an accepted
// character or sets emit_token for anything else; LSINT_extra always sets
// emit_token. Returns the updated machine by value.
Lex_FSM
int_fsm(Lex_FSM fsm, char c){
    int is_u = (c == 'u' || c == 'U');
    
    switch (fsm.int_state){
        case LSINT_default:
        if (is_u)          fsm.int_state = LSINT_u;
        else if (c == 'l') fsm.int_state = LSINT_l;
        else if (c == 'L') fsm.int_state = LSINT_L;
        else               fsm.emit_token = 1;
        break;
        
        case LSINT_u:
        if (c == 'l')      fsm.int_state = LSINT_ul;
        else if (c == 'L') fsm.int_state = LSINT_uL;
        else               fsm.emit_token = 1;
        break;
        
        case LSINT_l:
        if (c == 'l')      fsm.int_state = LSINT_ll;
        else if (is_u)     fsm.int_state = LSINT_extra;
        else               fsm.emit_token = 1;
        break;
        
        case LSINT_L:
        if (c == 'L')      fsm.int_state = LSINT_ll;
        else if (is_u)     fsm.int_state = LSINT_extra;
        else               fsm.emit_token = 1;
        break;
        
        case LSINT_ul:
        if (c == 'l')      fsm.int_state = LSINT_extra;
        else               fsm.emit_token = 1;
        break;
        
        case LSINT_uL:
        if (c == 'L')      fsm.int_state = LSINT_extra;
        else               fsm.emit_token = 1;
        break;
        
        case LSINT_ll:
        if (is_u)          fsm.int_state = LSINT_extra;
        else               fsm.emit_token = 1;
        break;
        
        case LSINT_extra:
        fsm.emit_token = 1;
        break;
    }
    
    return(fsm);
}
|
|
|
|
|
|
|
|
Lex_FSM
|
|
|
|
main_fsm(Lex_FSM fsm, unsigned char pp_state, unsigned char c){
|
|
|
|
if (c == 0) fsm.emit_token = 1;
|
|
|
|
else
|
|
|
|
switch (pp_state){
|
|
|
|
case LSPP_error:
|
|
|
|
fsm.state = LS_error_message;
|
|
|
|
if (c == '\n') fsm.emit_token = 1;
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LSPP_include:
|
|
|
|
switch (fsm.state){
|
|
|
|
case LSINC_default:
|
|
|
|
switch (c){
|
|
|
|
case '"': fsm.state = LSINC_quotes; break;
|
|
|
|
case '<': fsm.state = LSINC_pointy; break;
|
|
|
|
default: fsm.state = LSINC_junk; break;
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LSINC_quotes:
|
|
|
|
if (c == '"') fsm.emit_token = 1;
|
2016-07-18 23:57:08 +00:00
|
|
|
else if (c == '\n'){
|
|
|
|
fsm.emit_token = 1;
|
|
|
|
fsm.state = LSINC_junk;
|
|
|
|
}
|
2016-03-24 01:05:28 +00:00
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LSINC_pointy:
|
|
|
|
if (c == '>') fsm.emit_token = 1;
|
2016-07-18 23:57:08 +00:00
|
|
|
else if (c == '\n'){
|
|
|
|
fsm.emit_token = 1;
|
|
|
|
fsm.state = LSINC_junk;
|
|
|
|
}
|
2016-03-24 01:05:28 +00:00
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LSINC_junk:
|
|
|
|
if (c == '\n') fsm.emit_token = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
default:
|
|
|
|
switch (fsm.state){
|
|
|
|
case LS_default:
|
|
|
|
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'){
|
|
|
|
fsm.state = LS_identifier;
|
|
|
|
}
|
|
|
|
else if (c >= '1' && c <= '9'){
|
|
|
|
fsm.state = LS_number;
|
|
|
|
}
|
|
|
|
else if (c == '0'){
|
|
|
|
fsm.state = LS_number0;
|
|
|
|
}
|
|
|
|
else switch (c){
|
|
|
|
case '\'': fsm.state = LS_char; break;
|
|
|
|
case '"': fsm.state = LS_string; break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case '/': fsm.state = LS_comment_pre; break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case '.': fsm.state = LS_dot; break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case '<': fsm.state = LS_less; break;
|
|
|
|
case '>': fsm.state = LS_more; break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case '-': fsm.state = LS_minus; break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case '&': fsm.state = LS_and; break;
|
|
|
|
case '|': fsm.state = LS_or; break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case '+': fsm.state = LS_plus; break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case ':': fsm.state = LS_colon; break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case '*': fsm.state = LS_star; break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case '%': fsm.state = LS_modulo; break;
|
|
|
|
case '^': fsm.state = LS_caret; break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case '=': fsm.state = LS_eq; break;
|
|
|
|
case '!': fsm.state = LS_bang; break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-31 04:05:47 +00:00
|
|
|
case '#':
|
|
|
|
if (pp_state == LSPP_default){
|
|
|
|
fsm.state = LS_pp;
|
|
|
|
fsm.emit_token = 1;
|
|
|
|
}
|
|
|
|
else{
|
2016-05-18 23:22:58 +00:00
|
|
|
fsm.state = LS_pound;
|
|
|
|
}
|
2016-03-31 04:05:47 +00:00
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
#define OperCase(op,type) case op: fsm.emit_token = 1; break;
|
|
|
|
OperCase('{', CPP_TOKEN_BRACE_OPEN);
|
|
|
|
OperCase('}', CPP_TOKEN_BRACE_CLOSE);
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
OperCase('[', CPP_TOKEN_BRACKET_OPEN);
|
|
|
|
OperCase(']', CPP_TOKEN_BRACKET_CLOSE);
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
OperCase('(', CPP_TOKEN_PARENTHESE_OPEN);
|
|
|
|
OperCase(')', CPP_TOKEN_PARENTHESE_CLOSE);
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
OperCase('~', CPP_TOKEN_TILDE);
|
|
|
|
OperCase(',', CPP_TOKEN_COMMA);
|
|
|
|
OperCase(';', CPP_TOKEN_SEMICOLON);
|
|
|
|
OperCase('?', CPP_TOKEN_TERNARY_QMARK);
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
OperCase('@', CPP_TOKEN_JUNK);
|
|
|
|
OperCase('$', CPP_TOKEN_JUNK);
|
|
|
|
OperCase('\\', CPP_TOKEN_JUNK);
|
|
|
|
#undef OperCase
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_identifier:
|
|
|
|
if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_')){
|
|
|
|
fsm.emit_token = 1;
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_pound:
|
2016-03-31 04:05:47 +00:00
|
|
|
switch (c){
|
|
|
|
case '#': fsm.emit_token = 1; break;
|
|
|
|
default: fsm.emit_token = 1; break;
|
2016-03-24 01:05:28 +00:00
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-04-08 00:21:24 +00:00
|
|
|
case LS_pp:break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_char:
|
2016-03-24 14:01:53 +00:00
|
|
|
case LS_char_multiline:
|
2016-03-24 01:05:28 +00:00
|
|
|
switch(c){
|
2016-07-18 23:57:08 +00:00
|
|
|
case '\n': case '\'': fsm.emit_token = 1; break;
|
2016-03-24 01:05:28 +00:00
|
|
|
case '\\': fsm.state = LS_char_slashed; break;
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_char_slashed:
|
|
|
|
switch (c){
|
|
|
|
case '\r': case '\f': case '\v': break;
|
2016-03-24 14:01:53 +00:00
|
|
|
case '\n': fsm.state = LS_char_multiline; break;
|
2016-03-24 01:05:28 +00:00
|
|
|
default: fsm.state = LS_char; break;
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_string:
|
2016-03-24 14:01:53 +00:00
|
|
|
case LS_string_multiline:
|
2016-03-24 01:05:28 +00:00
|
|
|
switch(c){
|
2016-07-18 23:57:08 +00:00
|
|
|
case '\n': case '\"': fsm.emit_token = 1; break;
|
2016-03-24 01:05:28 +00:00
|
|
|
case '\\': fsm.state = LS_string_slashed; break;
|
2016-07-18 23:57:08 +00:00
|
|
|
}
|
2016-03-24 01:05:28 +00:00
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_string_slashed:
|
|
|
|
switch (c){
|
|
|
|
case '\r': case '\f': case '\v': break;
|
2016-03-24 14:01:53 +00:00
|
|
|
case '\n': fsm.state = LS_string_multiline; break;
|
2016-03-24 01:05:28 +00:00
|
|
|
default: fsm.state = LS_string; break;
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_number:
|
|
|
|
if (c >= '0' && c <= '9'){
|
|
|
|
fsm.state = LS_number;
|
|
|
|
}
|
|
|
|
else{
|
|
|
|
switch (c){
|
|
|
|
case '.': fsm.state = LS_float; break;
|
|
|
|
default: fsm.emit_token = 1; break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_number0:
|
|
|
|
if (c >= '0' && c <= '9'){
|
|
|
|
fsm.state = LS_number;
|
|
|
|
}
|
|
|
|
else if (c == 'x'){
|
|
|
|
fsm.state = LS_hex;
|
|
|
|
}
|
|
|
|
else if (c == '.'){
|
|
|
|
fsm.state = LS_float;
|
|
|
|
}
|
|
|
|
else{
|
|
|
|
fsm.emit_token = 1;
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_float:
|
|
|
|
if (!(c >= '0' && c <= '9')){
|
|
|
|
switch (c){
|
|
|
|
case 'e': fsm.state = LS_crazy_float0; break;
|
|
|
|
default: fsm.emit_token = 1; break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_crazy_float0:
|
|
|
|
{
|
|
|
|
if ((c >= '0' && c <= '9') || c == '-'){
|
|
|
|
fsm.state = LS_crazy_float1;
|
|
|
|
}
|
|
|
|
else{
|
|
|
|
fsm.emit_token = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_crazy_float1:
|
|
|
|
{
|
|
|
|
if (!(c >= '0' && c <= '9')){
|
|
|
|
fsm.emit_token = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_hex:
|
|
|
|
if (!(c >= '0' && c <= '9' || c >= 'a' && c <= 'f' || c >= 'A' && c <= 'F')){
|
|
|
|
fsm.emit_token = 1;
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_dot:
|
|
|
|
if (c >= '0' && c <= '9'){
|
|
|
|
fsm.state = LS_float;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
switch (c){
|
|
|
|
case '.': fsm.state = LS_ellipsis; break;
|
|
|
|
case '*': fsm.emit_token = 1; break;
|
|
|
|
default: fsm.emit_token = 1; break;
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_ellipsis: fsm.emit_token = 1; break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_less:
|
|
|
|
switch (c){
|
|
|
|
case '<': fsm.state = LS_less_less; break;
|
|
|
|
case '=': fsm.emit_token = 1; break;
|
|
|
|
default: fsm.emit_token = 1; break;
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_less_less:
|
|
|
|
switch (c){
|
|
|
|
case '=': fsm.emit_token = 1; break;
|
|
|
|
default: fsm.emit_token = 1; break;
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_more:
|
|
|
|
switch (c){
|
|
|
|
case '>': fsm.state = LS_more_more; break;
|
|
|
|
case '=': fsm.emit_token = 1; break;
|
|
|
|
default: fsm.emit_token = 1; break;
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_more_more:
|
|
|
|
switch (c){
|
|
|
|
case '=': fsm.emit_token = 1; break;
|
|
|
|
default: fsm.emit_token = 1; break;
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_comment_pre:
|
|
|
|
switch (c){
|
|
|
|
case '/': fsm.state = LS_comment; break;
|
|
|
|
case '*': fsm.state = LS_comment_block; break;
|
|
|
|
case '=': fsm.emit_token = 1; break;
|
|
|
|
default: fsm.emit_token = 1; break;
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_comment:
|
|
|
|
switch (c){
|
|
|
|
case '\\': fsm.state = LS_comment_slashed; break;
|
|
|
|
case '\n': fsm.emit_token = 1; break;
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_comment_slashed:
|
|
|
|
switch (c){
|
|
|
|
case '\r': case '\f': case '\v': break;
|
|
|
|
default: fsm.state = LS_comment; break;
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_comment_block:
|
|
|
|
switch (c){
|
|
|
|
case '*': fsm.state = LS_comment_block_ending; break;
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_comment_block_ending:
|
|
|
|
switch (c){
|
|
|
|
case '*': fsm.state = LS_comment_block_ending; break;
|
|
|
|
case '/': fsm.emit_token = 1; break;
|
|
|
|
default: fsm.state = LS_comment_block; break;
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_minus:
|
|
|
|
switch (c){
|
|
|
|
case '>': fsm.state = LS_arrow; break;
|
|
|
|
case '-': fsm.emit_token = 1; break;
|
|
|
|
case '=': fsm.emit_token = 1; break;
|
|
|
|
default: fsm.emit_token = 1; break;
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_arrow:
|
|
|
|
switch (c){
|
|
|
|
case '*': fsm.emit_token = 1; break;
|
|
|
|
default: fsm.emit_token = 1; break;
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_and:
|
|
|
|
switch (c){
|
|
|
|
case '&': fsm.emit_token = 1; break;
|
|
|
|
case '=': fsm.emit_token = 1; break;
|
|
|
|
default: fsm.emit_token = 1; break;
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_or:
|
|
|
|
switch (c){
|
|
|
|
case '|': fsm.emit_token = 1; break;
|
|
|
|
case '=': fsm.emit_token = 1; break;
|
|
|
|
default: fsm.emit_token = 1; break;
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_plus:
|
|
|
|
switch (c){
|
|
|
|
case '+': fsm.emit_token = 1; break;
|
|
|
|
case '=': fsm.emit_token = 1; break;
|
|
|
|
default: fsm.emit_token = 1; break;
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_colon:
|
|
|
|
switch (c){
|
|
|
|
case ':': fsm.emit_token = 1; break;
|
|
|
|
default: fsm.emit_token = 1; break;
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_star:
|
|
|
|
switch (c){
|
|
|
|
case '=': fsm.emit_token = 1; break;
|
|
|
|
default: fsm.emit_token = 1; break;
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_modulo:
|
|
|
|
switch (c){
|
|
|
|
case '=': fsm.emit_token = 1; break;
|
|
|
|
default: fsm.emit_token = 1; break;
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_caret:
|
|
|
|
switch (c){
|
|
|
|
case '=': fsm.emit_token = 1; break;
|
|
|
|
default: fsm.emit_token = 1; break;
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_eq:
|
|
|
|
switch (c){
|
|
|
|
case '=': fsm.emit_token = 1; break;
|
|
|
|
default: fsm.emit_token = 1; break;
|
|
|
|
}
|
|
|
|
break;
|
2016-07-18 23:57:08 +00:00
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
case LS_bang:
|
|
|
|
switch (c){
|
|
|
|
case '=': fsm.emit_token = 1; break;
|
|
|
|
default: fsm.emit_token = 1; break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return(fsm);
|
|
|
|
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
// Emits the opening line of an array definition whose name is
// "<group_name>_<table_name>" and whose element type is `type`.
static void
begin_table(FILE *file, char *type, char *group_name, char *table_name){
    fprintf(file,
            "%s %s_%s[] = {\n",
            type, group_name, table_name);
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
// Emits the opening line of an array definition named `table_name`
// with element type `type` (variant without a group prefix).
static void
begin_table(FILE *file, char *type, char *table_name){
    fprintf(file,
            "%s %s[] = {\n",
            type, table_name);
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
// Emits the opening line of an array of pointers: each element of
// `table_name` is a pointer to `type`.
static void
begin_ptr_table(FILE *file, char *type, char *table_name){
    fprintf(file,
            "%s * %s[] = {\n",
            type, table_name);
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
// Emits one numeric table entry, right-aligned to a minimum width of two
// characters and followed by a comma.
static void
do_table_item(FILE *file, uint16_t item){
    int32_t value = (int32_t)item;
    fprintf(file, "%2d,", value);
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
// Emits one textual table entry: `item` immediately followed by `tail`
// and a comma.
static void
do_table_item_direct(FILE *file, char *item, char *tail){
    fprintf(file,
            "%s%s,",
            item, tail);
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
// Terminates the current row of table entries with a line break.
static void
end_row(FILE *file){
    fputs("\n", file);
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
// Closes an array definition opened by one of the begin_* helpers and
// leaves a blank line after it.
static void
end_table(FILE *file){
    fputs("};\n\n", file);
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
// Working data for one finite state machine while it is turned into tables.
typedef struct FSM_Tables{
    // One byte per (character, state) pair, laid out as 256 consecutive
    // rows of state_count entries (row index = character value).
    uint8_t *full_transition_table;
    // marks[c] != 0 once character c has been folded into an earlier
    // equivalence class by do_table_reduction.
    uint8_t *marks;
    // eq_class[c] is the equivalence class number of character c.
    uint8_t *eq_class;
    // eq_class_rep[k] is the character whose row represents class k.
    uint8_t *eq_class_rep;
    // One row of state_count entries per equivalence class.
    uint8_t *reduced_transition_table;

    // Number of equivalence classes found by do_table_reduction.
    uint8_t eq_class_counter;
    // Number of states, i.e. the length of one row.
    uint16_t state_count;
} FSM_Tables;
|
|
|
|
|
|
|
|
static void
|
|
|
|
allocate_full_tables(FSM_Tables *table, uint8_t state_count){
|
|
|
|
table->full_transition_table = (uint8_t*)malloc(state_count * 256);
|
|
|
|
table->marks = (uint8_t*)malloc(state_count * 256);
|
|
|
|
table->eq_class = (uint8_t*)malloc(state_count * 256);
|
|
|
|
table->eq_class_rep = (uint8_t*)malloc(state_count * 256);
|
2016-03-31 04:05:47 +00:00
|
|
|
table->state_count = state_count;
|
|
|
|
memset(table->marks, 0, 256);
|
|
|
|
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
static void
|
|
|
|
do_table_reduction(FSM_Tables *table, uint16_t state_count){
|
2016-03-26 08:06:41 +00:00
|
|
|
{
|
|
|
|
table->eq_class_counter = 0;
|
2016-08-29 01:03:26 +00:00
|
|
|
uint8_t *c_line = table->full_transition_table;
|
|
|
|
for (uint16_t c = 0; c < 256; ++c){
|
2016-03-26 08:06:41 +00:00
|
|
|
if (table->marks[c] == 0){
|
|
|
|
table->eq_class[c] = table->eq_class_counter;
|
2016-08-29 01:03:26 +00:00
|
|
|
table->eq_class_rep[table->eq_class_counter] = (uint8_t)c;
|
|
|
|
uint8_t *c2_line = c_line + state_count;
|
|
|
|
for (uint16_t c2 = c + 1; c2 < 256; ++c2){
|
2016-03-26 08:06:41 +00:00
|
|
|
if (memcmp(c_line, c2_line, state_count) == 0){
|
|
|
|
table->marks[c2] = 1;
|
|
|
|
table->eq_class[c2] = table->eq_class_counter;
|
|
|
|
}
|
|
|
|
c2_line += state_count;
|
|
|
|
}
|
|
|
|
++table->eq_class_counter;
|
|
|
|
}
|
|
|
|
c_line += state_count;
|
|
|
|
}
|
|
|
|
}
|
2016-08-29 01:03:26 +00:00
|
|
|
|
|
|
|
table->reduced_transition_table = (uint8_t*)malloc(state_count * table->eq_class_counter);
|
2016-03-26 08:06:41 +00:00
|
|
|
{
|
2016-08-29 01:03:26 +00:00
|
|
|
uint8_t *r_line = table->reduced_transition_table;
|
|
|
|
for (uint16_t eq = 0; eq < table->eq_class_counter; ++eq){
|
|
|
|
uint8_t *u_line = table->full_transition_table + state_count * table->eq_class_rep[eq];
|
2016-03-26 08:06:41 +00:00
|
|
|
memcpy(r_line, u_line, state_count);
|
|
|
|
r_line += state_count;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
static FSM_Tables
|
2016-03-24 14:01:53 +00:00
|
|
|
generate_whitespace_skip_table(){
|
2016-08-29 01:03:26 +00:00
|
|
|
uint8_t state_count = LSPP_count;
|
2016-03-24 14:01:53 +00:00
|
|
|
FSM_Tables table;
|
2016-03-31 04:05:47 +00:00
|
|
|
allocate_full_tables(&table, state_count);
|
2016-08-29 01:03:26 +00:00
|
|
|
|
|
|
|
int32_t i = 0;
|
2016-03-24 14:01:53 +00:00
|
|
|
Whitespace_FSM wfsm = {0};
|
|
|
|
Whitespace_FSM new_wfsm;
|
2016-08-29 01:03:26 +00:00
|
|
|
for (uint16_t c = 0; c < 256; ++c){
|
|
|
|
for (uint8_t state = 0; state < state_count; ++state){
|
2016-03-24 14:01:53 +00:00
|
|
|
wfsm.pp_state = state;
|
|
|
|
wfsm.white_done = 0;
|
2016-08-29 01:03:26 +00:00
|
|
|
new_wfsm = whitespace_skip_fsm(wfsm, (uint8_t)c);
|
2016-03-24 14:01:53 +00:00
|
|
|
table.full_transition_table[i++] = new_wfsm.pp_state + state_count*new_wfsm.white_done;
|
|
|
|
}
|
|
|
|
}
|
2016-08-29 01:03:26 +00:00
|
|
|
|
2016-03-26 08:06:41 +00:00
|
|
|
do_table_reduction(&table, state_count);
|
2016-08-29 01:03:26 +00:00
|
|
|
|
2016-03-26 08:06:41 +00:00
|
|
|
return(table);
|
|
|
|
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
static FSM_Tables
|
2016-03-26 08:06:41 +00:00
|
|
|
generate_int_table(){
|
2016-08-29 01:03:26 +00:00
|
|
|
uint8_t state_count = LSINT_count;
|
2016-03-26 08:06:41 +00:00
|
|
|
FSM_Tables table;
|
2016-03-31 04:05:47 +00:00
|
|
|
allocate_full_tables(&table, state_count);
|
2016-08-29 01:03:26 +00:00
|
|
|
|
|
|
|
int32_t i = 0;
|
2016-03-26 08:06:41 +00:00
|
|
|
Lex_FSM fsm = {0};
|
|
|
|
Lex_FSM new_fsm;
|
2016-08-29 01:03:26 +00:00
|
|
|
for (uint16_t c = 0; c < 256; ++c){
|
|
|
|
for (uint8_t state = 0; state < state_count; ++state){
|
2016-03-26 08:06:41 +00:00
|
|
|
fsm.int_state = state;
|
|
|
|
fsm.emit_token = 0;
|
2016-08-29 01:03:26 +00:00
|
|
|
new_fsm = int_fsm(fsm, (uint8_t)c);
|
2016-03-26 08:06:41 +00:00
|
|
|
table.full_transition_table[i++] = new_fsm.int_state + state_count*new_fsm.emit_token;
|
2016-03-24 14:01:53 +00:00
|
|
|
}
|
|
|
|
}
|
2016-08-29 01:03:26 +00:00
|
|
|
|
2016-03-26 08:06:41 +00:00
|
|
|
do_table_reduction(&table, state_count);
|
2016-08-29 01:03:26 +00:00
|
|
|
|
2016-03-24 14:01:53 +00:00
|
|
|
return(table);
|
|
|
|
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
static FSM_Tables
|
|
|
|
generate_fsm_table(uint8_t pp_state){
|
|
|
|
uint8_t state_count = LS_count;
|
2016-03-24 14:01:53 +00:00
|
|
|
FSM_Tables table;
|
2016-03-31 04:05:47 +00:00
|
|
|
allocate_full_tables(&table, state_count);
|
2016-08-29 01:03:26 +00:00
|
|
|
|
|
|
|
int32_t i = 0;
|
2016-03-24 01:05:28 +00:00
|
|
|
Lex_FSM fsm = {0};
|
|
|
|
Lex_FSM new_fsm;
|
2016-08-29 01:03:26 +00:00
|
|
|
for (uint16_t c = 0; c < 256; ++c){
|
|
|
|
for (uint8_t state = 0; state < state_count; ++state){
|
2016-03-24 01:05:28 +00:00
|
|
|
fsm.state = state;
|
|
|
|
fsm.emit_token = 0;
|
2016-08-29 01:03:26 +00:00
|
|
|
new_fsm = main_fsm(fsm, pp_state, (uint8_t)c);
|
2016-03-24 14:01:53 +00:00
|
|
|
table.full_transition_table[i++] = new_fsm.state + state_count*new_fsm.emit_token;
|
2016-03-24 01:05:28 +00:00
|
|
|
}
|
|
|
|
}
|
2016-08-29 01:03:26 +00:00
|
|
|
|
2016-03-26 08:06:41 +00:00
|
|
|
do_table_reduction(&table, state_count);
|
2016-08-29 01:03:26 +00:00
|
|
|
|
2016-03-24 14:01:53 +00:00
|
|
|
return(table);
|
|
|
|
}
|
2016-03-24 01:05:28 +00:00
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
static void
|
2016-03-24 14:01:53 +00:00
|
|
|
render_fsm_table(FILE *file, FSM_Tables tables, char *group_name){
|
2016-08-29 01:03:26 +00:00
|
|
|
begin_table(file, "uint16_t", group_name, "eq_classes");
|
|
|
|
for (uint16_t c = 0; c < 256; ++c){
|
2016-03-24 14:01:53 +00:00
|
|
|
do_table_item(file, tables.eq_class[c]*tables.state_count);
|
2016-03-24 01:05:28 +00:00
|
|
|
}
|
|
|
|
end_row(file);
|
|
|
|
end_table(file);
|
2016-08-29 01:03:26 +00:00
|
|
|
|
|
|
|
fprintf(file, "const int32_t num_%s_eq_classes = %d;\n\n", group_name, tables.eq_class_counter);
|
|
|
|
|
|
|
|
int32_t i = 0;
|
|
|
|
begin_table(file, "uint8_t", group_name, "table");
|
|
|
|
for (uint16_t c = 0; c < tables.eq_class_counter; ++c){
|
|
|
|
for (uint8_t state = 0; state < tables.state_count; ++state){
|
2016-03-24 14:01:53 +00:00
|
|
|
do_table_item(file, tables.reduced_transition_table[i++]);
|
|
|
|
}
|
|
|
|
end_row(file);
|
|
|
|
}
|
|
|
|
end_table(file);
|
|
|
|
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
// Writes "<type> <variable> = <x>;" followed by a blank line.
//
// x is unsigned, so it is printed with an unsigned conversion; the previous
// "%d" showed values above INT_MAX as negative numbers. The cast to
// unsigned long keeps the argument type matched to "%lu" whatever the
// underlying type of uint32_t is.
static void
render_variable(FILE *file, char *type, char *variable, uint32_t x){
    fprintf(file, "%s %s = %lu;\n\n", type, variable, (unsigned long)x);
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
// Writes `comment` wrapped in a C block comment. The opening marker sits on
// its own line; the closing marker directly follows the comment text.
static void
render_comment(FILE *file, char *comment){
    fprintf(file,
            "/*\n%s*/\n",
            comment);
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
// Pairs a preprocessor state with the name prefix used for its generated tables.
typedef struct PP_Names{
    uint8_t pp_state;   // preprocessor state value
    char *name;         // group name passed to the table rendering helpers
} PP_Names;
|
2016-03-24 14:01:53 +00:00
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
static PP_Names pp_names[] = {
|
2016-03-24 14:01:53 +00:00
|
|
|
{LSPP_default, "main_fsm"},
|
|
|
|
{LSPP_include, "pp_include_fsm"},
|
|
|
|
{LSPP_macro_identifier, "pp_macro_fsm"},
|
|
|
|
{LSPP_identifier, "pp_identifier_fsm"},
|
|
|
|
{LSPP_body_if, "pp_body_if_fsm"},
|
|
|
|
{LSPP_body, "pp_body_fsm"},
|
|
|
|
{LSPP_number, "pp_number_fsm"},
|
|
|
|
{LSPP_error, "pp_error_fsm"},
|
|
|
|
{LSPP_junk, "pp_junk_fsm"},
|
|
|
|
};
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
static FSM_Tables
|
|
|
|
generate_table_from_abstract_fsm(FSM fsm, uint8_t real_term_base){
|
|
|
|
uint8_t state_count = (uint8_t )fsm.count;
|
2016-03-31 04:05:47 +00:00
|
|
|
FSM_Tables table;
|
2016-05-18 23:22:58 +00:00
|
|
|
|
2016-04-08 00:21:24 +00:00
|
|
|
allocate_full_tables(&table, state_count);
|
2016-08-29 01:03:26 +00:00
|
|
|
|
|
|
|
int32_t i = 0;
|
|
|
|
uint32_t new_state;
|
|
|
|
for (uint16_t c = 0; c < 256; ++c){
|
|
|
|
for (uint8_t state = 0; state < state_count; ++state){
|
2016-03-31 04:05:47 +00:00
|
|
|
new_state = fsm.states[state].transition_rule[c];
|
2016-05-18 23:22:58 +00:00
|
|
|
if (new_state >= RealTerminateBase){
|
|
|
|
new_state = new_state - RealTerminateBase + real_term_base;
|
|
|
|
}
|
2016-08-29 01:03:26 +00:00
|
|
|
table.full_transition_table[i++] = (uint8_t)new_state;
|
2016-04-08 00:21:24 +00:00
|
|
|
}
|
|
|
|
}
|
2016-08-29 01:03:26 +00:00
|
|
|
|
2016-03-31 04:05:47 +00:00
|
|
|
do_table_reduction(&table, state_count);
|
|
|
|
|
|
|
|
return(table);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
main(){
|
2016-03-24 14:01:53 +00:00
|
|
|
FILE *file;
|
|
|
|
file = fopen("4cpp_lexer_tables.c", "wb");
|
2016-03-24 01:05:28 +00:00
|
|
|
|
2016-03-24 14:01:53 +00:00
|
|
|
FSM_Tables wtables = generate_whitespace_skip_table();
|
|
|
|
render_fsm_table(file, wtables, "whitespace_fsm");
|
2016-03-24 01:05:28 +00:00
|
|
|
|
2016-03-26 08:06:41 +00:00
|
|
|
FSM_Tables itables = generate_int_table();
|
|
|
|
render_fsm_table(file, itables, "int_fsm");
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
begin_table(file, "uint8_t", "multiline_state_table");
|
|
|
|
for (uint8_t state = 0; state < LS_count; ++state){
|
2016-03-24 14:01:53 +00:00
|
|
|
do_table_item(file, (state == LS_string_multiline || state == LS_char_multiline));
|
|
|
|
}
|
|
|
|
end_row(file);
|
|
|
|
end_table(file);
|
2016-04-08 00:21:24 +00:00
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
for (int32_t i = 0; i < ArrayCount(pp_names); ++i){
|
2016-03-24 14:01:53 +00:00
|
|
|
assert(i == pp_names[i].pp_state);
|
|
|
|
FSM_Tables tables = generate_fsm_table(pp_names[i].pp_state);
|
|
|
|
render_fsm_table(file, tables, pp_names[i].name);
|
|
|
|
}
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
begin_ptr_table(file, "uint16_t", "get_eq_classes");
|
|
|
|
for (int32_t i = 0; i < ArrayCount(pp_names); ++i){
|
2016-03-24 14:01:53 +00:00
|
|
|
do_table_item_direct(file, pp_names[i].name, "_eq_classes");
|
2016-03-24 01:05:28 +00:00
|
|
|
end_row(file);
|
2016-03-24 14:01:53 +00:00
|
|
|
}
|
2016-03-24 01:05:28 +00:00
|
|
|
end_table(file);
|
|
|
|
|
2016-08-29 01:03:26 +00:00
|
|
|
begin_ptr_table(file, "uint8_t", "get_table");
|
|
|
|
for (int32_t i = 0; i < ArrayCount(pp_names); ++i){
|
2016-03-24 14:01:53 +00:00
|
|
|
do_table_item_direct(file, pp_names[i].name, "_table");
|
2016-03-24 01:05:28 +00:00
|
|
|
end_row(file);
|
|
|
|
}
|
|
|
|
end_table(file);
|
|
|
|
|
2016-03-31 04:05:47 +00:00
|
|
|
FSM pp_directive_fsm = generate_pp_directive_fsm();
|
2016-05-18 23:22:58 +00:00
|
|
|
FSM_Tables pp_directive_tables = generate_table_from_abstract_fsm(pp_directive_fsm, 0);
|
2016-03-31 04:05:47 +00:00
|
|
|
|
|
|
|
render_fsm_table(file, pp_directive_tables, "pp_directive");
|
2016-08-29 01:03:26 +00:00
|
|
|
render_variable(file, "uint8_t", "LSDIR_default", 0);
|
|
|
|
render_variable(file, "uint8_t", "LSDIR_count", pp_directive_fsm.count);
|
|
|
|
render_variable(file, "uint8_t", "pp_directive_terminal_base", pp_directive_fsm.terminal_base);
|
|
|
|
|
2016-03-24 01:05:28 +00:00
|
|
|
fclose(file);
|
|
|
|
return(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
// BOTTOM
|
|
|
|
|
|
|
|
|