work on new lexer
parent
e1a03017e8
commit
5e56483ec0
18
4cpp_lexer.h
18
4cpp_lexer.h
|
@ -65,6 +65,19 @@ NOTES ON USE:
|
|||
|
||||
#include "4cpp_lexer_types.h"
|
||||
|
||||
struct Cpp_Lex_Data{
|
||||
Cpp_Preprocessor_State pp_state;
|
||||
int pos;
|
||||
int complete;
|
||||
};
|
||||
|
||||
struct Cpp_Read_Result{
|
||||
Cpp_Token token;
|
||||
int pos;
|
||||
char newline;
|
||||
char has_result;
|
||||
};
|
||||
|
||||
Cpp_File
|
||||
data_as_cpp_file(Data data){
|
||||
Cpp_File result;
|
||||
|
@ -140,7 +153,6 @@ FCPP_LINK bool cpp_push_token_no_merge(Cpp_Token_Stack *stack, Cpp_Token token);
|
|||
FCPP_LINK bool cpp_push_token_nonalloc(Cpp_Token_Stack *stack, Cpp_Token token);
|
||||
|
||||
// Returns a Cpp_Lex_Data whose first member is set from a zero
// Cpp_Preprocessor_State; the remaining members are left to the aggregate
// initializer, which zeroes them.
inline Cpp_Lex_Data
cpp_lex_data_zero(){
    Cpp_Lex_Data zeroed = {(Cpp_Preprocessor_State)0};
    return(zeroed);
}
|
||||
// Returns a Cpp_Token_Stack with every member zeroed (no storage, no tokens).
inline Cpp_Token_Stack
cpp_token_stack_zero(){
    Cpp_Token_Stack empty_stack = {0};
    return(empty_stack);
}
|
||||
|
||||
FCPP_LINK Cpp_Read_Result cpp_lex_step(Cpp_File file, Cpp_Lex_Data *lex);
|
||||
|
||||
|
@ -1266,10 +1278,10 @@ cpp_lex_step(Cpp_File file, Cpp_Lex_Data *lex_data){
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
result.token.state_flags = state_flags;
|
||||
result.has_result = has_result;
|
||||
|
||||
|
||||
*lex_data = lex;
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -209,23 +209,15 @@ enum Cpp_Preprocessor_State{
|
|||
CPP_LEX_PP_COUNT
|
||||
};
|
||||
|
||||
struct Cpp_Lex_Data{
|
||||
Cpp_Preprocessor_State pp_state;
|
||||
int pos;
|
||||
int complete;
|
||||
};
|
||||
|
||||
struct Cpp_Read_Result{
|
||||
Cpp_Token token;
|
||||
int pos;
|
||||
char newline;
|
||||
char has_result;
|
||||
};
|
||||
|
||||
struct Cpp_Token_Stack{
|
||||
Cpp_Token *tokens;
|
||||
int count, max_count;
|
||||
};
|
||||
// Returns a Cpp_Token_Stack with every member zeroed (no storage, no tokens).
inline Cpp_Token_Stack
cpp_token_stack_zero(){
    Cpp_Token_Stack empty_stack = {0};
    return(empty_stack);
}
|
||||
|
||||
struct Cpp_Token_Merge{
|
||||
Cpp_Token new_token;
|
||||
|
|
|
@ -809,6 +809,40 @@ Job_Callback_Sig(job_full_lex){
|
|||
tokens.max_count = memory->size / sizeof(Cpp_Token);
|
||||
tokens.count = 0;
|
||||
|
||||
#if 0
|
||||
|
||||
b32 still_lexing = 1;
|
||||
|
||||
Lex_Data lex = {0};
|
||||
|
||||
do{
|
||||
i32 result =
|
||||
cpp_lex_nonalloc(&lex, cpp_file.data, cpp_file.size, &tokens, 2048);
|
||||
|
||||
switch (result){
|
||||
case LexNeedChunk: Assert(!"Invalid Path"); break;
|
||||
|
||||
case LexNeedTokenMemory:
|
||||
if (system->check_cancel(thread)){
|
||||
return;
|
||||
}
|
||||
system->grow_thread_memory(memory);
|
||||
tokens.tokens = (Cpp_Token*)memory->data;
|
||||
tokens.max_count = memory->size / sizeof(Cpp_Token);
|
||||
break;
|
||||
|
||||
case LexHitTokenLimit:
|
||||
if (system->check_cancel(thread)){
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
case LexFinished: still_lexing = 0; break;
|
||||
}
|
||||
} while (still_lexing);
|
||||
|
||||
#else
|
||||
|
||||
Cpp_Lex_Data status = {};
|
||||
|
||||
do{
|
||||
|
@ -836,6 +870,10 @@ Job_Callback_Sig(job_full_lex){
|
|||
}
|
||||
} while(!status.complete);
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
i32 new_max = LargeRoundUp(tokens.count+1, Kbytes(1));
|
||||
|
||||
system->acquire_lock(FRAME_LOCK);
|
||||
|
@ -948,9 +986,9 @@ file_relex_parallel(System_Functions *system,
|
|||
i32 shift_amount = relex_space.count - delete_amount;
|
||||
|
||||
if (shift_amount != 0){
|
||||
int new_count = stack->count + shift_amount;
|
||||
i32 new_count = stack->count + shift_amount;
|
||||
if (new_count > stack->max_count){
|
||||
int new_max = LargeRoundUp(new_count, Kbytes(1));
|
||||
i32 new_max = LargeRoundUp(new_count, Kbytes(1));
|
||||
stack->tokens = (Cpp_Token*)
|
||||
general_memory_reallocate(general, stack->tokens,
|
||||
stack->count*sizeof(Cpp_Token),
|
||||
|
@ -958,7 +996,7 @@ file_relex_parallel(System_Functions *system,
|
|||
stack->max_count = new_max;
|
||||
}
|
||||
|
||||
int shift_size = stack->count - relex_end;
|
||||
i32 shift_size = stack->count - relex_end;
|
||||
if (shift_size > 0){
|
||||
Cpp_Token *old_base = stack->tokens + relex_end;
|
||||
memmove(old_base + shift_amount, old_base,
|
||||
|
@ -3238,7 +3276,6 @@ try_kill_file(System_Functions *system, Models *models,
|
|||
}
|
||||
else{
|
||||
kill_file(system, models, file, string_zero());
|
||||
view_show_file(view);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -3286,6 +3323,7 @@ interactive_view_complete(System_Functions *system, View *view, String dest, i32
|
|||
|
||||
case IAct_Kill:
|
||||
try_kill_file(system, models, 0, 0, dest);
|
||||
view_show_file(view);
|
||||
break;
|
||||
|
||||
case IAct_Sure_To_Close:
|
||||
|
|
3
TODO.txt
3
TODO.txt
|
@ -99,6 +99,9 @@
|
|||
; [X] feedback messages
|
||||
; [X] feedback message API
|
||||
; [X] kill rect
|
||||
; [X] add high DPI support
|
||||
;
|
||||
; [] OS font rendering
|
||||
;
|
||||
; [] file status in custom API
|
||||
; [] user file bar string
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -4,6 +4,7 @@
|
|||
#ifndef FCPP_NEW_LEXER_INC
|
||||
#define FCPP_NEW_LEXER_INC
|
||||
|
||||
#include "..\4cpp_lexer_types.h"
|
||||
#include "4cpp_lexer_fsms.h"
|
||||
#include "4cpp_lexer_tables.c"
|
||||
|
||||
|
@ -286,22 +287,35 @@ cpp_attempt_token_merge(Cpp_Token prev_token, Cpp_Token next_token){
|
|||
return result;
|
||||
}
|
||||
|
||||
lexer_link void
|
||||
cpp_push_token_nonalloc(Cpp_Token *out_tokens, int *token_i, Cpp_Token token){
|
||||
lexer_link int
|
||||
cpp_place_token_nonalloc(Cpp_Token *out_tokens, int token_i, Cpp_Token token){
|
||||
Cpp_Token_Merge merge = {(Cpp_Token_Type)0};
|
||||
Cpp_Token prev_token = {(Cpp_Token_Type)0};
|
||||
|
||||
if (*token_i > 0){
|
||||
prev_token = out_tokens[*token_i - 1];
|
||||
if (token_i > 0){
|
||||
prev_token = out_tokens[token_i - 1];
|
||||
merge = new_lex::cpp_attempt_token_merge(prev_token, token);
|
||||
if (merge.did_merge){
|
||||
out_tokens[*token_i - 1] = merge.new_token;
|
||||
out_tokens[token_i - 1] = merge.new_token;
|
||||
}
|
||||
}
|
||||
|
||||
if (!merge.did_merge){
|
||||
out_tokens[(*token_i)++] = token;
|
||||
out_tokens[token_i++] = token;
|
||||
}
|
||||
|
||||
return(token_i);
|
||||
}
|
||||
|
||||
// Appends `token` to `out_tokens` without allocating. The actual placement is
// done by cpp_place_token_nonalloc, which either merges `token` into the
// previous token or stores it in the next free slot and returns the new count.
//
// Returns 1 when the token was placed, 0 when the stack was already full
// (count == max_count); in the latter case the stack is left untouched.
lexer_link bool
cpp_push_token_nonalloc(Cpp_Token_Stack *out_tokens, Cpp_Token token){
    bool result = 0;
    // Place only while a free slot exists. cpp_place_token_nonalloc writes to
    // tokens[count] whenever no merge happens, so calling it on a full stack
    // would write one element past the end of the array.
    if (out_tokens->count < out_tokens->max_count){
        out_tokens->count =
            cpp_place_token_nonalloc(out_tokens->tokens, out_tokens->count, token);
        result = 1;
    }
    return(result);
}
|
||||
|
||||
struct Lex_Data{
|
||||
|
@ -311,15 +325,13 @@ struct Lex_Data{
|
|||
|
||||
int pos;
|
||||
int pos_overide;
|
||||
int chunk_pos;
|
||||
|
||||
Lex_FSM fsm;
|
||||
Whitespace_FSM wfsm;
|
||||
unsigned char pp_state;
|
||||
unsigned char completed;
|
||||
|
||||
unsigned short *key_eq_classes;
|
||||
unsigned char *key_table;
|
||||
|
||||
Cpp_Token token;
|
||||
|
||||
int __pc__;
|
||||
|
@ -335,20 +347,27 @@ struct Lex_Data{
|
|||
token_stack_out->count = token_i;\
|
||||
*S_ptr = S; S_ptr->__pc__ = -1; return(n); }
|
||||
|
||||
// Status codes returned by cpp_lex_nonalloc and the wrappers built on it.
enum Lex_Result{
    // The lexer ran to completion.
    LexFinished = 0,
    // The current chunk was consumed mid-token; call again with the next chunk.
    LexNeedChunk = 1,
    // The output token stack is full (count reached max_count).
    LexNeedTokenMemory = 2,
    // A max_tokens cap stopped the call while the real stack still has room.
    LexHitTokenLimit = 3
};
|
||||
|
||||
lexer_link int
|
||||
cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_stack_out){
|
||||
Lex_Data S = *S_ptr;
|
||||
|
||||
|
||||
Cpp_Token *out_tokens = token_stack_out->tokens;
|
||||
int token_i = token_stack_out->count;
|
||||
int max_token_i = token_stack_out->max_count;
|
||||
|
||||
|
||||
Pos_Update_Rule pos_update_rule = PUR_none;
|
||||
|
||||
|
||||
char c = 0;
|
||||
|
||||
int end_pos = size + S.pos;
|
||||
chunk -= S.pos;
|
||||
|
||||
int end_pos = size + S.chunk_pos;
|
||||
chunk -= S.chunk_pos;
|
||||
|
||||
switch (S.__pc__){
|
||||
DrCase(1);
|
||||
|
@ -357,7 +376,6 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
DrCase(4);
|
||||
DrCase(5);
|
||||
DrCase(6);
|
||||
DrCase(7);
|
||||
}
|
||||
|
||||
for (;;){
|
||||
|
@ -372,7 +390,8 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
S.wfsm.white_done = (S.wfsm.pp_state >= LSPP_count);
|
||||
|
||||
if (S.wfsm.white_done == 0){
|
||||
DrYield(4, 1);
|
||||
S.chunk_pos += size;
|
||||
DrYield(4, LexNeedChunk);
|
||||
}
|
||||
else break;
|
||||
}
|
||||
|
@ -380,7 +399,7 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
S.pp_state = S.wfsm.pp_state;
|
||||
if (S.pp_state >= LSPP_count){
|
||||
S.pp_state -= LSPP_count;
|
||||
}
|
||||
}
|
||||
|
||||
S.token_start = S.pos;
|
||||
S.tb_pos = 0;
|
||||
|
@ -388,19 +407,20 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
for(;;){
|
||||
unsigned short *eq_classes = get_eq_classes[S.pp_state];
|
||||
unsigned char *fsm_table = get_table[S.pp_state];
|
||||
|
||||
|
||||
for (; S.fsm.state < LS_count && S.pos < end_pos;){
|
||||
c = chunk[S.pos++];
|
||||
S.tb[S.tb_pos++] = c;
|
||||
|
||||
|
||||
int i = S.fsm.state + eq_classes[c];
|
||||
S.fsm.state = fsm_table[i];
|
||||
S.fsm.multi_line |= multiline_state_table[S.fsm.state];
|
||||
}
|
||||
S.fsm.emit_token = (S.fsm.state >= LS_count);
|
||||
|
||||
|
||||
if (S.fsm.emit_token == 0){
|
||||
DrYield(3, 1);
|
||||
S.chunk_pos += size;
|
||||
DrYield(3, LexNeedChunk);
|
||||
}
|
||||
else break;
|
||||
}
|
||||
|
@ -413,13 +433,13 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
if (S.pp_state == LSPP_include){
|
||||
switch (S.fsm.state){
|
||||
case LSINC_default:break;
|
||||
|
||||
|
||||
case LSINC_quotes:
|
||||
case LSINC_pointy:
|
||||
S.token.type = CPP_TOKEN_INCLUDE_FILE;
|
||||
S.token.flags = 0;
|
||||
break;
|
||||
|
||||
|
||||
case LSINC_junk:
|
||||
S.token.type = CPP_TOKEN_JUNK;
|
||||
S.token.flags = 0;
|
||||
|
@ -433,22 +453,22 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
#define OperCase(op,t) case op: S.token.type = t; break;
|
||||
OperCase('{', CPP_TOKEN_BRACE_OPEN);
|
||||
OperCase('}', CPP_TOKEN_BRACE_CLOSE);
|
||||
|
||||
|
||||
OperCase('[', CPP_TOKEN_BRACKET_OPEN);
|
||||
OperCase(']', CPP_TOKEN_BRACKET_CLOSE);
|
||||
|
||||
|
||||
OperCase('(', CPP_TOKEN_PARENTHESE_OPEN);
|
||||
OperCase(')', CPP_TOKEN_PARENTHESE_CLOSE);
|
||||
|
||||
|
||||
OperCase('~', CPP_TOKEN_TILDE);
|
||||
OperCase(',', CPP_TOKEN_COMMA);
|
||||
OperCase(';', CPP_TOKEN_SEMICOLON);
|
||||
OperCase('?', CPP_TOKEN_TERNARY_QMARK);
|
||||
|
||||
|
||||
OperCase('@', CPP_TOKEN_JUNK);
|
||||
OperCase('$', CPP_TOKEN_JUNK);
|
||||
#undef OperCase
|
||||
|
||||
|
||||
case '\\':
|
||||
if (S.pp_state == LSPP_default){
|
||||
S.token.type = CPP_TOKEN_JUNK;
|
||||
|
@ -461,13 +481,14 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
c = chunk[S.pos++];
|
||||
if (!(c == ' ' || c == '\t' || c == '\r' || c == '\v' || c == '\f')) S.wfsm.white_done = 1;
|
||||
}
|
||||
|
||||
|
||||
if (S.wfsm.white_done == 0){
|
||||
DrYield(1, 1);
|
||||
S.chunk_pos += size;
|
||||
DrYield(1, LexNeedChunk);
|
||||
}
|
||||
else break;
|
||||
}
|
||||
|
||||
|
||||
if (c == '\n'){
|
||||
S.fsm.emit_token = 0;
|
||||
S.pos_overide = 0;
|
||||
|
@ -485,46 +506,10 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
|
||||
case LS_identifier:
|
||||
{
|
||||
S.fsm.state = 0;
|
||||
S.fsm.emit_token = 0;
|
||||
S.fsm.sub_machine = 0;
|
||||
--S.pos;
|
||||
for (;;){
|
||||
// TODO(allen): Need to drop down to the instructions to optimize
|
||||
// this correctly I think. This looks like it will have more branches
|
||||
// than it needs unless I am very careful.
|
||||
for (; S.fsm.state < LSKEY_totally_finished && S.pos < end_pos;){
|
||||
// TODO(allen): Rebase these super tables so that we don't have
|
||||
// to do a subtract on the state.
|
||||
S.key_table = key_tables[S.fsm.sub_machine];
|
||||
S.key_eq_classes = key_eq_class_tables[S.fsm.sub_machine];
|
||||
for (; S.fsm.state < LSKEY_table_transition && S.pos < end_pos;){
|
||||
c = chunk[S.pos++];
|
||||
S.fsm.state = S.key_table[S.fsm.state + S.key_eq_classes[c]];
|
||||
}
|
||||
if (S.fsm.state >= LSKEY_table_transition && S.fsm.state < LSKEY_totally_finished){
|
||||
S.fsm.sub_machine = S.fsm.state - LSKEY_table_transition;
|
||||
S.fsm.state = 0;
|
||||
}
|
||||
}
|
||||
S.fsm.emit_token = (S.fsm.int_state >= LSKEY_totally_finished);
|
||||
|
||||
if (S.fsm.emit_token == 0){
|
||||
DrYield(7, 1);
|
||||
}
|
||||
else break;
|
||||
}
|
||||
--S.pos;
|
||||
|
||||
// TODO(allen): do stuff regarding the actual type of the token
|
||||
S.token.type = CPP_TOKEN_INTEGER_CONSTANT;
|
||||
S.token.flags = 0;
|
||||
|
||||
#if 0
|
||||
--S.pos;
|
||||
|
||||
int word_size = S.pos - S.token_start;
|
||||
|
||||
|
||||
if (S.pp_state == LSPP_body_if){
|
||||
if (match(make_string(S.tb, word_size), make_lit_string("defined"))){
|
||||
S.token.type = CPP_TOKEN_DEFINED;
|
||||
|
@ -532,17 +517,17 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Sub_Match_List_Result sub_match;
|
||||
sub_match = sub_match_list(S.tb, S.tb_pos, 0, bool_lits, word_size);
|
||||
|
||||
|
||||
if (sub_match.index != -1){
|
||||
S.token.type = CPP_TOKEN_BOOLEAN_CONSTANT;
|
||||
S.token.flags = CPP_TFLAG_IS_KEYWORD;
|
||||
}
|
||||
else{
|
||||
sub_match = sub_match_list(S.tb, S.tb_pos, 0, keywords, word_size);
|
||||
|
||||
|
||||
if (sub_match.index != -1){
|
||||
String_And_Flag data = keywords.data[sub_match.index];
|
||||
S.token.type = (Cpp_Token_Type)data.flags;
|
||||
|
@ -553,10 +538,8 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
S.token.flags = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
}break;
|
||||
|
||||
|
||||
case LS_pound:
|
||||
S.token.flags = 0;
|
||||
switch (c){
|
||||
|
@ -567,7 +550,7 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
case LS_pp:
|
||||
{
|
||||
S.fsm.directive_state = LSDIR_default;
|
||||
|
@ -578,9 +561,10 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
S.fsm.directive_state = pp_directive_table[S.fsm.directive_state + pp_directive_eq_classes[c]];
|
||||
}
|
||||
S.fsm.emit_token = (S.fsm.int_state >= LSDIR_count);
|
||||
|
||||
|
||||
if (S.fsm.emit_token == 0){
|
||||
DrYield(6, 1);
|
||||
S.chunk_pos += size;
|
||||
DrYield(6, LexNeedChunk);
|
||||
}
|
||||
else break;
|
||||
}
|
||||
|
@ -590,13 +574,13 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
S.token.type = type;
|
||||
if (type == CPP_TOKEN_JUNK){
|
||||
S.token.flags = 0;
|
||||
}
|
||||
}
|
||||
else{
|
||||
S.token.flags = CPP_TFLAG_PP_DIRECTIVE;
|
||||
S.pp_state = (unsigned char)cpp_pp_directive_to_state(S.token.type);
|
||||
}
|
||||
}
|
||||
}break;
|
||||
|
||||
|
||||
case LS_number:
|
||||
case LS_number0:
|
||||
case LS_hex:
|
||||
|
@ -609,18 +593,19 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
S.fsm.int_state = int_fsm_table[S.fsm.int_state + int_fsm_eq_classes[c]];
|
||||
}
|
||||
S.fsm.emit_token = (S.fsm.int_state >= LSINT_count);
|
||||
|
||||
|
||||
if (S.fsm.emit_token == 0){
|
||||
DrYield(5, 1);
|
||||
S.chunk_pos += size;
|
||||
DrYield(5, LexNeedChunk);
|
||||
}
|
||||
else break;
|
||||
}
|
||||
--S.pos;
|
||||
|
||||
|
||||
S.token.type = CPP_TOKEN_INTEGER_CONSTANT;
|
||||
S.token.flags = 0;
|
||||
break;
|
||||
|
||||
|
||||
case LS_float:
|
||||
case LS_crazy_float0:
|
||||
case LS_crazy_float1:
|
||||
|
@ -634,27 +619,27 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
case LS_char:
|
||||
S.token.type = CPP_TOKEN_CHARACTER_CONSTANT;
|
||||
S.token.flags = 0;
|
||||
break;
|
||||
|
||||
|
||||
case LS_char_multiline:
|
||||
S.token.type = CPP_TOKEN_CHARACTER_CONSTANT;
|
||||
S.token.flags = CPP_TFLAG_MULTILINE;
|
||||
break;
|
||||
|
||||
|
||||
case LS_string:
|
||||
S.token.type = CPP_TOKEN_STRING_CONSTANT;
|
||||
S.token.flags = 0;
|
||||
break;
|
||||
|
||||
|
||||
case LS_string_multiline:
|
||||
S.token.type = CPP_TOKEN_STRING_CONSTANT;
|
||||
S.token.flags = CPP_TFLAG_MULTILINE;
|
||||
break;
|
||||
|
||||
|
||||
case LS_comment_pre:
|
||||
S.token.flags = CPP_TFLAG_IS_OPERATOR;
|
||||
switch (c){
|
||||
|
@ -665,19 +650,19 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
case LS_comment: case LS_comment_block_ending:
|
||||
S.token.type = CPP_TOKEN_COMMENT;
|
||||
S.token.flags = 0;
|
||||
pos_update_rule = PUR_unget_whitespace;
|
||||
break;
|
||||
|
||||
|
||||
case LS_error_message:
|
||||
S.token.type = CPP_TOKEN_ERROR_MESSAGE;
|
||||
S.token.flags = 0;
|
||||
pos_update_rule = PUR_unget_whitespace;
|
||||
break;
|
||||
|
||||
|
||||
case LS_dot:
|
||||
S.token.flags = CPP_TFLAG_IS_OPERATOR;
|
||||
switch (c){
|
||||
|
@ -688,21 +673,21 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
case LS_ellipsis:
|
||||
switch (c){
|
||||
case '.':
|
||||
S.token.flags = CPP_TFLAG_IS_OPERATOR;
|
||||
S.token.type = CPP_TOKEN_ELLIPSIS;
|
||||
break;
|
||||
|
||||
|
||||
default:
|
||||
S.token.type = CPP_TOKEN_JUNK;
|
||||
pos_update_rule = PUR_back_one;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
case LS_less:
|
||||
S.token.flags = CPP_TFLAG_IS_OPERATOR;
|
||||
switch (c){
|
||||
|
@ -713,7 +698,7 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
case LS_less_less:
|
||||
S.token.flags = CPP_TFLAG_IS_OPERATOR;
|
||||
switch (c){
|
||||
|
@ -724,7 +709,7 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
case LS_more:
|
||||
S.token.flags = CPP_TFLAG_IS_OPERATOR;
|
||||
switch (c){
|
||||
|
@ -735,7 +720,7 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
case LS_more_more:
|
||||
S.token.flags = CPP_TFLAG_IS_OPERATOR;
|
||||
switch (c){
|
||||
|
@ -746,7 +731,7 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
case LS_minus:
|
||||
S.token.flags = CPP_TFLAG_IS_OPERATOR;
|
||||
switch (c){
|
||||
|
@ -758,7 +743,7 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
case LS_arrow:
|
||||
S.token.flags = CPP_TFLAG_IS_OPERATOR;
|
||||
switch (c){
|
||||
|
@ -769,7 +754,7 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
case LS_and:
|
||||
S.token.flags = CPP_TFLAG_IS_OPERATOR;
|
||||
switch (c){
|
||||
|
@ -781,7 +766,7 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
case LS_or:
|
||||
S.token.flags = CPP_TFLAG_IS_OPERATOR;
|
||||
switch (c){
|
||||
|
@ -793,7 +778,7 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
case LS_plus:
|
||||
S.token.flags = CPP_TFLAG_IS_OPERATOR;
|
||||
switch (c){
|
||||
|
@ -805,7 +790,7 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
case LS_colon:
|
||||
S.token.flags = CPP_TFLAG_IS_OPERATOR;
|
||||
switch (c){
|
||||
|
@ -816,7 +801,7 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
case LS_star:
|
||||
S.token.flags = CPP_TFLAG_IS_OPERATOR;
|
||||
switch (c){
|
||||
|
@ -827,7 +812,7 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
case LS_modulo:
|
||||
S.token.flags = CPP_TFLAG_IS_OPERATOR;
|
||||
switch (c){
|
||||
|
@ -838,7 +823,7 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
case LS_caret:
|
||||
S.token.flags = CPP_TFLAG_IS_OPERATOR;
|
||||
switch (c){
|
||||
|
@ -849,7 +834,7 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
case LS_eq:
|
||||
S.token.flags = CPP_TFLAG_IS_OPERATOR;
|
||||
switch (c){
|
||||
|
@ -860,7 +845,7 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
case LS_bang:
|
||||
S.token.flags = CPP_TFLAG_IS_OPERATOR;
|
||||
switch (c){
|
||||
|
@ -872,12 +857,12 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
switch (pos_update_rule){
|
||||
case PUR_back_one:
|
||||
--S.pos;
|
||||
break;
|
||||
|
||||
|
||||
case PUR_unget_whitespace:
|
||||
c = chunk[--S.pos];
|
||||
while (c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\v' || c == '\f'){
|
||||
|
@ -886,7 +871,7 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
++S.pos;
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
if ((S.token.flags & CPP_TFLAG_PP_DIRECTIVE) == 0){
|
||||
switch (S.pp_state){
|
||||
case LSPP_include:
|
||||
|
@ -895,7 +880,7 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
}
|
||||
S.pp_state = LSPP_junk;
|
||||
break;
|
||||
|
||||
|
||||
case LSPP_macro_identifier:
|
||||
if (S.fsm.state != LS_identifier){
|
||||
S.token.type = CPP_TOKEN_JUNK;
|
||||
|
@ -905,14 +890,14 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
S.pp_state = LSPP_body;
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
case LSPP_identifier:
|
||||
if (S.fsm.state != LS_identifier){
|
||||
S.token.type = CPP_TOKEN_JUNK;
|
||||
}
|
||||
S.pp_state = LSPP_junk;
|
||||
break;
|
||||
|
||||
|
||||
case LSPP_number:
|
||||
if (S.token.type != CPP_TOKEN_INTEGER_CONSTANT){
|
||||
S.token.type = CPP_TOKEN_JUNK;
|
||||
|
@ -922,14 +907,14 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
S.pp_state = LSPP_include;
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
case LSPP_junk:
|
||||
S.token.type = CPP_TOKEN_JUNK;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (S.fsm.emit_token){
|
||||
S.token.start = S.token_start;
|
||||
if (S.pos_overide){
|
||||
|
@ -944,9 +929,9 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
}
|
||||
S.token.state_flags = S.pp_state;
|
||||
|
||||
cpp_push_token_nonalloc(out_tokens, &token_i, S.token);
|
||||
token_i = cpp_place_token_nonalloc(out_tokens, token_i, S.token);
|
||||
if (token_i == max_token_i){
|
||||
DrYield(2, 2);
|
||||
DrYield(2, LexNeedTokenMemory);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -957,13 +942,199 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
|
|||
}
|
||||
}
|
||||
|
||||
DrReturn(0);
|
||||
DrReturn(LexFinished);
|
||||
}
|
||||
|
||||
#undef DrYield
|
||||
#undef DrReturn
|
||||
#undef DrCase
|
||||
|
||||
// Token-limited variant of cpp_lex_nonalloc. It runs the four-argument lexer
// on a copy of the stack whose capacity is clamped to count + max_tokens, so
// one call can add no more than max_tokens tokens.
//
// Returns the underlying Lex_Result, except that LexNeedTokenMemory is
// reported as LexHitTokenLimit when the real stack still has free slots
// (i.e. only the artificial cap was reached).
lexer_link int
cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size,
                 Cpp_Token_Stack *token_stack_out, int max_tokens){
    // Work on a capped copy; only the count is copied back afterwards.
    Cpp_Token_Stack capped = *token_stack_out;
    int capped_max = capped.count + max_tokens;
    if (capped_max < capped.max_count){
        capped.max_count = capped_max;
    }

    int status = cpp_lex_nonalloc(S_ptr, chunk, size, &capped);
    token_stack_out->count = capped.count;

    // "Out of memory" on the capped copy only means the cap was hit if the
    // caller's stack is not actually full.
    bool room_left = (token_stack_out->count < token_stack_out->max_count);
    if (status == LexNeedTokenMemory && room_left){
        status = LexHitTokenLimit;
    }

    return(status);
}
|
||||
|
||||
// Chunked lexing for callers that know the total input size. While the lexer
// position is below full_size the supplied chunk is lexed; once the position
// has reached full_size (either on entry, or after this chunk left the lexer
// asking for more input) a single null character is fed in as a final chunk.
//
// Returns the Lex_Result of the last cpp_lex_nonalloc call made.
lexer_link int
cpp_lex_size_nonalloc(Lex_Data *S_ptr, char *chunk, int size, int full_size,
                      Cpp_Token_Stack *token_stack_out){
    int status = 0;

    if (S_ptr->pos < full_size){
        status = cpp_lex_nonalloc(S_ptr, chunk, size, token_stack_out);

        // Anything other than "need more input at end of file" goes straight
        // back to the caller.
        bool input_exhausted = (status == LexNeedChunk && S_ptr->pos >= full_size);
        if (!input_exhausted){
            return(status);
        }
    }

    // All real input is consumed: hand the lexer a terminating null.
    char end_null = 0;
    status = cpp_lex_nonalloc(S_ptr, &end_null, 1, token_stack_out);
    return(status);
}
|
||||
|
||||
// Token-limited variant of cpp_lex_size_nonalloc. It runs the five-argument
// version on a copy of the stack whose capacity is clamped to
// count + max_tokens, so one call can add no more than max_tokens tokens.
//
// Returns the underlying Lex_Result, except that LexNeedTokenMemory is
// reported as LexHitTokenLimit when the real stack still has free slots.
lexer_link int
cpp_lex_size_nonalloc(Lex_Data *S_ptr, char *chunk, int size, int full_size,
                      Cpp_Token_Stack *token_stack_out, int max_tokens){
    // Work on a capped copy; only the count is copied back afterwards.
    Cpp_Token_Stack capped = *token_stack_out;
    int capped_max = capped.count + max_tokens;
    if (capped_max < capped.max_count){
        capped.max_count = capped_max;
    }

    int status = cpp_lex_size_nonalloc(S_ptr, chunk, size, full_size, &capped);
    token_stack_out->count = capped.count;

    // "Out of memory" on the capped copy only means the cap was hit if the
    // caller's stack is not actually full.
    bool room_left = (token_stack_out->count < token_stack_out->max_count);
    if (status == LexNeedTokenMemory && room_left){
        status = LexHitTokenLimit;
    }

    return(status);
}
|
||||
|
||||
#if 0
|
||||
// Builds the Cpp_Relex_State for re-lexing the edited range [start, end) of
// `file`. Records the arguments, picks the token indices that bracket the
// edit, the position relexing should begin at, and how many token slots the
// caller should provide (space_request).
lexer_link Cpp_Relex_State
cpp_relex_nonalloc_start(Cpp_File file, Cpp_Token_Stack *stack,
                         int start, int end, int amount, int tolerance){
    Cpp_Relex_State state;
    state.file = file;
    state.stack = stack;
    state.start = start;
    state.end = end;
    state.amount = amount;
    state.tolerance = tolerance;

    // Begin one token before the token found at `start`, clamped to index 0.
    Cpp_Get_Token_Result start_lookup = new_lex::cpp_get_token(stack, start);
    state.start_token_i =
        (start_lookup.token_index <= 0) ? 0 : (start_lookup.token_index - 1);

    // End at the token found at `end`, stepping past it when `end` lies
    // beyond that token's first character.
    Cpp_Get_Token_Result end_lookup = new_lex::cpp_get_token(stack, end);
    int end_i = end_lookup.token_index;
    if (end_i < 0){
        end_i = 0;
    }
    else if (end > stack->tokens[end_i].start){
        ++end_i;
    }
    state.end_token_i = end_i;

    // Relex from the first affected token, or from `start` if that is earlier.
    state.relex_start = stack->tokens[state.start_token_i].start;
    if (start < state.relex_start){
        state.relex_start = start;
    }

    state.space_request = state.end_token_i - state.start_token_i + tolerance + 1;

    return(state);
}
|
||||
|
||||
// TODO(allen): Eliminate this once we actually store the EOF token
|
||||
// in the token stack.
|
||||
// Returns tokens[index] when index is below stack->count; otherwise returns a
// synthesized zero-size CPP_TOKEN_EOF token positioned at `size`.
inline Cpp_Token
cpp__get_token(Cpp_Token_Stack *stack, Cpp_Token *tokens, int size, int index){
    if (index < stack->count){
        return tokens[index];
    }

    // Past the last stored token: fabricate the end-of-file token.
    Cpp_Token eof_token;
    eof_token.start = size;
    eof_token.size = 0;
    eof_token.type = CPP_TOKEN_EOF;
    eof_token.flags = 0;
    eof_token.state_flags = 0;
    return eof_token;
}
|
||||
|
||||
// Re-lexes the range described by `state`, writing the new tokens into
// `relex_stack`. Token starts from state->end_token_i onward are shifted by
// state->amount first; lexing then runs from state->relex_start until a
// freshly read token matches an existing token (same start, size, flags and
// state_flags), the end of the file is reached, or relex_stack fills up.
//
// Returns 0 on success, after merging the boundary tokens where possible and
// storing the index of the first untouched old token in *relex_end.
// Returns 1 when relex_stack ran out of room; the start shift is undone and
// *relex_end is not written.
FCPP_LINK bool
cpp_relex_nonalloc_main(Cpp_Relex_State *state, Cpp_Token_Stack *relex_stack, int *relex_end){
    Cpp_Token_Stack *stack = state->stack;
    Cpp_Token *tokens = stack->tokens;

    new_lex::cpp_shift_token_starts(stack, state->end_token_i, state->amount);

    Lex_Data lex = {};
    lex.pp_state = cpp_token_get_pp_state(tokens[state->start_token_i].state_flags);
    lex.pos = state->relex_start;

    int relex_end_i = state->end_token_i;
    Cpp_Token end_token = cpp__get_token(stack, tokens, state->file.size, relex_end_i);
    bool went_too_far = 0;

    for (;;){
        Cpp_Read_Result read = cpp_lex_step(state->file, &lex);
        if (read.has_result){
            // Stop as soon as the new token stream lines up with the old one.
            bool matches_old_token =
                (read.token.start == end_token.start &&
                 read.token.size == end_token.size &&
                 read.token.flags == end_token.flags &&
                 read.token.state_flags == end_token.state_flags);
            if (matches_old_token){
                break;
            }
            cpp_push_token_nonalloc(relex_stack, read.token);

            // Skip old tokens that the lexer has already moved past.
            while (lex.pos > end_token.start && relex_end_i < stack->count){
                ++relex_end_i;
                end_token = cpp__get_token(stack, tokens, state->file.size, relex_end_i);
            }

            if (relex_stack->count == relex_stack->max_count){
                went_too_far = 1;
                break;
            }
        }
        if (lex.pos >= state->file.size){
            break;
        }
    }

    if (went_too_far){
        // Out of room: restore the token starts shifted at the top.
        cpp_shift_token_starts(stack, state->end_token_i, -state->amount);
        return went_too_far;
    }

    if (relex_stack->count > 0){
        // Try to merge the first new token with the old token before it.
        if (state->start_token_i > 0){
            Cpp_Token_Merge front_merge =
                cpp_attempt_token_merge(tokens[state->start_token_i - 1],
                                        relex_stack->tokens[0]);
            if (front_merge.did_merge){
                --state->start_token_i;
                relex_stack->tokens[0] = front_merge.new_token;
            }
        }

        // Try to merge the last new token with the old token after it.
        if (relex_end_i < state->stack->count){
            Cpp_Token_Merge back_merge =
                cpp_attempt_token_merge(relex_stack->tokens[relex_stack->count-1],
                                        tokens[relex_end_i]);
            if (back_merge.did_merge){
                ++relex_end_i;
                relex_stack->tokens[relex_stack->count-1] = back_merge.new_token;
            }
        }
    }

    *relex_end = relex_end_i;
    return went_too_far;
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
// BOTTOM
|
||||
|
|
|
@ -7,6 +7,9 @@
|
|||
*
|
||||
*/
|
||||
|
||||
// TODO(allen): In what corner cases, such as invalid files
|
||||
// does the new lexer suffer???
|
||||
|
||||
// TOP
|
||||
|
||||
#include "../4ed_meta.h"
|
||||
|
@ -204,104 +207,166 @@ end_t(Times *t){
|
|||
}
|
||||
|
||||
static void
|
||||
run_experiment(Experiment *exp, char *filename, int verbose, int chunks){
|
||||
run_experiment(Experiment *exp, char *filename, int verbose,
|
||||
int chunks, int max_tokens){
|
||||
String extension = {};
|
||||
Data file_data;
|
||||
Cpp_File file_cpp;
|
||||
new_lex::Lex_Data ld = {0};
|
||||
int pass;
|
||||
int k, chunk_size, is_last;
|
||||
|
||||
|
||||
extension = file_extension(make_string_slowly(filename));
|
||||
|
||||
|
||||
if (match(extension, "cpp") || match(extension, "h")){
|
||||
file_data = dump_file(filename);
|
||||
if (file_data.size < (100 << 10)){
|
||||
pass = 1;
|
||||
if (verbose >= 0) printf("testing on file: %s\n", filename);
|
||||
exp->test_total++;
|
||||
|
||||
|
||||
exp->correct_stack.count = 0;
|
||||
exp->testing_stack.count = 0;
|
||||
|
||||
memset(exp->correct_stack.tokens, TOKEN_ARRAY_SIZE, 0);
|
||||
memset(exp->testing_stack.tokens, TOKEN_ARRAY_SIZE, 0);
|
||||
|
||||
|
||||
memset(exp->correct_stack.tokens, 0, TOKEN_ARRAY_SIZE);
|
||||
memset(exp->testing_stack.tokens, 0, TOKEN_ARRAY_SIZE);
|
||||
|
||||
file_cpp.data = (char*)file_data.data;
|
||||
file_cpp.size = file_data.size;
|
||||
|
||||
|
||||
ld.tb = (char*)malloc(file_data.size + 1);
|
||||
|
||||
|
||||
{
|
||||
i64 start;
|
||||
|
||||
|
||||
start = __rdtsc();
|
||||
cpp_lex_file_nonalloc(file_cpp, &exp->correct_stack, lex_data);
|
||||
time.handcoded += (__rdtsc() - start);
|
||||
|
||||
start = __rdtsc();
|
||||
if (chunks){
|
||||
int relevant_size = file_data.size + 1;
|
||||
is_last = 0;
|
||||
for (k = 0; k < relevant_size; k += chunks){
|
||||
chunk_size = chunks;
|
||||
if (chunk_size + k >= relevant_size){
|
||||
chunk_size = relevant_size - k;
|
||||
is_last = 1;
|
||||
|
||||
if (max_tokens == 0){
|
||||
if (chunks){
|
||||
start = __rdtsc();
|
||||
int relevant_size = file_data.size + 1;
|
||||
is_last = 0;
|
||||
for (k = 0; k < relevant_size; k += chunks){
|
||||
chunk_size = chunks;
|
||||
if (chunk_size + k >= relevant_size){
|
||||
chunk_size = relevant_size - k;
|
||||
is_last = 1;
|
||||
}
|
||||
|
||||
int result =
|
||||
new_lex::cpp_lex_nonalloc(&ld,
|
||||
(char*)file_data.data + k, chunk_size,
|
||||
&exp->testing_stack);
|
||||
|
||||
if (result == new_lex::LexFinished ||
|
||||
result == new_lex::LexNeedTokenMemory) break;
|
||||
}
|
||||
|
||||
int result = new_lex::cpp_lex_nonalloc(&ld, (char*)file_data.data + k, chunk_size, &exp->testing_stack);
|
||||
if (result == 0 || result == 2) break;
|
||||
time.fsm += (__rdtsc() - start);
|
||||
}
|
||||
else{
|
||||
start = __rdtsc();
|
||||
new_lex::cpp_lex_nonalloc(&ld,
|
||||
(char*)file_data.data, file_data.size,
|
||||
&exp->testing_stack);
|
||||
time.fsm += (__rdtsc() - start);
|
||||
}
|
||||
}
|
||||
else{
|
||||
new_lex::cpp_lex_nonalloc(&ld, (char*)file_data.data, file_data.size, &exp->testing_stack);
|
||||
if (chunks){
|
||||
start = __rdtsc();
|
||||
int relevant_size = file_data.size + 1;
|
||||
is_last = 0;
|
||||
for (k = 0; k < relevant_size; k += chunks){
|
||||
chunk_size = chunks;
|
||||
if (chunk_size + k >= relevant_size){
|
||||
chunk_size = relevant_size - k;
|
||||
is_last = 1;
|
||||
}
|
||||
|
||||
int result = 0;
|
||||
int still_lexing = 1;
|
||||
do{
|
||||
result =
|
||||
new_lex::cpp_lex_size_nonalloc(&ld,
|
||||
(char*)file_data.data + k, chunk_size, file_data.size,
|
||||
&exp->testing_stack,
|
||||
max_tokens);
|
||||
if (result == new_lex::LexFinished ||
|
||||
result == new_lex::LexNeedTokenMemory ||
|
||||
result == new_lex::LexNeedChunk){
|
||||
still_lexing = 0;
|
||||
}
|
||||
} while(still_lexing);
|
||||
|
||||
|
||||
if (result == new_lex::LexFinished ||
|
||||
result == new_lex::LexNeedTokenMemory) break;
|
||||
}
|
||||
time.fsm += (__rdtsc() - start);
|
||||
}
|
||||
else{
|
||||
start = __rdtsc();
|
||||
int still_lexing = 1;
|
||||
do{
|
||||
int result =
|
||||
new_lex::cpp_lex_size_nonalloc(&ld,
|
||||
(char*)file_data.data, file_data.size, file_data.size,
|
||||
&exp->testing_stack,
|
||||
max_tokens);
|
||||
if (result == new_lex::LexFinished ||
|
||||
result == new_lex::LexNeedTokenMemory){
|
||||
still_lexing = 0;
|
||||
}
|
||||
} while(still_lexing);
|
||||
time.fsm += (__rdtsc() - start);
|
||||
}
|
||||
}
|
||||
time.fsm += (__rdtsc() - start);
|
||||
}
|
||||
|
||||
|
||||
free(ld.tb);
|
||||
|
||||
|
||||
if (exp->correct_stack.count != exp->testing_stack.count){
|
||||
pass = 0;
|
||||
if (verbose >= 0){
|
||||
printf("error: stack size mismatch %d original and %d testing\n",
|
||||
exp->correct_stack.count, exp->testing_stack.count);
|
||||
exp->correct_stack.count, exp->testing_stack.count);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int min_count = exp->correct_stack.count;
|
||||
if (min_count > exp->testing_stack.count) min_count = exp->testing_stack.count;
|
||||
|
||||
|
||||
for (int j = 0; j < min_count; ++j){
|
||||
Cpp_Token *correct, *testing;
|
||||
correct = exp->correct_stack.tokens + j;
|
||||
testing = exp->testing_stack.tokens + j;
|
||||
|
||||
|
||||
if (correct->type != testing->type){
|
||||
pass = 0;
|
||||
if (verbose >= 1) printf("type mismatch at token %d\n", j);
|
||||
}
|
||||
|
||||
|
||||
if (correct->start != testing->start || correct->size != testing->size){
|
||||
pass = 0;
|
||||
if (verbose >= 1){
|
||||
printf("token range mismatch at token %d\n"
|
||||
" %d:%d original %d:%d testing\n"
|
||||
" %.*s original %.*s testing\n",
|
||||
j,
|
||||
correct->start, correct->size, testing->start, testing->size,
|
||||
correct->size, file_cpp.data + correct->start,
|
||||
testing->size, file_cpp.data + testing->start);
|
||||
" %d:%d original %d:%d testing\n"
|
||||
" %.*s original %.*s testing\n",
|
||||
j,
|
||||
correct->start, correct->size, testing->start, testing->size,
|
||||
correct->size, file_cpp.data + correct->start,
|
||||
testing->size, file_cpp.data + testing->start);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (correct->flags != testing->flags){
|
||||
pass = 0;
|
||||
if (verbose >= 1) printf("token flag mismatch at token %d\n", j);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (pass){
|
||||
exp->passed_total++;
|
||||
if (verbose >= 0) printf("test passed!\n\n");
|
||||
|
@ -310,7 +375,7 @@ run_experiment(Experiment *exp, char *filename, int verbose, int chunks){
|
|||
if (verbose >= 0) printf("test failed, you failed, fix it now!\n\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
free(file_data.data);
|
||||
}
|
||||
}
|
||||
|
@ -338,12 +403,13 @@ show_time(Times t, int repeats, char *type){
|
|||
|
||||
int main(){
|
||||
int repeats = 1;
|
||||
int verbose_level = 1;
|
||||
int chunk_start = 0;
|
||||
int chunk_end = 0;
|
||||
int verbose_level = 0;
|
||||
int chunk_start = 32;
|
||||
int chunk_end = 64;
|
||||
#define TEST_FILE "parser_test1.cpp"
|
||||
#define SINGLE_ITEM 1
|
||||
|
||||
#define SINGLE_ITEM 0
|
||||
int token_limit = 2;
|
||||
|
||||
int chunks = (chunk_start > 0 && chunk_start <= chunk_end);
|
||||
int c = 0;
|
||||
|
||||
|
@ -371,14 +437,14 @@ int main(){
|
|||
begin_t(&chunk_exp_t);
|
||||
printf("With chunks of %d\n", chunks);
|
||||
for (c = chunk_start; c <= chunk_end; ++c){
|
||||
run_experiment(&chunk_exp, BASE_DIR TEST_FILE, 1, c);
|
||||
run_experiment(&chunk_exp, BASE_DIR TEST_FILE, 1, c, token_limit);
|
||||
}
|
||||
end_t(&chunk_exp_t);
|
||||
}
|
||||
|
||||
begin_t(&exp_t);
|
||||
printf("Unchunked\n");
|
||||
run_experiment(&exp, BASE_DIR TEST_FILE, 1, 0);
|
||||
run_experiment(&exp, BASE_DIR TEST_FILE, 1, 0, token_limit);
|
||||
end_t(&exp_t);
|
||||
|
||||
#else
|
||||
|
@ -391,19 +457,19 @@ int main(){
|
|||
if (chunks){
|
||||
begin_t(&chunk_exp_t);
|
||||
for (c = chunk_start; c <= chunk_end; ++c){
|
||||
run_experiment(&chunk_exp, all_files.infos[i].filename.str, verbose_level, c);
|
||||
run_experiment(&chunk_exp, all_files.infos[i].filename.str, verbose_level, c, token_limit);
|
||||
}
|
||||
end_t(&chunk_exp_t);
|
||||
}
|
||||
|
||||
|
||||
begin_t(&exp_t);
|
||||
if (verbose_level == -1 && chunks){
|
||||
for (c = chunk_start; c <= chunk_end; ++c){
|
||||
run_experiment(&exp, all_files.infos[i].filename.str, verbose_level, 0);
|
||||
run_experiment(&exp, all_files.infos[i].filename.str, verbose_level, 0, token_limit);
|
||||
}
|
||||
}
|
||||
else{
|
||||
run_experiment(&exp, all_files.infos[i].filename.str, verbose_level, 0);
|
||||
run_experiment(&exp, all_files.infos[i].filename.str, verbose_level, 0, token_limit);
|
||||
}
|
||||
end_t(&exp_t);
|
||||
}
|
||||
|
|
|
@ -576,97 +576,6 @@ process_match_node(String_And_Flag *input, Match_Node *node, Match_Tree *tree, F
|
|||
}
|
||||
}
|
||||
|
||||
// Builds the stack of keyword-recognition FSMs from keyword_strings.
//
// Steps:
//   1. Give every distinct token type found in keyword_strings (carried in the
//      .flags field) a terminal state index in terminal_table.
//   2. Allocate the FSM stack, the match-tree stack and the stack of pending
//      ("future") FSMs.
//   3. Seed FSM/tree 0 with a root node that lists every keyword and process it.
//   4. For each future recorded after index 0, build one more FSM/tree pair from
//      the future's source node; FSM i always corresponds to future i (asserted).
//
// Returns the filled FSM_Stack by value. Its fsms array is malloc'd here and is
// handed to the caller; final_state is table_transition_state plus the number
// of FSMs that were generated.
//
// NOTE(review): the tree and future arrays allocated here are never released.
// Confirm that nothing reachable from the returned FSMs refers to them before
// adding a free.
FSM_Stack
generate_keyword_fsms(){
    // Capacity of each work stack, and of each individual FSM / match tree.
    const int stack_capacity = 255;
    const int machine_capacity = 200;
    const int keyword_count = (int)ArrayCount(keyword_strings);
    
    Terminal_Lookup_Table terminal_table;
    Cpp_Token_Type type;
    
    Future_FSM_Stack unfinished_futures;
    Match_Tree_Stack tree_stack;
    FSM_Stack fsm_stack;
    Match_Tree *tree;
    FSM *fsm;
    Future_FSM *future;
    Match_Node *root_node;
    FSM_State *root_state;
    int i, j;
    
    memset(terminal_table.type_to_state, 0, sizeof(terminal_table.type_to_state));
    memset(terminal_table.state_to_type, 0, sizeof(terminal_table.state_to_type));
    // state_count is read and incremented below, so it must start from a
    // defined value; previously it was left uninitialized.
    terminal_table.state_count = 0;
    
    // NOTE(review): 0 doubles as the "no state assigned yet" marker in
    // type_to_state, so the type that receives state 0 is registered a second
    // time if it shows up again. Verify whether that is intended.
    for (i = 0; i < keyword_count; ++i){
        type = (Cpp_Token_Type)keyword_strings[i].flags;
        if (terminal_table.type_to_state[type] == 0){
            terminal_table.type_to_state[type] = terminal_table.state_count;
            terminal_table.state_to_type[terminal_table.state_count] = type;
            ++terminal_table.state_count;
        }
    }
    
    fsm_stack.max = stack_capacity;
    fsm_stack.count = 0;
    fsm_stack.fsms = (FSM*)malloc(sizeof(FSM)*fsm_stack.max);
    fsm_stack.table_transition_state = 26;
    assert(fsm_stack.fsms != 0);
    
    tree_stack.max = stack_capacity;
    tree_stack.count = 0;
    tree_stack.trees = (Match_Tree*)malloc(sizeof(Match_Tree)*tree_stack.max);
    assert(tree_stack.trees != 0);
    
    unfinished_futures.max = stack_capacity;
    unfinished_futures.count = 0;
    unfinished_futures.futures = (Future_FSM*)malloc(sizeof(Future_FSM)*unfinished_futures.max);
    assert(unfinished_futures.futures != 0);
    
    // FSM/tree 0: the root node covers the whole keyword list.
    fsm = get_fsm(&fsm_stack);
    tree = get_tree(&tree_stack);
    
    *fsm = fsm_init(machine_capacity, fsm_stack.table_transition_state);
    *tree = tree_init(machine_capacity);
    
    root_state = fsm_get_state(fsm, RealTerminateBase);
    root_node = match_get_node(tree);
    match_init_node(root_node, keyword_count);
    for (i = 0; i < keyword_count; ++i){
        root_node->words[i] = i;
    }
    
    root_node->count = keyword_count;
    root_node->state = root_state;
    root_node->index = -1;
    
    // The root occupies future slot 0 so that future indices line up with FSM
    // indices in the loop below.
    push_future_fsm(&unfinished_futures, root_node);
    process_match_node(keyword_strings, root_node, tree, fsm, &terminal_table, 2, &unfinished_futures);
    
    // unfinished_futures.count is re-read every iteration; presumably
    // process_match_node appends further futures while it runs.
    for (i = 1; i < unfinished_futures.count; ++i){
        future = unfinished_futures.futures + i;
        
        fsm = get_fsm(&fsm_stack);
        tree = get_tree(&tree_stack);
        
        assert((int)(fsm - fsm_stack.fsms) == i);
        
        *fsm = fsm_init(machine_capacity, fsm_stack.table_transition_state);
        *tree = tree_init(machine_capacity);
        
        root_state = fsm_get_state(fsm, RealTerminateBase);
        root_node = match_get_node(tree);
        match_copy_init_node(root_node, future->source);
        root_node->state = root_state;
        
        // Record each keyword this FSM covers as a comment on the FSM.
        for (j = 0; j < root_node->count; ++j){
            char space[1024];
            // Bounded write: an over-long keyword is truncated instead of
            // overrunning the buffer.
            snprintf(space, sizeof(space), "%s\n", keyword_strings[root_node->words[j]].str);
            fsm_add_comment(fsm, space);
        }
        
        process_match_node(keyword_strings, root_node, tree, fsm, &terminal_table, 12, &unfinished_futures);
    }
    
    // The count is narrowed to unsigned char below, so it has to fit.
    assert(fsm_stack.count < 255);
    fsm_stack.final_state = fsm_stack.table_transition_state + (unsigned char)fsm_stack.count;
    
    return(fsm_stack);
}
|
||||
|
||||
Whitespace_FSM
|
||||
whitespace_skip_fsm(Whitespace_FSM wfsm, char c){
|
||||
if (wfsm.pp_state != LSPP_default){
|
||||
|
@ -781,7 +690,6 @@ main_fsm(Lex_FSM fsm, unsigned char pp_state, unsigned char c){
|
|||
case LS_default:
|
||||
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'){
|
||||
fsm.state = LS_identifier;
|
||||
fsm.emit_token = 1;
|
||||
}
|
||||
else if (c >= '1' && c <= '9'){
|
||||
fsm.state = LS_number;
|
||||
|
@ -849,13 +757,11 @@ main_fsm(Lex_FSM fsm, unsigned char pp_state, unsigned char c){
|
|||
}
|
||||
break;
|
||||
|
||||
#if 0
|
||||
case LS_identifier:
|
||||
if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_')){
|
||||
fsm.emit_token = 1;
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
|
||||
case LS_pound:
|
||||
switch (c){
|
||||
|
@ -1405,39 +1311,6 @@ main(){
|
|||
render_variable(file, "unsigned char", "LSDIR_count", pp_directive_fsm.count);
|
||||
render_variable(file, "unsigned char", "pp_directive_terminal_base", pp_directive_fsm.terminal_base);
|
||||
|
||||
FSM_Stack keyword_fsms = generate_keyword_fsms();
|
||||
|
||||
char name[1024];
|
||||
for (int i = 0; i < keyword_fsms.count; ++i){
|
||||
FSM_Tables partial_keywords_table =
|
||||
generate_table_from_abstract_fsm(keyword_fsms.fsms[i], keyword_fsms.final_state);
|
||||
if (keyword_fsms.fsms[i].comment){
|
||||
render_comment(file, keyword_fsms.fsms[i].comment);
|
||||
}
|
||||
|
||||
sprintf(name, "keyword_part_%d_table", i);
|
||||
render_fsm_table(file, partial_keywords_table, name);
|
||||
}
|
||||
|
||||
begin_ptr_table(file, "short", "key_eq_class_tables");
|
||||
for (int i = 0; i < keyword_fsms.count; ++i){
|
||||
sprintf(name, "keyword_part_%d_table_eq_classes", i);
|
||||
do_table_item_direct(file, name, "");
|
||||
end_row(file);
|
||||
}
|
||||
end_table(file);
|
||||
|
||||
begin_ptr_table(file, "char", "key_tables");
|
||||
for (int i = 0; i < keyword_fsms.count; ++i){
|
||||
sprintf(name, "keyword_part_%d_table_table", i);
|
||||
do_table_item_direct(file, name, "");
|
||||
end_row(file);
|
||||
}
|
||||
end_table(file);
|
||||
|
||||
fprintf(file, "#define LSKEY_table_transition %d\n", (int)(keyword_fsms.table_transition_state));
|
||||
fprintf(file, "#define LSKEY_totally_finished %d\n", (int)(keyword_fsms.final_state));
|
||||
|
||||
fclose(file);
|
||||
return(0);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue