From c9e40e492ce116e2984a96e5432b19dc59f6a348 Mon Sep 17 00:00:00 2001 From: Allen Webster Date: Sat, 4 Jun 2016 02:35:57 -0400 Subject: [PATCH] new lexer is sort of in there ... relexing still needs work --- 4coder_default_include.cpp | 2 + 4cpp_lexer.h | 44 ++++++++------- 4ed_app_target.cpp | 4 +- 4ed_file_view.cpp | 29 +++++++--- test/4cpp_lexer_fsms.h | 5 ++ test/4cpp_new_lexer.h | 112 ++++++++++++++++++++++++++++--------- 6 files changed, 141 insertions(+), 55 deletions(-) diff --git a/4coder_default_include.cpp b/4coder_default_include.cpp index a5f49440..71524163 100644 --- a/4coder_default_include.cpp +++ b/4coder_default_include.cpp @@ -84,6 +84,8 @@ CUSTOM_COMMAND_SIG(set_mark){ View_Summary view = app->get_active_view(app); app->view_set_mark(app, &view, seek_pos(view.cursor.pos)); + // TODO(allen): Just expose the preferred_x separately + app->view_set_cursor(app, &view, seek_pos(view.cursor.pos), 1); } CUSTOM_COMMAND_SIG(delete_range){ diff --git a/4cpp_lexer.h b/4cpp_lexer.h index 3f86a2ff..da136e44 100644 --- a/4cpp_lexer.h +++ b/4cpp_lexer.h @@ -1404,7 +1404,7 @@ cpp_relex_nonalloc_start(Cpp_File file, Cpp_Token_Stack *stack, state.end = end; state.amount = amount; state.tolerance = tolerance; - + Cpp_Get_Token_Result result = cpp_get_token(stack, start); if (result.token_index <= 0){ state.start_token_i = 0; @@ -1412,18 +1412,24 @@ cpp_relex_nonalloc_start(Cpp_File file, Cpp_Token_Stack *stack, else{ state.start_token_i = result.token_index-1; } - + result = cpp_get_token(stack, end); - if (result.token_index < 0) result.token_index = 0; - else if (end > stack->tokens[result.token_index].start) ++result.token_index; + if (result.token_index < 0){ + result.token_index = 0; + } + else if (end > stack->tokens[result.token_index].start){ + ++result.token_index; + } state.end_token_i = result.token_index; - + state.relex_start = stack->tokens[state.start_token_i].start; - if (start < state.relex_start) state.relex_start = start; - + if (start < 
state.relex_start){ + state.relex_start = start; + } + state.space_request = state.end_token_i - state.start_token_i + tolerance + 1; - - return state; + + return(state); } inline Cpp_Token @@ -1446,18 +1452,18 @@ FCPP_LINK bool cpp_relex_nonalloc_main(Cpp_Relex_State *state, Cpp_Token_Stack *relex_stack, int *relex_end){ Cpp_Token_Stack *stack = state->stack; Cpp_Token *tokens = stack->tokens; - + cpp_shift_token_starts(stack, state->end_token_i, state->amount); - + Cpp_Lex_Data lex = {}; lex.pp_state = cpp_token_get_pp_state(tokens[state->start_token_i].state_flags); lex.pos = state->relex_start; - + int relex_end_i = state->end_token_i; Cpp_Token match_token = cpp__get_token(stack, tokens, state->file.size, relex_end_i); Cpp_Token end_token = match_token; bool went_too_far = 0; - + for (;;){ Cpp_Read_Result read = cpp_lex_step(state->file, &lex); if (read.has_result){ @@ -1468,7 +1474,7 @@ cpp_relex_nonalloc_main(Cpp_Relex_State *state, Cpp_Token_Stack *relex_stack, in break; } cpp_push_token_nonalloc(relex_stack, read.token); - + while (lex.pos > end_token.start && relex_end_i < stack->count){ ++relex_end_i; end_token = cpp__get_token(stack, tokens, state->file.size, relex_end_i); @@ -1480,7 +1486,7 @@ cpp_relex_nonalloc_main(Cpp_Relex_State *state, Cpp_Token_Stack *relex_stack, in } if (lex.pos >= state->file.size) break; } - + if (!went_too_far){ if (relex_stack->count > 0){ if (state->start_token_i > 0){ @@ -1492,7 +1498,7 @@ cpp_relex_nonalloc_main(Cpp_Relex_State *state, Cpp_Token_Stack *relex_stack, in relex_stack->tokens[0] = merge.new_token; } } - + if (relex_end_i < state->stack->count){ Cpp_Token_Merge merge = cpp_attempt_token_merge(relex_stack->tokens[relex_stack->count-1], @@ -1503,14 +1509,14 @@ cpp_relex_nonalloc_main(Cpp_Relex_State *state, Cpp_Token_Stack *relex_stack, in } } } - + *relex_end = relex_end_i; } else{ cpp_shift_token_starts(stack, state->end_token_i, -state->amount); } - - return went_too_far; + + return(went_too_far); } #ifndef 
FCPP_FORBID_MALLOC diff --git a/4ed_app_target.cpp b/4ed_app_target.cpp index d3f4b609..45738325 100644 --- a/4ed_app_target.cpp +++ b/4ed_app_target.cpp @@ -29,8 +29,8 @@ #include "4tech_table.cpp" #define FCPP_LEXER_IMPLEMENTATION -//#include "test/4cpp_new_lexer.h" -#include "4cpp_lexer.h" +#include "test/4cpp_new_lexer.h" +//#include "4cpp_lexer.h" #include "4ed_template.cpp" diff --git a/4ed_file_view.cpp b/4ed_file_view.cpp index 5571cf0f..98b44605 100644 --- a/4ed_file_view.cpp +++ b/4ed_file_view.cpp @@ -804,20 +804,31 @@ Job_Callback_Sig(job_full_lex){ cpp_file.data = file->state.buffer.data; cpp_file.size = file->state.buffer.size; + i32 buffer_size = file->state.buffer.size; + buffer_size = (buffer_size + 3)&(~3); + + while (memory->size < buffer_size*2){ + system->grow_thread_memory(memory); + } + + char *tb = (char*)memory->data; + Cpp_Token_Stack tokens; - tokens.tokens = (Cpp_Token*)memory->data; - tokens.max_count = memory->size / sizeof(Cpp_Token); + tokens.tokens = (Cpp_Token*)((char*)memory->data + buffer_size); + tokens.max_count = (memory->size - buffer_size) / sizeof(Cpp_Token); tokens.count = 0; -#if 0 +#if 1 b32 still_lexing = 1; - Lex_Data lex = {0}; + Lex_Data lex = lex_data_init(tb); do{ i32 result = - cpp_lex_nonalloc(&lex, cpp_file.data, cpp_file.size, &tokens, 2048); + cpp_lex_size_nonalloc(&lex, + cpp_file.data, cpp_file.size, cpp_file.size, + &tokens, 2048); switch (result){ case LexNeedChunk: Assert(!"Invalid Path"); break; @@ -843,7 +854,7 @@ Job_Callback_Sig(job_full_lex){ #else - Cpp_Lex_Data status = {}; + Cpp_Lex_Data status = {0}; do{ for (i32 r = 2048; r > 0 && status.pos < cpp_file.size; --r){ @@ -978,7 +989,10 @@ file_relex_parallel(System_Functions *system, relex_space.count = 0; relex_space.max_count = state.space_request; relex_space.tokens = push_array(part, Cpp_Token, relex_space.max_count); - if (cpp_relex_nonalloc_main(&state, &relex_space, &relex_end)){ + + char *spare = push_array(part, char, cpp_file.size); + + if 
(cpp_relex_nonalloc_main(&state, &relex_space, &relex_end, spare)){ inline_lex = 0; } else{ @@ -3088,6 +3102,7 @@ view_show_file(View *view){ } view->showing_ui = VUI_None; view->current_scroll = &view->recent->scroll; + view->recent->scroll.max_y = view_compute_max_target_y(view); } internal void diff --git a/test/4cpp_lexer_fsms.h b/test/4cpp_lexer_fsms.h index 3efa622a..c6f3270e 100644 --- a/test/4cpp_lexer_fsms.h +++ b/test/4cpp_lexer_fsms.h @@ -104,6 +104,11 @@ struct Lex_FSM{ unsigned char emit_token; unsigned char multi_line; }; +inline Lex_FSM +zero_lex_fsm(){ + Lex_FSM fsm = {0}; + return(fsm); +} // BOTTOM diff --git a/test/4cpp_new_lexer.h b/test/4cpp_new_lexer.h index 67861d54..34b95753 100644 --- a/test/4cpp_new_lexer.h +++ b/test/4cpp_new_lexer.h @@ -4,7 +4,7 @@ #ifndef FCPP_NEW_LEXER_INC #define FCPP_NEW_LEXER_INC -#include "..\4cpp_lexer_types.h" +#include "../4cpp_lexer_types.h" #include "4cpp_lexer_fsms.h" #include "4cpp_lexer_tables.c" @@ -294,7 +294,7 @@ cpp_place_token_nonalloc(Cpp_Token *out_tokens, int token_i, Cpp_Token token){ if (token_i > 0){ prev_token = out_tokens[token_i - 1]; - merge = new_lex::cpp_attempt_token_merge(prev_token, token); + merge = cpp_attempt_token_merge(prev_token, token); if (merge.did_merge){ out_tokens[token_i - 1] = merge.new_token; } @@ -336,6 +336,13 @@ struct Lex_Data{ int __pc__; }; +inline Lex_Data +lex_data_init(char *tb){ + Lex_Data data = {0}; + data.tb = tb; + return(data); +} + #define DrCase(PC) case PC: goto resumespot_##PC @@ -355,7 +362,9 @@ enum Lex_Result{ }; lexer_link int -cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_stack_out){ +cpp_lex_nonalloc(Lex_Data *S_ptr, + char *chunk, int size, + Cpp_Token_Stack *token_stack_out){ Lex_Data S = *S_ptr; Cpp_Token *out_tokens = token_stack_out->tokens; @@ -403,20 +412,22 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_ S.token_start = S.pos; S.tb_pos = 0; - S.fsm = {0}; + S.fsm = 
zero_lex_fsm(); for(;;){ - unsigned short *eq_classes = get_eq_classes[S.pp_state]; - unsigned char *fsm_table = get_table[S.pp_state]; - - for (; S.fsm.state < LS_count && S.pos < end_pos;){ - c = chunk[S.pos++]; - S.tb[S.tb_pos++] = c; + { + unsigned short *eq_classes = get_eq_classes[S.pp_state]; + unsigned char *fsm_table = get_table[S.pp_state]; - int i = S.fsm.state + eq_classes[c]; - S.fsm.state = fsm_table[i]; - S.fsm.multi_line |= multiline_state_table[S.fsm.state]; + for (; S.fsm.state < LS_count && S.pos < end_pos;){ + c = chunk[S.pos++]; + S.tb[S.tb_pos++] = c; + + int i = S.fsm.state + eq_classes[c]; + S.fsm.state = fsm_table[i]; + S.fsm.multi_line |= multiline_state_table[S.fsm.state]; + } + S.fsm.emit_token = (S.fsm.state >= LS_count); } - S.fsm.emit_token = (S.fsm.state >= LS_count); if (S.fsm.emit_token == 0){ S.chunk_pos += size; @@ -950,7 +961,8 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_ #undef DrCase lexer_link int -cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, +cpp_lex_nonalloc(Lex_Data *S_ptr, + char *chunk, int size, Cpp_Token_Stack *token_stack_out, int max_tokens){ Cpp_Token_Stack temp_stack = *token_stack_out; if (temp_stack.max_count > temp_stack.count + max_tokens){ @@ -971,7 +983,8 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, } lexer_link int -cpp_lex_size_nonalloc(Lex_Data *S_ptr, char *chunk, int size, int full_size, +cpp_lex_size_nonalloc(Lex_Data *S_ptr, + char *chunk, int size, int full_size, Cpp_Token_Stack *token_stack_out){ int result = 0; if (S_ptr->pos >= full_size){ @@ -991,7 +1004,8 @@ cpp_lex_size_nonalloc(Lex_Data *S_ptr, char *chunk, int size, int full_size, } lexer_link int -cpp_lex_size_nonalloc(Lex_Data *S_ptr, char *chunk, int size, int full_size, +cpp_lex_size_nonalloc(Lex_Data *S_ptr, + char *chunk, int size, int full_size, Cpp_Token_Stack *token_stack_out, int max_tokens){ Cpp_Token_Stack temp_stack = *token_stack_out; if (temp_stack.max_count > 
temp_stack.count + max_tokens){ @@ -1012,7 +1026,6 @@ cpp_lex_size_nonalloc(Lex_Data *S_ptr, char *chunk, int size, int full_size, return(result); } -#if 0 lexer_link Cpp_Relex_State cpp_relex_nonalloc_start(Cpp_File file, Cpp_Token_Stack *stack, int start, int end, int amount, int tolerance){ @@ -1024,7 +1037,7 @@ cpp_relex_nonalloc_start(Cpp_File file, Cpp_Token_Stack *stack, state.amount = amount; state.tolerance = tolerance; - Cpp_Get_Token_Result result = new_lex::cpp_get_token(stack, start); + Cpp_Get_Token_Result result = cpp_get_token(stack, start); if (result.token_index <= 0){ state.start_token_i = 0; } @@ -1032,7 +1045,7 @@ cpp_relex_nonalloc_start(Cpp_File file, Cpp_Token_Stack *stack, state.start_token_i = result.token_index-1; } - result = new_lex::cpp_get_token(stack, end); + result = cpp_get_token(stack, end); if (result.token_index < 0) result.token_index = 0; else if (end > stack->tokens[result.token_index].start) ++result.token_index; state.end_token_i = result.token_index; @@ -1045,6 +1058,11 @@ cpp_relex_nonalloc_start(Cpp_File file, Cpp_Token_Stack *stack, return(state); } +inline Cpp_Preprocessor_State +cpp_token_get_pp_state(fcpp_u16 bitfield){ + return (Cpp_Preprocessor_State)(bitfield); +} + // TODO(allen): Eliminate this once we actually store the EOF token // in the token stack. 
inline Cpp_Token @@ -1064,23 +1082,62 @@ cpp__get_token(Cpp_Token_Stack *stack, Cpp_Token *tokens, int size, int index){ } FCPP_LINK bool -cpp_relex_nonalloc_main(Cpp_Relex_State *state, Cpp_Token_Stack *relex_stack, int *relex_end){ +cpp_relex_nonalloc_main(Cpp_Relex_State *state, + Cpp_Token_Stack *relex_stack, + int *relex_end, + char *spare){ Cpp_Token_Stack *stack = state->stack; Cpp_Token *tokens = stack->tokens; - new_lex::cpp_shift_token_starts(stack, state->end_token_i, state->amount); + cpp_shift_token_starts(stack, state->end_token_i, state->amount); - Lex_Data lex = {}; + Lex_Data lex = lex_data_init(spare); lex.pp_state = cpp_token_get_pp_state(tokens[state->start_token_i].state_flags); lex.pos = state->relex_start; int relex_end_i = state->end_token_i; Cpp_Token match_token = cpp__get_token(stack, tokens, state->file.size, relex_end_i); Cpp_Token end_token = match_token; - bool went_too_far = 0; + int went_too_far = false; + // TODO(allen): This can be better now I suspect. 
for (;;){ - Cpp_Read_Result read = cpp_lex_step(state->file, &lex); + int result = + cpp_lex_nonalloc(&lex, + state->file.data, state->file.size, + stack, 1); + + switch (result){ + case LexHitTokenLimit: + { + Cpp_Token token = + stack->tokens[stack->count-1]; + if (token.start == end_token.start && + token.size == end_token.size && + token.flags == end_token.flags && + token.state_flags == end_token.state_flags){ + --stack->count; + goto double_break; + } + + while (lex.pos > end_token.start && relex_end_i < stack->count){ + ++relex_end_i; + end_token = cpp__get_token(stack, tokens, state->file.size, relex_end_i); + } + } + break; + + case LexNeedChunk: Assert(!"Invalid path"); break; + + case LexNeedTokenMemory: + went_too_far = true; + goto double_break; + + case LexFinished: + goto double_break; + } + +#if 0 if (read.has_result){ if (read.token.start == end_token.start && read.token.size == end_token.size && @@ -1095,12 +1152,14 @@ cpp_relex_nonalloc_main(Cpp_Relex_State *state, Cpp_Token_Stack *relex_stack, in end_token = cpp__get_token(stack, tokens, state->file.size, relex_end_i); } if (relex_stack->count == relex_stack->max_count){ - went_too_far = 1; + went_too_far = true; break; } } if (lex.pos >= state->file.size) break; +#endif } + double_break:; if (!went_too_far){ if (relex_stack->count > 0){ @@ -1133,7 +1192,6 @@ cpp_relex_nonalloc_main(Cpp_Relex_State *state, Cpp_Token_Stack *relex_stack, in return went_too_far; } -#endif #endif