new lexer is sort of in there ... relexing still needs work

master
Allen Webster 2016-06-04 02:35:57 -04:00
parent c38c0f5082
commit c9e40e492c
6 changed files with 141 additions and 55 deletions

View File

@ -84,6 +84,8 @@ CUSTOM_COMMAND_SIG(set_mark){
View_Summary view = app->get_active_view(app); View_Summary view = app->get_active_view(app);
app->view_set_mark(app, &view, seek_pos(view.cursor.pos)); app->view_set_mark(app, &view, seek_pos(view.cursor.pos));
// TODO(allen): Just expose the preferred_x separately
app->view_set_cursor(app, &view, seek_pos(view.cursor.pos), 1);
} }
CUSTOM_COMMAND_SIG(delete_range){ CUSTOM_COMMAND_SIG(delete_range){

View File

@ -1414,16 +1414,22 @@ cpp_relex_nonalloc_start(Cpp_File file, Cpp_Token_Stack *stack,
} }
result = cpp_get_token(stack, end); result = cpp_get_token(stack, end);
if (result.token_index < 0) result.token_index = 0; if (result.token_index < 0){
else if (end > stack->tokens[result.token_index].start) ++result.token_index; result.token_index = 0;
}
else if (end > stack->tokens[result.token_index].start){
++result.token_index;
}
state.end_token_i = result.token_index; state.end_token_i = result.token_index;
state.relex_start = stack->tokens[state.start_token_i].start; state.relex_start = stack->tokens[state.start_token_i].start;
if (start < state.relex_start) state.relex_start = start; if (start < state.relex_start){
state.relex_start = start;
}
state.space_request = state.end_token_i - state.start_token_i + tolerance + 1; state.space_request = state.end_token_i - state.start_token_i + tolerance + 1;
return state; return(state);
} }
inline Cpp_Token inline Cpp_Token
@ -1510,7 +1516,7 @@ cpp_relex_nonalloc_main(Cpp_Relex_State *state, Cpp_Token_Stack *relex_stack, in
cpp_shift_token_starts(stack, state->end_token_i, -state->amount); cpp_shift_token_starts(stack, state->end_token_i, -state->amount);
} }
return went_too_far; return(went_too_far);
} }
#ifndef FCPP_FORBID_MALLOC #ifndef FCPP_FORBID_MALLOC

View File

@ -29,8 +29,8 @@
#include "4tech_table.cpp" #include "4tech_table.cpp"
#define FCPP_LEXER_IMPLEMENTATION #define FCPP_LEXER_IMPLEMENTATION
//#include "test/4cpp_new_lexer.h" #include "test/4cpp_new_lexer.h"
#include "4cpp_lexer.h" //#include "4cpp_lexer.h"
#include "4ed_template.cpp" #include "4ed_template.cpp"

View File

@ -804,20 +804,31 @@ Job_Callback_Sig(job_full_lex){
cpp_file.data = file->state.buffer.data; cpp_file.data = file->state.buffer.data;
cpp_file.size = file->state.buffer.size; cpp_file.size = file->state.buffer.size;
i32 buffer_size = file->state.buffer.size;
buffer_size = (buffer_size + 3)&(~3);
while (memory->size < buffer_size*2){
system->grow_thread_memory(memory);
}
char *tb = (char*)memory->data;
Cpp_Token_Stack tokens; Cpp_Token_Stack tokens;
tokens.tokens = (Cpp_Token*)memory->data; tokens.tokens = (Cpp_Token*)((char*)memory->data + buffer_size);
tokens.max_count = memory->size / sizeof(Cpp_Token); tokens.max_count = (memory->size - buffer_size) / sizeof(Cpp_Token);
tokens.count = 0; tokens.count = 0;
#if 0 #if 1
b32 still_lexing = 1; b32 still_lexing = 1;
Lex_Data lex = {0}; Lex_Data lex = lex_data_init(tb);
do{ do{
i32 result = i32 result =
cpp_lex_nonalloc(&lex, cpp_file.data, cpp_file.size, &tokens, 2048); cpp_lex_size_nonalloc(&lex,
cpp_file.data, cpp_file.size, cpp_file.size,
&tokens, 2048);
switch (result){ switch (result){
case LexNeedChunk: Assert(!"Invalid Path"); break; case LexNeedChunk: Assert(!"Invalid Path"); break;
@ -843,7 +854,7 @@ Job_Callback_Sig(job_full_lex){
#else #else
Cpp_Lex_Data status = {}; Cpp_Lex_Data status = {0};
do{ do{
for (i32 r = 2048; r > 0 && status.pos < cpp_file.size; --r){ for (i32 r = 2048; r > 0 && status.pos < cpp_file.size; --r){
@ -978,7 +989,10 @@ file_relex_parallel(System_Functions *system,
relex_space.count = 0; relex_space.count = 0;
relex_space.max_count = state.space_request; relex_space.max_count = state.space_request;
relex_space.tokens = push_array(part, Cpp_Token, relex_space.max_count); relex_space.tokens = push_array(part, Cpp_Token, relex_space.max_count);
if (cpp_relex_nonalloc_main(&state, &relex_space, &relex_end)){
char *spare = push_array(part, char, cpp_file.size);
if (cpp_relex_nonalloc_main(&state, &relex_space, &relex_end, spare)){
inline_lex = 0; inline_lex = 0;
} }
else{ else{
@ -3088,6 +3102,7 @@ view_show_file(View *view){
} }
view->showing_ui = VUI_None; view->showing_ui = VUI_None;
view->current_scroll = &view->recent->scroll; view->current_scroll = &view->recent->scroll;
view->recent->scroll.max_y = view_compute_max_target_y(view);
} }
internal void internal void

View File

@ -104,6 +104,11 @@ struct Lex_FSM{
unsigned char emit_token; unsigned char emit_token;
unsigned char multi_line; unsigned char multi_line;
}; };
// Returns a Lex_FSM with every field zeroed. Gives callers a value they can
// assign to an existing Lex_FSM when resetting it.
inline Lex_FSM
zero_lex_fsm(){
    Lex_FSM cleared = {0};
    return(cleared);
}
// BOTTOM // BOTTOM

View File

@ -4,7 +4,7 @@
#ifndef FCPP_NEW_LEXER_INC #ifndef FCPP_NEW_LEXER_INC
#define FCPP_NEW_LEXER_INC #define FCPP_NEW_LEXER_INC
#include "..\4cpp_lexer_types.h" #include "../4cpp_lexer_types.h"
#include "4cpp_lexer_fsms.h" #include "4cpp_lexer_fsms.h"
#include "4cpp_lexer_tables.c" #include "4cpp_lexer_tables.c"
@ -294,7 +294,7 @@ cpp_place_token_nonalloc(Cpp_Token *out_tokens, int token_i, Cpp_Token token){
if (token_i > 0){ if (token_i > 0){
prev_token = out_tokens[token_i - 1]; prev_token = out_tokens[token_i - 1];
merge = new_lex::cpp_attempt_token_merge(prev_token, token); merge = cpp_attempt_token_merge(prev_token, token);
if (merge.did_merge){ if (merge.did_merge){
out_tokens[token_i - 1] = merge.new_token; out_tokens[token_i - 1] = merge.new_token;
} }
@ -336,6 +336,13 @@ struct Lex_Data{
int __pc__; int __pc__;
}; };
// Builds a fresh Lex_Data: every field zeroed, with `tb` pointing at the
// caller-supplied buffer. The buffer is neither allocated nor sized here;
// the caller owns it and must make it large enough for the lexer's use.
inline Lex_Data
lex_data_init(char *tb){
    Lex_Data fresh = {0};
    fresh.tb = tb;
    return(fresh);
}
#define DrCase(PC) case PC: goto resumespot_##PC #define DrCase(PC) case PC: goto resumespot_##PC
@ -355,7 +362,9 @@ enum Lex_Result{
}; };
lexer_link int lexer_link int
cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_stack_out){ cpp_lex_nonalloc(Lex_Data *S_ptr,
char *chunk, int size,
Cpp_Token_Stack *token_stack_out){
Lex_Data S = *S_ptr; Lex_Data S = *S_ptr;
Cpp_Token *out_tokens = token_stack_out->tokens; Cpp_Token *out_tokens = token_stack_out->tokens;
@ -403,20 +412,22 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
S.token_start = S.pos; S.token_start = S.pos;
S.tb_pos = 0; S.tb_pos = 0;
S.fsm = {0}; S.fsm = zero_lex_fsm();
for(;;){ for(;;){
unsigned short *eq_classes = get_eq_classes[S.pp_state]; {
unsigned char *fsm_table = get_table[S.pp_state]; unsigned short *eq_classes = get_eq_classes[S.pp_state];
unsigned char *fsm_table = get_table[S.pp_state];
for (; S.fsm.state < LS_count && S.pos < end_pos;){ for (; S.fsm.state < LS_count && S.pos < end_pos;){
c = chunk[S.pos++]; c = chunk[S.pos++];
S.tb[S.tb_pos++] = c; S.tb[S.tb_pos++] = c;
int i = S.fsm.state + eq_classes[c]; int i = S.fsm.state + eq_classes[c];
S.fsm.state = fsm_table[i]; S.fsm.state = fsm_table[i];
S.fsm.multi_line |= multiline_state_table[S.fsm.state]; S.fsm.multi_line |= multiline_state_table[S.fsm.state];
}
S.fsm.emit_token = (S.fsm.state >= LS_count);
} }
S.fsm.emit_token = (S.fsm.state >= LS_count);
if (S.fsm.emit_token == 0){ if (S.fsm.emit_token == 0){
S.chunk_pos += size; S.chunk_pos += size;
@ -950,7 +961,8 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_
#undef DrCase #undef DrCase
lexer_link int lexer_link int
cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, cpp_lex_nonalloc(Lex_Data *S_ptr,
char *chunk, int size,
Cpp_Token_Stack *token_stack_out, int max_tokens){ Cpp_Token_Stack *token_stack_out, int max_tokens){
Cpp_Token_Stack temp_stack = *token_stack_out; Cpp_Token_Stack temp_stack = *token_stack_out;
if (temp_stack.max_count > temp_stack.count + max_tokens){ if (temp_stack.max_count > temp_stack.count + max_tokens){
@ -971,7 +983,8 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size,
} }
lexer_link int lexer_link int
cpp_lex_size_nonalloc(Lex_Data *S_ptr, char *chunk, int size, int full_size, cpp_lex_size_nonalloc(Lex_Data *S_ptr,
char *chunk, int size, int full_size,
Cpp_Token_Stack *token_stack_out){ Cpp_Token_Stack *token_stack_out){
int result = 0; int result = 0;
if (S_ptr->pos >= full_size){ if (S_ptr->pos >= full_size){
@ -991,7 +1004,8 @@ cpp_lex_size_nonalloc(Lex_Data *S_ptr, char *chunk, int size, int full_size,
} }
lexer_link int lexer_link int
cpp_lex_size_nonalloc(Lex_Data *S_ptr, char *chunk, int size, int full_size, cpp_lex_size_nonalloc(Lex_Data *S_ptr,
char *chunk, int size, int full_size,
Cpp_Token_Stack *token_stack_out, int max_tokens){ Cpp_Token_Stack *token_stack_out, int max_tokens){
Cpp_Token_Stack temp_stack = *token_stack_out; Cpp_Token_Stack temp_stack = *token_stack_out;
if (temp_stack.max_count > temp_stack.count + max_tokens){ if (temp_stack.max_count > temp_stack.count + max_tokens){
@ -1012,7 +1026,6 @@ cpp_lex_size_nonalloc(Lex_Data *S_ptr, char *chunk, int size, int full_size,
return(result); return(result);
} }
#if 0
lexer_link Cpp_Relex_State lexer_link Cpp_Relex_State
cpp_relex_nonalloc_start(Cpp_File file, Cpp_Token_Stack *stack, cpp_relex_nonalloc_start(Cpp_File file, Cpp_Token_Stack *stack,
int start, int end, int amount, int tolerance){ int start, int end, int amount, int tolerance){
@ -1024,7 +1037,7 @@ cpp_relex_nonalloc_start(Cpp_File file, Cpp_Token_Stack *stack,
state.amount = amount; state.amount = amount;
state.tolerance = tolerance; state.tolerance = tolerance;
Cpp_Get_Token_Result result = new_lex::cpp_get_token(stack, start); Cpp_Get_Token_Result result = cpp_get_token(stack, start);
if (result.token_index <= 0){ if (result.token_index <= 0){
state.start_token_i = 0; state.start_token_i = 0;
} }
@ -1032,7 +1045,7 @@ cpp_relex_nonalloc_start(Cpp_File file, Cpp_Token_Stack *stack,
state.start_token_i = result.token_index-1; state.start_token_i = result.token_index-1;
} }
result = new_lex::cpp_get_token(stack, end); result = cpp_get_token(stack, end);
if (result.token_index < 0) result.token_index = 0; if (result.token_index < 0) result.token_index = 0;
else if (end > stack->tokens[result.token_index].start) ++result.token_index; else if (end > stack->tokens[result.token_index].start) ++result.token_index;
state.end_token_i = result.token_index; state.end_token_i = result.token_index;
@ -1045,6 +1058,11 @@ cpp_relex_nonalloc_start(Cpp_File file, Cpp_Token_Stack *stack,
return(state); return(state);
} }
// Converts a token's stored state bits into a Cpp_Preprocessor_State.
// The whole bitfield value is cast directly; no bits are masked off.
inline Cpp_Preprocessor_State
cpp_token_get_pp_state(fcpp_u16 bitfield){
    Cpp_Preprocessor_State pp_state = (Cpp_Preprocessor_State)(bitfield);
    return(pp_state);
}
// TODO(allen): Eliminate this once we actually store the EOF token // TODO(allen): Eliminate this once we actually store the EOF token
// in the token stack. // in the token stack.
inline Cpp_Token inline Cpp_Token
@ -1064,23 +1082,62 @@ cpp__get_token(Cpp_Token_Stack *stack, Cpp_Token *tokens, int size, int index){
} }
FCPP_LINK bool FCPP_LINK bool
cpp_relex_nonalloc_main(Cpp_Relex_State *state, Cpp_Token_Stack *relex_stack, int *relex_end){ cpp_relex_nonalloc_main(Cpp_Relex_State *state,
Cpp_Token_Stack *relex_stack,
int *relex_end,
char *spare){
Cpp_Token_Stack *stack = state->stack; Cpp_Token_Stack *stack = state->stack;
Cpp_Token *tokens = stack->tokens; Cpp_Token *tokens = stack->tokens;
new_lex::cpp_shift_token_starts(stack, state->end_token_i, state->amount); cpp_shift_token_starts(stack, state->end_token_i, state->amount);
Lex_Data lex = {}; Lex_Data lex = lex_data_init(spare);
lex.pp_state = cpp_token_get_pp_state(tokens[state->start_token_i].state_flags); lex.pp_state = cpp_token_get_pp_state(tokens[state->start_token_i].state_flags);
lex.pos = state->relex_start; lex.pos = state->relex_start;
int relex_end_i = state->end_token_i; int relex_end_i = state->end_token_i;
Cpp_Token match_token = cpp__get_token(stack, tokens, state->file.size, relex_end_i); Cpp_Token match_token = cpp__get_token(stack, tokens, state->file.size, relex_end_i);
Cpp_Token end_token = match_token; Cpp_Token end_token = match_token;
bool went_too_far = 0; int went_too_far = false;
// TODO(allen): This can be better now I suspect.
for (;;){ for (;;){
Cpp_Read_Result read = cpp_lex_step(state->file, &lex); int result =
cpp_lex_nonalloc(&lex,
state->file.data, state->file.size,
stack, 1);
switch (result){
case LexHitTokenLimit:
{
Cpp_Token token =
stack->tokens[stack->count-1];
if (token.start == end_token.start &&
token.size == end_token.size &&
token.flags == end_token.flags &&
token.state_flags == end_token.state_flags){
--stack->count;
goto double_break;
}
while (lex.pos > end_token.start && relex_end_i < stack->count){
++relex_end_i;
end_token = cpp__get_token(stack, tokens, state->file.size, relex_end_i);
}
}
break;
case LexNeedChunk: Assert(!"Invalid path"); break;
case LexNeedTokenMemory:
went_too_far = true;
goto double_break;
case LexFinished:
goto double_break;
}
#if 0
if (read.has_result){ if (read.has_result){
if (read.token.start == end_token.start && if (read.token.start == end_token.start &&
read.token.size == end_token.size && read.token.size == end_token.size &&
@ -1095,12 +1152,14 @@ cpp_relex_nonalloc_main(Cpp_Relex_State *state, Cpp_Token_Stack *relex_stack, in
end_token = cpp__get_token(stack, tokens, state->file.size, relex_end_i); end_token = cpp__get_token(stack, tokens, state->file.size, relex_end_i);
} }
if (relex_stack->count == relex_stack->max_count){ if (relex_stack->count == relex_stack->max_count){
went_too_far = 1; went_too_far = true;
break; break;
} }
} }
if (lex.pos >= state->file.size) break; if (lex.pos >= state->file.size) break;
#endif
} }
double_break:;
if (!went_too_far){ if (!went_too_far){
if (relex_stack->count > 0){ if (relex_stack->count > 0){
@ -1133,7 +1192,6 @@ cpp_relex_nonalloc_main(Cpp_Relex_State *state, Cpp_Token_Stack *relex_stack, in
return went_too_far; return went_too_far;
} }
#endif
#endif #endif