cleaned up the relex API, in theory it handles chunks now

master
Allen Webster 2016-09-18 09:09:43 -04:00
parent 3bd8fd5147
commit 6168528c0e
7 changed files with 376 additions and 141 deletions

File diff suppressed because one or more lines are too long

View File

@ -266,6 +266,7 @@ cpp_pp_directive_to_state(Cpp_Token_Type type){
return(result);
}
// duff-routine defines
#define DrCase(PC) case PC: goto resumespot_##PC
#define DrYield(PC, n) { \
@ -1094,18 +1095,43 @@ DOC_SEE(cpp_lex_data_temp_read)
lex_data->tb = new_buffer;
}
// TODO(allen): Get the relex system ready to work in chunks.
// Narrows a token's 16-bit state bitfield to a char. cpp_relex_init uses the
// result as the lexer's pp_state when resuming from an existing token.
FCPP_INTERNAL char
cpp_token_get_pp_state(uint16_t bitfield){
    char pp_state = (char)bitfield;
    return(pp_state);
}
// Adds shift_amount to the start position of every token in the array, from
// index from_token_i up to (but not including) array->count.
FCPP_INTERNAL void
cpp_shift_token_starts(Cpp_Token_Array *array, int32_t from_token_i, int32_t shift_amount){
    Cpp_Token *tokens = array->tokens;
    int32_t token_count = array->count;
    int32_t index = from_token_i;
    while (index < token_count){
        tokens[index].start += shift_amount;
        ++index;
    }
}
// Returns a copy of array->tokens[index] when index is below array->count.
// Otherwise returns a stand-in token: type CPP_TOKEN_EOF, zero size, no flags,
// starting at file_size.
FCPP_INTERNAL Cpp_Token
cpp_index_array(Cpp_Token_Array *array, int32_t file_size, int32_t index){
    Cpp_Token eof_token;
    
    if (index < array->count){
        return(array->tokens[index]);
    }
    
    eof_token.type = CPP_TOKEN_EOF;
    eof_token.start = file_size;
    eof_token.size = 0;
    eof_token.state_flags = 0;
    eof_token.flags = 0;
    return(eof_token);
}
#if 0
FCPP_INTERNAL Cpp_Relex_State
cpp_relex_nonalloc_start(char *data, int32_t size, Cpp_Token_Array *array,
int32_t start, int32_t end, int32_t amount, int32_t tolerance){
cpp_relex_nonalloc_start(Cpp_Token_Array *array, int32_t start, int32_t end, int32_t tolerance){
Cpp_Relex_State state;
state.data = data;
state.size = size;
state.array = array;
state.start = start;
state.end = end;
state.amount = amount;
state.tolerance = tolerance;
Cpp_Get_Token_Result result = cpp_get_token(array, start);
@ -1133,70 +1159,222 @@ cpp_relex_nonalloc_start(char *data, int32_t size, Cpp_Token_Array *array,
return(state);
}
#endif
FCPP_INTERNAL char
cpp_token_get_pp_state(uint16_t bitfield){
return (char)(bitfield);
// Maps an edited character range [start_pos, end_pos] onto a range of token
// indices in the existing token array.
//
//  start_token_index: one token before the token cpp_get_token reports for
//                     start_pos, clamped to 0.
//  end_token_index:   the token cpp_get_token reports for end_pos, advanced by
//                     one when end_pos lies past that token's start, and
//                     clamped to 0.
//
// The negative-index clamp is applied BEFORE array->tokens is indexed, so a
// negative token_index from cpp_get_token never causes an out-of-bounds read.
// The resulting value for that case (0) is unchanged.
FCPP_INTERNAL Cpp_Relex_Range
cpp_get_relex_range(Cpp_Token_Array *array, int32_t start_pos, int32_t end_pos){
    Cpp_Relex_Range range = {0};
    Cpp_Get_Token_Result get_result = {0};
    
    get_result = cpp_get_token(array, start_pos);
    range.start_token_index = get_result.token_index-1;
    if (range.start_token_index < 0){
        range.start_token_index = 0;
    }
    
    get_result = cpp_get_token(array, end_pos);
    range.end_token_index = get_result.token_index;
    if (range.end_token_index < 0){
        // No token at or before end_pos: nothing to index, start at token 0.
        range.end_token_index = 0;
    }
    else if (end_pos > array->tokens[range.end_token_index].start){
        ++range.end_token_index;
    }
    
    return(range);
}
// TODO(allen): Eliminate this once we actually store the EOF token
// in the token stack.
FCPP_INTERNAL Cpp_Token
cpp_index_array(Cpp_Token_Array *array, int32_t file_size, int32_t index){
Cpp_Token result;
if (index < array->count){
result = array->tokens[index];
// Builds the Cpp_Relex_Data used to drive cpp_relex_step.
//
//  array                  - existing token array for the file.
//  start_pos, end_pos     - character range handed to cpp_get_relex_range to
//                           pick the token range to relex.
//  character_shift_amount - stored for later use by cpp_relex_step /
//                           cpp_relex_abort when shifting token starts.
//  spare                  - passed straight to cpp_lex_data_init.
//
// Returns the initialized state by value.
//
// NOTE(review): in this diff view the `else{ ... result.* ... }` lines below
// appear to be leftovers of the removed cpp_index_array body interleaved with
// the new function; they do not look like part of cpp_relex_init. Confirm
// against the real file.
FCPP_LINK Cpp_Relex_Data
cpp_relex_init(Cpp_Token_Array *array, int32_t start_pos, int32_t end_pos, int32_t character_shift_amount, char *spare){
Cpp_Relex_Data state = {0};
Cpp_Relex_Range range = cpp_get_relex_range(array, start_pos, end_pos);
state.start_token_index = range.start_token_index;
state.end_token_index = range.end_token_index;
// Remembered separately so cpp_relex_abort can undo the shift from the
// original end index.
state.original_end_token_index = range.end_token_index;
// Lexing restarts at the start token's position, or at start_pos if that is
// earlier.
state.relex_start_position = array->tokens[state.start_token_index].start;
if (start_pos < state.relex_start_position){
state.relex_start_position = start_pos;
}
else{
result.start = file_size;
result.size = 0;
result.type = CPP_TOKEN_EOF;
result.flags = 0;
result.state_flags = 0;
state.character_shift_amount = character_shift_amount;
state.lex = cpp_lex_data_init(spare);
// Seed the lexer with the preprocessor state recorded on the start token.
state.lex.pp_state = cpp_token_get_pp_state(array->tokens[state.start_token_index].state_flags);
state.lex.pos = state.relex_start_position;
return(state);
}
// duff-routine defines
//
// These macros make a function resumable. They expect the enclosing function
// to have a pointer `S_ptr` to the persistent state and a local working copy
// `S` of that state, and the state must have `result_state` and `__pc__`
// members.
//
// The local copy is written back FIRST and result_state / __pc__ are stored
// afterwards. Storing result_state before `*S_ptr = S` would let the struct
// copy overwrite it with the stale value held in S.

// DrCase(PC): case label that jumps to the resume point left by DrYield(PC, ...).
#define DrCase(PC) case PC: goto resumespot_##PC

// DrYield(PC, n): save the state, record n as the result and PC as the resume
// point, and return n. Execution continues after this macro on the next entry
// that dispatches through DrCase(PC).
#define DrYield(PC, n) { \
*S_ptr = S; \
S_ptr->result_state = n; \
S_ptr->__pc__ = PC; return(n); resumespot_##PC:; }

// DrReturn(n): save the state, record n as the result, set __pc__ to -1 and
// return n.
#define DrReturn(n) { \
*S_ptr = S; \
S_ptr->result_state = n; \
S_ptr->__pc__ = -1; return(n); }
// Runs the relex as a resumable routine.
//
//  S_ptr       - persistent relex state from cpp_relex_init; copied into the
//                local S on entry and written back by DrYield / DrReturn.
//  chunk, chunk_size, full_size - forwarded to
//                cpp_lex_nonalloc_no_null_out_limit; full_size is also used as
//                the position of the stand-in EOF token and to decide whether
//                any lexing is needed.
//  array       - the existing token array; token starts from end_token_index
//                onward are shifted by character_shift_amount on first entry.
//  relex_array - receives the newly lexed tokens.
//
// Returns LexResult_NeedChunk or LexResult_NeedTokenMemory when the underlying
// lexer reports them (the call can then be repeated to resume), and
// LexResult_Finished when the relex is done.
FCPP_LINK Cpp_Lex_Result
cpp_relex_step(Cpp_Relex_Data *S_ptr, char *chunk, int32_t chunk_size, int32_t full_size,
Cpp_Token_Array *array, Cpp_Token_Array *relex_array){
Cpp_Relex_Data S = *S_ptr;
// Resume dispatch: jump back to the yield point recorded in __pc__. When no
// case matches, execution falls through to the setup below.
switch (S.__pc__){
DrCase(1);
DrCase(2);
}
// Setup: move the tokens from the end index onward to their post-edit
// positions, then fetch the token the relexed stream is expected to meet.
cpp_shift_token_starts(array, S.end_token_index, S.character_shift_amount);
S.end_token = cpp_index_array(array, full_size, S.end_token_index);
if (S.relex_start_position < full_size){
// TODO(allen): This can be better I suspect.
for (;;){
// The final argument is 1; presumably a per-call token limit so the loop
// can inspect each new token - confirm against the lexer.
Cpp_Lex_Result step_result =
cpp_lex_nonalloc_no_null_out_limit(&S.lex, chunk, chunk_size, full_size,
relex_array, 1);
switch (step_result){
case LexResult_HitTokenLimit:
{
// Compare the most recently emitted token against the expected end token.
Cpp_Token token = relex_array->tokens[relex_array->count-1];
if (token.type == S.end_token.type &&
token.start == S.end_token.start &&
token.size == S.end_token.size &&
token.flags == S.end_token.flags &&
token.state_flags == S.end_token.state_flags){
// Exact match: drop the duplicate from relex_array and stop relexing.
--relex_array->count;
goto double_break;
}
// No match: advance the end token while the lexer position is past its start
// and there are tokens left in the array.
while (S.lex.pos > S.end_token.start && S.end_token_index < array->count){
++S.end_token_index;
S.end_token = cpp_index_array(array, full_size, S.end_token_index);
}
}
break;
// Yield to the caller; the next call resumes right after the yield and the
// following `break` re-enters the lex loop.
case LexResult_NeedChunk: DrYield(1, LexResult_NeedChunk); break;
case LexResult_NeedTokenMemory: DrYield(2, LexResult_NeedTokenMemory); break;
case LexResult_Finished: goto double_break;
}
}
}
double_break:;
DrReturn(LexResult_Finished);
}
#undef DrYield
#undef DrReturn
#undef DrCase
// Computes the token count the main array will have after the relexed tokens
// replace the range [start_token_index, end_token_index): current_count plus
// the relexed token count minus the number of replaced tokens.
// Returns -1 when the recorded result state is not LexResult_Finished.
FCPP_LINK int32_t
cpp_relex_get_new_count(Cpp_Relex_Data *S_ptr, int32_t current_count, Cpp_Token_Array *relex_array){
    int32_t replaced_count = S_ptr->end_token_index - S_ptr->start_token_index;
    int32_t count_delta = relex_array->count - replaced_count;
    
    if (S_ptr->result_state != LexResult_Finished){
        return(-1);
    }
    return(current_count + count_delta);
}
#if !defined(FCPP_FORBID_MEMCPY)
#include <string.h>
#endif
FCPP_INTERNAL void
cpp_shift_token_starts(Cpp_Token_Array *array, int32_t from_token_i, int32_t shift_amount){
Cpp_Token *token = array->tokens + from_token_i;
int32_t count = array->count, i = 0;
for (i = from_token_i; i < count; ++i, ++token){
token->start += shift_amount;
cpp__block_move(void *dst, void *src, int32_t size){
// Copies size bytes from src to dst; the regions may overlap.
// NOTE(review): in this diff view the cpp_shift_token_starts lines above are
// removed-side leftovers; only the `FCPP_INTERNAL void` line appears to belong
// to cpp__block_move. Confirm against the real file.
#if !defined(FCPP_FORBID_MEMCPY)
memmove(dst, src, size);
#else
// TODO(allen): find a way to write a fast one of these.
uint8_t *d = (uint8_t*)dst, *s = (uint8_t*)src;
// Copy forward when dst is below src or entirely past the source range;
// a forward copy cannot clobber unread source bytes in those cases.
if (d < s || d >= s + size){
for (; size > 0; --size){
*(d++) = *(s++);
}
}
// Otherwise dst lies inside the source range: copy from the last byte
// backwards so each source byte is read before it is overwritten.
else{
d += size - 1;
s += size - 1;
for (; size > 0; --size){
*(d--) = *(s--);
}
}
#endif
}
// TODO(allen): This relex system is a little bit broken. It doesn't allow for the
// data chunks and it doesn't actually set up the state mid-data stream properly.
FCPP_INTERNAL int32_t
cpp_relex_nonalloc_main(Cpp_Relex_State *state,
// Merges the relexed tokens into the main token array.
// The tokens from end_token_index onward are moved so that exactly
// relex_array->count slots open up at start_token_index, array->count is
// adjusted by the same delta, and the relexed tokens are copied into the gap.
// This function does not grow array->tokens.
FCPP_LINK void
cpp_relex_complete(Cpp_Relex_Data *S_ptr, Cpp_Token_Array *array, Cpp_Token_Array *relex_array){
    int32_t first_index = S_ptr->start_token_index;
    int32_t tail_index = S_ptr->end_token_index;
    int32_t replaced_count = tail_index - first_index;
    int32_t count_delta = relex_array->count - replaced_count;
    int32_t tail_count = array->count - tail_index;
    
    // Slide the tail only when the token count changes and a tail exists.
    if (count_delta != 0 && tail_count > 0){
        Cpp_Token *tail = array->tokens + tail_index;
        cpp__block_move(tail + count_delta, tail, sizeof(Cpp_Token)*tail_count);
    }
    array->count += count_delta;
    
    cpp__block_move(&array->tokens[first_index], relex_array->tokens,
                    sizeof(Cpp_Token)*relex_array->count);
}
// Undoes the token start shift: adds the negated character_shift_amount to
// every token start from original_end_token_index onward.
FCPP_LINK void
cpp_relex_abort(Cpp_Relex_Data *S_ptr, Cpp_Token_Array *array){
    int32_t first_shifted = S_ptr->original_end_token_index;
    int32_t undo_amount = -S_ptr->character_shift_amount;
    cpp_shift_token_starts(array, first_shifted, undo_amount);
}
#if 0
// TODO(allen): rename shift_amount to character_shift_amount
FCPP_INTERNAL Cpp_Lex_Result
cpp_relex_nonalloc_main(Cpp_Relex_Data *S_ptr,
Cpp_Relex_Range range, int32_t shift_amount,
char *chunk, int32_t chunk_size, int32_t size,
Cpp_Token_Array *array,
Cpp_Token_Array *relex_array,
int32_t *relex_end,
int32_t *relex_end_out,
char *spare){
Cpp_Token_Array *array = state->array;
Cpp_Token *tokens = array->tokens;
cpp_shift_token_starts(array, state->end_token_i, state->amount);
int32_t relex_end_i = state->end_token_i;
Cpp_Token match_token = cpp_index_array(array, state->size, relex_end_i);
cpp_shift_token_starts(array, range.end_token_index, shift_amount);
Cpp_Token match_token = cpp_index_array(array, size, range.end_token_index);
Cpp_Token end_token = match_token;
int32_t went_too_far = false;
if (state->relex_start < state->size){
Cpp_Relex_State result = LexResult_Finished;
Cpp_Token *tokens = array->tokens;
int32_t relex_end_index = range.end_token_index;
if (state->relex_start < size){
Cpp_Lex_Data lex = cpp_lex_data_init(spare);
lex.pp_state = cpp_token_get_pp_state(tokens[state->start_token_i].state_flags);
lex.pos = state->relex_start;
// TODO(allen): This can be better I suspect.
for (;;){
int32_t result =
cpp_lex_nonalloc_no_null_out_limit(&lex, state->data,
state->size, state->size,
int32_t step_result =
cpp_lex_nonalloc_no_null_out_limit(&lex, data, size, size,
relex_array, 1);
switch (result){
switch (step_result){
case LexResult_HitTokenLimit:
{
Cpp_Token token = relex_array->tokens[relex_array->count-1];
@ -1210,7 +1388,7 @@ cpp_relex_nonalloc_main(Cpp_Relex_State *state,
while (lex.pos > end_token.start && relex_end_i < array->count){
++relex_end_i;
end_token = cpp_index_array(array, state->size, relex_end_i);
end_token = cpp_index_array(array, size, relex_end_i);
}
}
break;
@ -1218,7 +1396,7 @@ cpp_relex_nonalloc_main(Cpp_Relex_State *state,
case LexResult_NeedChunk: Assert(!"Invalid path"); break;
case LexResult_NeedTokenMemory:
went_too_far = true;
result = LexResult_NeedTokenMemory;
goto double_break;
case LexResult_Finished:
@ -1228,16 +1406,23 @@ cpp_relex_nonalloc_main(Cpp_Relex_State *state,
double_break:;
}
if (!went_too_far){
*relex_end = relex_end_i;
}
else{
cpp_shift_token_starts(array, state->end_token_i, -state->amount);
switch (result){
case LexResult_Finished:
{
*relex_end_out = relex_end_i;
}break;
case LexResult_NeedTokenMemory:
{
cpp_shift_token_starts(array, state->end_token_i, -shift_amount);
}break;
default: Assert(!"Invalid path");
}
return(went_too_far);
return(result);
}
#endif
#if !defined(FCPP_FORBID_MALLOC)

View File

@ -305,18 +305,21 @@ struct Cpp_Get_Token_Result{
int32_t in_whitespace;
};
#if 0
struct_internal Cpp_Relex_State{
char *data;
int32_t size;
Cpp_Token_Array *array;
int32_t start, end, amount;
int32_t start, end;
int32_t start_token_i;
int32_t end_token_i;
int32_t relex_start;
int32_t tolerance;
int32_t space_request;
};
#endif
// Range of token indices selected for relexing, as produced by
// cpp_get_relex_range.
struct Cpp_Relex_Range{
// Index of the first token in the range (never negative).
int32_t start_token_index;
// Index marking the end of the range (never negative).
int32_t end_token_index;
};
struct_internal Cpp_Lex_FSM{
uint8_t state;
@ -370,6 +373,23 @@ ENUM(int32_t, Cpp_Lex_Result){
LexResult_HitTokenLimit = 3,
};
// Persistent state for a resumable relex: filled in by cpp_relex_init and
// carried between calls to cpp_relex_step.
struct Cpp_Relex_Data{
// Lexer state; initialized with cpp_lex_data_init, then given a pp_state and
// starting position by cpp_relex_init.
Cpp_Lex_Data lex;
// The token at end_token_index that newly lexed tokens are compared against.
Cpp_Token end_token;
// Character position at which relexing starts.
int32_t relex_start_position;
// First token index of the range being replaced.
int32_t start_token_index;
// End token index of the range being replaced; cpp_relex_step may advance it.
int32_t end_token_index;
// The end token index as first computed; cpp_relex_abort shifts back from here.
int32_t original_end_token_index;
// Amount added to token start positions from the end index onward.
int32_t character_shift_amount;
// Result recorded by the step routine's yield/return macros; read by
// cpp_relex_get_new_count.
Cpp_Lex_Result result_state;
// Resume point for cpp_relex_step (-1 once the routine has returned).
int32_t __pc__;
};
ENUM_INTERNAL(uint16_t, Cpp_Preprocessor_State){
CPP_LEX_PP_DEFAULT,
CPP_LEX_PP_IDENTIFIER,

View File

@ -2553,7 +2553,7 @@ App_Step_Sig(app_step){
"-mouse release events in customization system\n"
"\n"
"New in alpha 4.0.10:\n"
"-<control F> list all locations of a string across all open buffers\n"
"-<ctrl F> list all locations of a string across all open buffers\n"
"-Build now finds build.sh and Makefile on Linux\n"
"-<alt n> goes to the next error if the *compilation* buffer is open\n"
"-<alt N> goes to the previous error\n"

View File

@ -1248,7 +1248,7 @@ file_first_lex_parallel(System_Functions *system,
internal b32
file_relex_parallel(System_Functions *system,
Mem_Options *mem, Editing_File *file,
i32 start_i, i32 end_i, i32 amount){
i32 start_i, i32 end_i, i32 shift_amount){
General_Memory *general = &mem->general;
Partition *part = &mem->part;
@ -1260,27 +1260,61 @@ file_relex_parallel(System_Functions *system,
b32 result = true;
b32 inline_lex = !file->state.still_lexing;
if (inline_lex){
char *data = file->state.buffer.data;
i32 size = file->state.buffer.size;
i32 extra_tolerance = 100;
Cpp_Token_Array *array = &file->state.token_array;
Cpp_Relex_Range relex_range =
cpp_get_relex_range(array, start_i, end_i);
Cpp_Relex_State state =
cpp_relex_nonalloc_start(data, size, array,
start_i, end_i, amount, 100);
i32 relex_space_size =
relex_range.end_token_index - relex_range.start_token_index + extra_tolerance;
Temp_Memory temp = begin_temp_memory(part);
i32 relex_end;
Cpp_Token_Array relex_space;
relex_space.count = 0;
relex_space.max_count = state.space_request;
relex_space.tokens = push_array(part, Cpp_Token, relex_space.max_count);
Cpp_Token_Array relex_array;
relex_array.count = 0;
relex_array.max_count = relex_space_size;
relex_array.tokens = push_array(part, Cpp_Token, relex_array.max_count);
char *spare = push_array(part, char, size+1);
if (cpp_relex_nonalloc_main(&state, &relex_space, &relex_end, spare)){
inline_lex = 0;
i32 size = file->state.buffer.size;
char *spare = push_array(part, char, size);
Cpp_Relex_Data state = cpp_relex_init(array, start_i, end_i, shift_amount, spare);
char *chunk = file->state.buffer.data;
i32 chunk_size = size;
for(;;){
Cpp_Lex_Result lex_result =
cpp_relex_step(&state, chunk, chunk_size, size, array, &relex_array);
switch (lex_result){
case LexResult_NeedChunk:
Assert(!"There is only one chunk in the current system.");
break;
case LexResult_NeedTokenMemory:
inline_lex = 0;
goto doublebreak;
case LexResult_Finished:
goto doublebreak;
}
}
else{
doublebreak:;
if (inline_lex){
i32 new_count = cpp_relex_get_new_count(&state, array->count, &relex_array);
if (new_count > array->max_count){
i32 new_max = LargeRoundUp(new_count, Kbytes(1));
array->tokens = (Cpp_Token*)
general_memory_reallocate(general, array->tokens,
array->count*sizeof(Cpp_Token),
new_max*sizeof(Cpp_Token));
array->max_count = new_max;
}
cpp_relex_complete(&state, array, &relex_array);
#if 0
i32 delete_amount = relex_end - state.start_token_i;
i32 shift_amount = relex_space.count - delete_amount;
@ -1307,6 +1341,10 @@ file_relex_parallel(System_Functions *system,
memcpy(state.array->tokens + state.start_token_i, relex_space.tokens,
sizeof(Cpp_Token)*relex_space.count);
#endif
}
else{
cpp_relex_abort(&state, array);
}
end_temp_memory(temp);
@ -1324,12 +1362,12 @@ file_relex_parallel(System_Functions *system,
++end_token_i;
}
cpp_shift_token_starts(array, end_token_i, amount);
cpp_shift_token_starts(array, end_token_i, shift_amount);
--end_token_i;
if (end_token_i >= 0){
Cpp_Token *token = array->tokens + end_token_i;
if (token->start < end_i && token->start + token->size > end_i){
token->size += amount;
token->size += shift_amount;
}
}

View File

@ -3099,7 +3099,7 @@ generate_custom_headers(){
append_ss (&out, name);
append_sc (&out, "</h4><div style='"CODE_STYLE" "DESCRIPT_SECTION_STYLE"'>");
print_function_html(&out, &used_links, item->cpp_name, item->ret, "app->", name, item->breakdown);
print_function_html(&out, &used_links, item->cpp_name, item->ret, "", name, item->breakdown);
append_sc(&out, "</div>");
print_function_docs(&out, part, name, item->doc_string);

View File

@ -131,6 +131,7 @@
; [X] expose dirty flags
; [X] why are command line files not loading any more?
; [X] use strange theme
; [X] cuber's return to previous buffer idea
; [X] tokens in the custom API
; [X] token seeking on custom side
@ -141,8 +142,7 @@
; [] more built in options for auto indenting
;
; [] binary buffers
; [] commands for resizing panels
; [] miblo's various number editors
; [] user file bar string
; [] API docs as text file
; [] read only files
@ -151,9 +151,6 @@
; [] control over how mouse effects panel focus
; [] option to not open *messages* every startup
;
; [] support full length unicode file names
; [] switch based word complete
;
; [] query buffer font info
; [] break buffer name ties by adding parent directories instead of <#>
; [] undo groups
@ -168,6 +165,11 @@
; [] multi-cursor editing
;
; buffer behavior cleanup
; [] show all characters as \# if they can't be rendered
; [] support full length unicode file names
; [] binary buffers
; meta programming system
; [X] condense system into single meta compiler
; [] formalize the rewriter for the 4coder_string.h so it can be used for other single header libs
@ -225,10 +227,8 @@
; [] fancy code presentation mode
;
; [X] cuber's return to previous buffer idea
; [] miblo's various number editors
;
; [] keep copy of unedited orignal, somewhere (compressed? restore by history?)
; [] switch based word complete
; [] keep copy of unedited original maybe? (compressed? restore by history?)
;
; [] diff
; [] cloc
@ -271,14 +271,6 @@
;
;
; FANCY-PANTS IDEAS
; [] pass messages to 'jobs' to try to avoid cancelling them
; if the job still thinks it should be cancelled it will say so
; but otherwise the job can try to incorporate the new info
; without throwing away the progress it has made so far.
;
;
; PORTING TODOS
; [X] command line parameters