documentation for relex system, and some cleanup on buffer rendering

2016-09-19 20:51:28 -04:00 · 2016-09-19 20:51:28 -04:00 · 7531353153
parent 7d081dd7f5
commit 7531353153
6 changed files with 268 additions and 93 deletions
--- a/4coder_API.html
+++ b/4coder_API.html
--- a/4cpp_lexer.h
+++ b/4cpp_lexer.h
@ -1011,6 +1011,7 @@ Cpp_Token_Array lex_file(char *file_name){
 )

 DOC_SEE(Cpp_Lex_Data)
+DOC_SEE(Cpp_Lex_Result)
 */{
    Cpp_Lex_Result result = 0;
    if (full_size == HAS_NULL_TERM){
@ -1127,8 +1128,18 @@ cpp_index_array(Cpp_Token_Array *array, int32_t file_size, int32_t index){
    return(result);
 }

-FCPP_INTERNAL Cpp_Relex_Range
-cpp_get_relex_range(Cpp_Token_Array *array, int32_t start_pos, int32_t end_pos){
+FCPP_LINK Cpp_Relex_Range
+cpp_get_relex_range(Cpp_Token_Array *array, int32_t start_pos, int32_t end_pos)
+/*
+DOC_PARAM(array, A pointer to the token array that will be modified by the relex,
+this array should already contain the tokens for the previous state of the file.)
+DOC_PARAM(start_pos, The start position of the edited region of the file.
+The start and end points are based on the edited region of the file before the edit.)
+DOC_PARAM(end_pos, The end position of the edited region of the file.
+In particular, end_pos is the first character after the edited region not affected by the edit.
+Thus if the edited region contained one character end_pos - start_pos should equal 1.
+The start and end points are based on the edited region of the file before the edit.)
+*/{
    Cpp_Relex_Range range = {0};
    Cpp_Get_Token_Result get_result = {0};
    
@ -1152,7 +1163,29 @@ cpp_get_relex_range(Cpp_Token_Array *array, int32_t start_pos, int32_t end_pos){

 FCPP_LINK Cpp_Relex_Data
 cpp_relex_init(Cpp_Token_Array *array, int32_t start_pos, int32_t end_pos, int32_t character_shift_amount, char *spare)
-{
+/*
+DOC_PARAM(array, A pointer to the token array that will be modified by the relex,
+this array should already contain the tokens for the previous state of the file.)
+DOC_PARAM(start_pos, The start position of the edited region of the file.
+The start and end points are based on the edited region of the file before the edit.)
+DOC_PARAM(end_pos, The end position of the edited region of the file.
+In particular, end_pos is the first character after the edited region not affected by the edit.
+Thus if the edited region contained one character end_pos - start_pos should equal 1.
+The start and end points are based on the edited region of the file before the edit.)
+DOC_PARAM(character_shift_amount, The shift in the characters after the edited region.)
+DOC_PARAM(spare, The spare space for the lexing state. 
+Should be big enough to store the largest token in the file.)
+DOC_RETURN(Returns a partially initialized relex state.)
+
+DOC(This call does the first setup step of initializing a relex state.  To finish initializing the relex state
+you must tell the state about the positioning of the first chunk it will be fed.  There are two methods of doing
+this, the direct method is with cpp_relex_declare_first_chunk_position, the method that is often more convenient
+is with cpp_relex_is_start_chunk.  If the file is not chunked the second step of initialization can be skipped.)
+
+DOC_SEE(cpp_relex_declare_first_chunk_position)
+DOC_SEE(cpp_relex_is_start_chunk)
+
+*/{
    Cpp_Relex_Data state = {0};
    
    Cpp_Relex_Range range = cpp_get_relex_range(array, start_pos, end_pos);
@ -1175,18 +1208,61 @@ cpp_relex_init(Cpp_Token_Array *array, int32_t start_pos, int32_t end_pos, int32
 }

 FCPP_LINK int32_t
-cpp_relex_start_position(Cpp_Relex_Data *S_ptr){
+cpp_relex_start_position(Cpp_Relex_Data *S_ptr)
+/*
+DOC_PARAM(S_ptr, A pointer to a state that is done with the first stage of initialization (cpp_relex_init))
+DOC_RETURN(Returns the first position in the file the relexer wants to read.  This is usually a position slightly
+earlier than the start_pos provided as the edit range.)
+
+DOC(After doing the first stage of initialization this call is useful for figuring out what chunk
+of the file to feed to the lexer first.  It should be a chunk that contains the position returned
+by this call.)
+
+DOC_SEE(cpp_relex_init)
+DOC_SEE(cpp_relex_declare_first_chunk_position)
+
+*/{
    int32_t result = S_ptr->relex_start_position;
    return(result);
 }

 FCPP_LINK void
-cpp_relex_declare_first_chunk_position(Cpp_Relex_Data *S_ptr, int32_t position){
+cpp_relex_declare_first_chunk_position(Cpp_Relex_Data *S_ptr, int32_t position)
+/*
+DOC_PARAM(S_ptr, A pointer to a state that is done with the first stage of initialization (cpp_relex_init))
+DOC_PARAM(position, The start position of the first chunk that will be fed to the relex process.)
+
+DOC(To initialize the relex system completely, the system needs to know how the characters in the
+first chunk line up with the file's absolute layout.  This call declares where the first chunk's start
+position is in the absolute file layout, and the system infers the alignment from that.  For this method
+to work the starting position of the relexing needs to be inside the first chunk.  To get the relexer's
+starting position call cpp_relex_start_position.)
+
+DOC_SEE(cpp_relex_init)
+DOC_SEE(cpp_relex_start_position)
+
+*/{
    S_ptr->lex.chunk_pos = position;
 }

 FCPP_LINK int32_t
-cpp_relex_is_start_chunk(Cpp_Relex_Data *S_ptr, char *chunk, int32_t chunk_size){
+cpp_relex_is_start_chunk(Cpp_Relex_Data *S_ptr, char *chunk, int32_t chunk_size)
+/*
+DOC_PARAM(S_ptr, A pointer to a state that is done with the first stage of initialization (cpp_relex_init))
+DOC_PARAM(chunk, The chunk to check.)
+DOC_PARAM(chunk_size, The size of the chunk to check.)
+
+DOC_RETURN(Returns non-zero if the passed in chunk should be used as the first chunk for lexing.)
+
+DOC(With this method, once a state is initialized, each chunk can be fed in one after the other in
+the order they appear in the absolute file layout.  When this call returns non-zero it means that
+the chunk that was passed in on that call should be used in the first call to cpp_relex_step.  If,
+after trying all of the chunks, they all return zero, pass in NULL for chunk and 0 for chunk_size
+to tell the system that all possible chunks have already been tried, and then use those values again
+in the one and only call to cpp_relex_step.)
+
+DOC_SEE(cpp_relex_init)
+*/{
    int32_t pos = S_ptr->relex_start_position;
    int32_t start = S_ptr->lex.chunk_pos;
    int32_t end = start + chunk_size;
@ -1221,7 +1297,55 @@ cpp_relex_is_start_chunk(Cpp_Relex_Data *S_ptr, char *chunk, int32_t chunk_size)

 FCPP_LINK Cpp_Lex_Result
 cpp_relex_step(Cpp_Relex_Data *S_ptr, char *chunk, int32_t chunk_size, int32_t full_size,
-               Cpp_Token_Array *array, Cpp_Token_Array *relex_array){
+               Cpp_Token_Array *array, Cpp_Token_Array *relex_array)
+/*
+DOC_PARAM(S_ptr, A pointer to a fully initialized relex state.)
+DOC_PARAM(chunk, A chunk of the edited file being relexed.)
+DOC_PARAM(chunk_size, The size of the current chunk.)
+DOC_PARAM(full_size, The full size of the edited file.)
+DOC_PARAM(array, A pointer to a token array that contained the original tokens before the edit.)
+DOC_PARAM(relex_array, A pointer to a token array for spare space.  The capacity of the
+relex_array determines how far the relex process can go.  If it runs out, the process
+can be continued if the same relex_array is extended without losing the tokens it contains.
+
+To get an appropriate capacity for relex_array, you can get the range of tokens that the relex
+operation is likely to traverse by looking at the result from cpp_get_relex_range.)
+
+DOC(When a file has already been lexed, and then it is edited in a small local way,
+rather than lexing the new file all over again, cpp_relex_step can try to find just
+the range of tokens that need to be updated and fix them in.
+
+First the lex state must be initialized (cpp_relex_init).  Then one or more calls to
+cpp_relex_step will start editing the array and filling out the relex_array.  The return
+value of cpp_relex_step indicates whether the relex was successful or was interrupted
+and if it was interrupted, what the system needs to resume.
+
+LexResult_Finished indicates that the relex engine finished successfully.
+
+LexResult_NeedChunk indicates that the system needs the next chunk of the file.
+
+LexResult_NeedTokenMemory indicates that the relex_array has reached capacity, and that
+it needs to be extended if it is going to continue.  Sometimes in this case it is better
+to stop and just lex the entire file normally, because there are a few cases where a small
+local change affects a long range of the lexer's output.
+
+The relex operation can be closed in one of two ways.  If the LexResult_Finished
+value has been returned by this call, then to complete the edits to the array make
+sure the original array has enough capacity to store the final result by calling
+cpp_relex_get_new_count.  Then the operation can be finished successfully by calling
+cpp_relex_complete.
+
+Whether or not the relex process finished with LexResult_Finished the process can be
+finished by calling cpp_relex_abort, which puts the array back into its original state.
+No close is necessary if getting the original array state back is not necessary.)
+
+DOC_SEE(cpp_relex_init)
+DOC_SEE(cpp_get_relex_range)
+DOC_SEE(Cpp_Lex_Result)
+DOC_SEE(cpp_relex_get_new_count)
+DOC_SEE(cpp_relex_complete)
+DOC_SEE(cpp_relex_abort)
+*/{
    
    Cpp_Relex_Data S = *S_ptr;
    
@ -1276,7 +1400,16 @@ cpp_relex_step(Cpp_Relex_Data *S_ptr, char *chunk, int32_t chunk_size, int32_t f
 #undef DrCase

 FCPP_LINK int32_t
-cpp_relex_get_new_count(Cpp_Relex_Data *S_ptr, int32_t current_count, Cpp_Token_Array *relex_array){
+cpp_relex_get_new_count(Cpp_Relex_Data *S_ptr, int32_t current_count, Cpp_Token_Array *relex_array)
+/*
+DOC_PARAM(S_ptr, A pointer to a state that has gone through cpp_relex_step with a LexResult_Finished return.)
+DOC_PARAM(current_count, The count of tokens in the original array before the edit.)
+DOC_PARAM(relex_array, The relex_array that was used in the cpp_relex_step call/calls.)
+
+DOC(After getting a LexResult_Finished from cpp_relex_step, this call can be used to get
+the size the new array will have.  If the original array doesn't have enough capacity to store
+the new array, its capacity should be increased before passing to cpp_relex_complete.)
+*/{
    int32_t result = -1;
    
    if (S_ptr->result_state == LexResult_Finished){
@ -1315,7 +1448,16 @@ cpp__block_move(void *dst, void *src, int32_t size){
 }

 FCPP_LINK void
-cpp_relex_complete(Cpp_Relex_Data *S_ptr, Cpp_Token_Array *array, Cpp_Token_Array *relex_array){
+cpp_relex_complete(Cpp_Relex_Data *S_ptr, Cpp_Token_Array *array, Cpp_Token_Array *relex_array)
+/*
+DOC_PARAM(S_ptr, A pointer to a state that has gone through cpp_relex_step with a LexResult_Finished return.)
+DOC_PARAM(array, The original array being edited by cpp_relex_step calls.)
+DOC_PARAM(relex_array, The relex_array that was filled by cpp_relex_step.)
+
+DOC(After getting a LexResult_Finished from cpp_relex_step, and ensuring that
+array has a large enough capacity by calling cpp_relex_get_new_count, this call
+does the necessary replacement of tokens in the array to make it match the new file.)
+*/{
    int32_t delete_amount = S_ptr->end_token_index - S_ptr->start_token_index;
    int32_t shift_amount = relex_array->count - delete_amount;
    
@ -1333,8 +1475,17 @@ cpp_relex_complete(Cpp_Relex_Data *S_ptr, Cpp_Token_Array *array, Cpp_Token_Arra
 }

 FCPP_LINK void
-cpp_relex_abort(Cpp_Relex_Data *S_ptr, Cpp_Token_Array *array){
+cpp_relex_abort(Cpp_Relex_Data *S_ptr, Cpp_Token_Array *array)
+/*
+DOC_PARAM(S_ptr, A pointer to a state that has gone through at least one cpp_relex_step.)
+DOC_PARAM(array, The original array that went through cpp_relex_step to be edited.)
+
+DOC(After the first call to cpp_relex_step, the array's contents may have been changed.
+This call ensures the array is in its original state.  After this call the relex state
+is dead.)
+*/{
    cpp_shift_token_starts(array, S_ptr->original_end_token_index, -S_ptr->character_shift_amount);
+    S_ptr->__pc__ = -1;
 }


--- a/4cpp_lexer_types.h
+++ b/4cpp_lexer_types.h
@ -305,19 +305,14 @@ struct Cpp_Get_Token_Result{
 	int32_t in_whitespace;
 };

-#if 0
-struct_internal Cpp_Relex_State{
-    Cpp_Token_Array *array;
-    int32_t start, end;
-    int32_t start_token_i;
-    int32_t end_token_i;
-    int32_t relex_start;
-    int32_t space_request;
-};
-#endif
-
+/* DOC(Cpp_Relex_Range is the return result of the cpp_get_relex_range call.)
+DOC_SEE(cpp_get_relex_range) */
 struct Cpp_Relex_Range{
+    /* DOC(The index of the first token in the unedited array that needs to be relexed.) */
    int32_t start_token_index;
+    /* DOC(The index of the first token in the unedited array after the edited range
+    that may not need to be relexed.  Sometimes a relex operation has to lex past this
+    position to find a token that is not affected by the edit.) */
    int32_t end_token_index;
 };

@ -331,7 +326,7 @@ static Cpp_Lex_FSM null_lex_fsm = {0};

 /* DOC(Cpp_Lex_Data represents the state of the lexer so that the system may be resumable
 and the user can manage the lexer state and decide when to resume lexing with it.  To create
-a new lexer state that has not begun doing any lexing work call cpp_lex_data_init.
+a new lexer state call cpp_lex_data_init.

 The internals of the lex state should not be treated as a part of the public API.)
 DOC_SEE(cpp_lex_data_init)
@ -373,6 +368,10 @@ ENUM(int32_t, Cpp_Lex_Result){
    LexResult_HitTokenLimit = 3,
 };

+/* DOC(Cpp_Relex_Data represents the state of the relexer so that the system may be resumable.
+To create a new relex state call cpp_relex_init.)
+DOC_SEE(cpp_relex_init)
+HIDE_MEMBERS()*/
 struct Cpp_Relex_Data{
    Cpp_Lex_Data lex;
    
--- a/4ed_file_view.cpp
+++ b/4ed_file_view.cpp
@ -4917,7 +4917,6 @@ draw_file_loaded(View *view, i32_Rect rect, b32 is_active, Render_Target *target
    
    i32 count = 0;
    Full_Cursor render_cursor = {0};
-    Buffer_Render_Options opts = {0};
    
    f32 *wraps = view->file_data.line_wrap_y;
    f32 scroll_x = 0;
@ -4944,8 +4943,7 @@ draw_file_loaded(View *view, i32_Rect rect, b32 is_active, Render_Target *target
                               scroll_x, scroll_y, render_cursor,
                               !view->file_data.unwrapped_lines,
                               (f32)max_x, (f32)max_y,
-                               advance_data, (f32)line_height,
-                               opts);
+                               advance_data, (f32)line_height);
    }
    
    Assert(count > 0);
--- a/TODO.txt
+++ b/TODO.txt
@ -141,6 +141,7 @@
 ;         [] switch to line classification system
 ;         [] more built in options for auto indenting
 ;
+; [] eliminate the need for the lexer state's spare array.

 ; [] miblo's various number editors
 ; [] user file bar string
@ -182,6 +183,7 @@
 ;    [] profile and optimize the current metagen system
 ;    [] expand the use of 4coder_types.h to also allow static variable and function declarations
 ;    [] get more of the helper functions going through the documentation system
+;    [] method of pulling the documentation line from another item rather than copy-pasting the text.
 ;

 ; GUI related tech
--- a/buffer/4coder_buffer_abstract.cpp
+++ b/buffer/4coder_buffer_abstract.cpp
@ -771,10 +771,6 @@ buffer_invert_batch(Buffer_Invert_Batch *state, Buffer_Type *buffer, Buffer_Edit
    return(result);
 }

-struct Buffer_Render_Options{
-    b8 show_slash_t;
-};
-
 internal_4tech Full_Cursor
 buffer_get_start_cursor(Buffer_Type *buffer, f32 *wraps, f32 scroll_y,
                        i32 wrapped, f32 width, f32 *advance_data, f32 font_height){
@ -834,32 +830,28 @@ buffer_get_render_data(Buffer_Type *buffer, Buffer_Render_Item *items, i32 max,
                       f32 scroll_x, f32 scroll_y, Full_Cursor start_cursor,
                       i32 wrapped,
                       f32 width, f32 height,
-                       f32 *advance_data, f32 font_height,
-                       Buffer_Render_Options opts){
+                       f32 *advance_data, f32 font_height){
    
-    Buffer_Stringify_Type loop;
-    Buffer_Render_Item *item;
-    Buffer_Render_Item *item_end;
-    char *data;
-    i32 size, end;
-    f32 shift_x, shift_y;
-    f32 x, y;
-    i32 i, item_i;
-    f32 ch_width, ch_width_sub;
-    uint8_t ch;
+    Buffer_Stringify_Type loop = {0};
+    char *data = 0;
+    i32 end = 0;
    
-    size = buffer_size(buffer);
+    i32 size = buffer_size(buffer);
+    f32 shift_x = port_x - scroll_x, shift_y = port_y - scroll_y;
+    f32 ch_width = 0;
+    uint8_t ch = 0;
    
-    shift_x = port_x - scroll_x;
-    shift_y = port_y - scroll_y;
-    if (wrapped) shift_y += start_cursor.wrapped_y;
-    else shift_y += start_cursor.unwrapped_y;
+    if (wrapped){
+        shift_y += start_cursor.wrapped_y;
+    }
+    else{
+        shift_y += start_cursor.unwrapped_y;
+    }
    
-    x = shift_x;
-    y = shift_y;
-    item_i = 0;
-    item = items + item_i;
-    item_end = items + max;
+    f32 x = shift_x;
+    f32 y = shift_y;
+    Buffer_Render_Item *item = items;
+    Buffer_Render_Item *item_end = items + max;
    
    // TODO(allen): What's the plan for when there is not enough space to store
    // more render items?  It seems like we should be able to use the view_x
@ -873,7 +865,7 @@ buffer_get_render_data(Buffer_Type *buffer, Buffer_Render_Item *items, i32 max,
            end = loop.size + loop.absolute_pos;
            data = loop.data - loop.absolute_pos;
            
-            for (i = loop.absolute_pos; i < end; ++i){
+            for (i32 i = loop.absolute_pos; i < end; ++i){
                ch = (uint8_t)data[i];
                ch_width = measure_character(advance_data, ch);
                
@ -888,7 +880,6 @@ buffer_get_render_data(Buffer_Type *buffer, Buffer_Render_Item *items, i32 max,
                    if (item < item_end){
                        write_render_item_inline(item, i, ' ', x, y, advance_data, font_height);
                        item->flags = 0;
-                        ++item_i;
                        ++item;
                        
                        x = shift_x;
@ -900,14 +891,12 @@ buffer_get_render_data(Buffer_Type *buffer, Buffer_Render_Item *items, i32 max,
                    if (item < item_end){
                        ch_width = write_render_item_inline(item, i, '\\', x, y, advance_data, font_height);
                        item->flags = BRFlag_Special_Character;
-                        ++item_i;
                        ++item;
                        x += ch_width;
                        
                        if (item < item_end){
                            ch_width = write_render_item_inline(item, i, 'r', x, y, advance_data, font_height);
                            item->flags = BRFlag_Special_Character;
-                            ++item_i;
                            ++item;
                            x += ch_width;
                        }
@ -915,27 +904,10 @@ buffer_get_render_data(Buffer_Type *buffer, Buffer_Render_Item *items, i32 max,
                    break;
                    
                    case '\t':
-                    if (opts.show_slash_t){
-                        if (item < item_end){
-                            ch_width_sub = write_render_item_inline(item, i, '\\', x, y, advance_data, font_height);
-                            item->flags = BRFlag_Special_Character;
-                            ++item_i;
-                            ++item;
-                            if (item < item_end){
-                                write_render_item_inline(item, i, 't', x + ch_width_sub, y, advance_data, font_height);
-                                item->flags = BRFlag_Special_Character;
-                                ++item_i;
-                                ++item;
-                            }
-                        }
-                    }
-                    else{
-                        if (item < item_end){
-                            write_render_item_inline(item, i, ' ', x, y, advance_data, font_height);
-                            item->flags = 0;
-                            ++item_i;
-                            ++item;
-                        }
+                    if (item < item_end){
+                        write_render_item_inline(item, i, ' ', x, y, advance_data, font_height);
+                        item->flags = 0;
+                        ++item;
                    }
                    x += ch_width;
                    break;
@ -945,14 +917,12 @@ buffer_get_render_data(Buffer_Type *buffer, Buffer_Render_Item *items, i32 max,
                        if (ch >= ' ' && ch <= '~'){
                            write_render_item(item, i, ch, x, y, ch_width, font_height);
                            item->flags = 0;
-                            ++item_i;
                            ++item;
                            x += ch_width;
                        }
                        else{
                            ch_width = write_render_item_inline(item, i, '\\', x, y, advance_data, font_height);
                            item->flags = BRFlag_Special_Character;
-                            ++item_i;
                            ++item;
                            x += ch_width;
                            
@ -964,7 +934,6 @@ buffer_get_render_data(Buffer_Type *buffer, Buffer_Render_Item *items, i32 max,
                            if (item < item_end){
                                ch_width = write_render_item_inline(item, i, C, x, y, advance_data, font_height);
                                item->flags = BRFlag_Special_Character;
-                                ++item_i;
                                ++item;
                                x += ch_width;
                            }
@ -978,7 +947,6 @@ buffer_get_render_data(Buffer_Type *buffer, Buffer_Render_Item *items, i32 max,
                            if (item < item_end){
                                ch_width = write_render_item_inline(item, i, C, x, y, advance_data, font_height);
                                item->flags = BRFlag_Special_Character;
-                                ++item_i;
                                ++item;
                                x += ch_width;
                            }
@ -1000,7 +968,6 @@ buffer_get_render_data(Buffer_Type *buffer, Buffer_Render_Item *items, i32 max,
                ch = 0;
                ch_width = measure_character(advance_data, ' ');
                write_render_item(item, size, ch, x, y, ch_width, font_height);
-                ++item_i;
                ++item;
                x += ch_width;
            }
@ -1011,15 +978,13 @@ buffer_get_render_data(Buffer_Type *buffer, Buffer_Render_Item *items, i32 max,
            ch = 0;
            ch_width = 0;
            write_render_item(item, size, ch, x, y, ch_width, font_height);
-            ++item_i;
            ++item;
            x += ch_width;
        }
    }
    
-    // TODO(allen): handle this with a control state
-    assert_4tech(item_i <= max);
-    *count = item_i;
+    *count = (i32)(item - items);
+    assert_4tech(*count <= max);
 }

 #ifndef NON_ABSTRACT_4TECH