diff --git a/4ed_file_view.cpp b/4ed_file_view.cpp index 34948bf7..a42d38d6 100644 --- a/4ed_file_view.cpp +++ b/4ed_file_view.cpp @@ -1005,8 +1005,8 @@ struct Code_Wrap_State{ f32 tab_indent_amount; f32 byte_advance; - Buffer_Translating_State tran; - Buffer_Translating_Emits emits; + Translation_State tran; + Translation_Emits emits; u32 J; Buffer_Model_Step step; Buffer_Model_Behavior behavior; diff --git a/file/4coder_buffer.cpp b/file/4coder_buffer.cpp index 2fc46af3..69eb7f97 100644 --- a/file/4coder_buffer.cpp +++ b/file/4coder_buffer.cpp @@ -348,14 +348,24 @@ enum{ BufferModelUnit_Numbers, }; -struct Buffer_Translating_State{ +struct Translation_State{ u8 fill_buffer[4]; u32 fill_start_i; u32 fill_i; u32 fill_expected; - +}; +global_const Translation_State null_buffer_translating_state = {0}; + +struct Translation_Byte_Description{ + u32 byte_class; + b32 rebuffer_current; + b32 emit_current_as_cp; + + u32 prelim_emit_type; +}; + +struct Translation_Emit_Type{ u32 byte_class; - u32 emit_type; b32 rebuffer_current; b32 emit_current_as_cp; @@ -364,113 +374,116 @@ struct Buffer_Translating_State{ b32 do_codepoint; b32 do_numbers; }; -global_const Buffer_Translating_State null_buffer_translating_state = {0}; -struct Buffer_Translating_Emits{ +struct Translation_Emits{ Buffer_Model_Step steps[5]; Buffer_Model_Step step_current; u32 step_count; }; internal void -translating_consume_byte(Buffer_Translating_State *tran, u8 ch, u32 i, u32 size){ - tran->byte_class = 0; +translating_consume_byte(Translation_State *tran, u8 ch, u32 i, u32 size, Translation_Byte_Description *desc_out){ + desc_out->byte_class = 0; if ((ch >= ' ' && ch < 0x7F) || ch == '\t' || ch == '\n' || ch == '\r'){ - tran->byte_class = 1; + desc_out->byte_class = 1; } else if (ch < 0xC0){ - tran->byte_class = 1000; + desc_out->byte_class = 1000; } else if (ch < 0xE0){ - tran->byte_class = 2; + desc_out->byte_class = 2; } else if (ch < 0xF0){ - tran->byte_class = 3; + desc_out->byte_class = 3; } else{ - tran->byte_class = 4; + desc_out->byte_class = 4; } - tran->emit_type = BufferModelUnit_None; - tran->rebuffer_current = false; - tran->emit_current_as_cp = false; + desc_out->prelim_emit_type = BufferModelUnit_None; + desc_out->rebuffer_current = false; + desc_out->emit_current_as_cp = false; if (tran->fill_expected == 0){ tran->fill_buffer[0] = ch; tran->fill_start_i = i; tran->fill_i = 1; - if (tran->byte_class == 1){ - tran->emit_type = BufferModelUnit_Codepoint; + if (desc_out->byte_class == 1){ + desc_out->prelim_emit_type = BufferModelUnit_Codepoint; } - else if (tran->byte_class == 0 || tran->byte_class == 1000){ - tran->emit_type = BufferModelUnit_Numbers; + else if (desc_out->byte_class == 0 || desc_out->byte_class == 1000){ + desc_out->prelim_emit_type = BufferModelUnit_Numbers; } else{ - tran->fill_expected = tran->byte_class; + tran->fill_expected = desc_out->byte_class; } } else{ - if (tran->byte_class == 1000){ + if (desc_out->byte_class == 1000){ tran->fill_buffer[tran->fill_i] = ch; ++tran->fill_i; if (tran->fill_i == tran->fill_expected){ - tran->emit_type = BufferModelUnit_Codepoint; + desc_out->prelim_emit_type = BufferModelUnit_Codepoint; } } else{ - if (tran->byte_class >= 2 && tran->byte_class <= 4){ - tran->rebuffer_current = true; + if (desc_out->byte_class >= 2 && desc_out->byte_class <= 4){ + desc_out->rebuffer_current = true; } - else if (tran->byte_class == 1){ - tran->emit_current_as_cp = true; + else if (desc_out->byte_class == 1){ + desc_out->emit_current_as_cp = true; } else{ tran->fill_buffer[tran->fill_i] = ch; ++tran->fill_i; } - tran->emit_type = BufferModelUnit_Numbers; + desc_out->prelim_emit_type = BufferModelUnit_Numbers; } } - if (tran->emit_type == BufferModelUnit_None && i+1 == size){ - tran->emit_type = BufferModelUnit_Numbers; + if (desc_out->prelim_emit_type == BufferModelUnit_None && i+1 == size){ + desc_out->prelim_emit_type = BufferModelUnit_Numbers; } } internal void -translating_select_emit_type(Buffer_Translating_State *tran){ - tran->codepoint = 0; - tran->codepoint_length = 0; - tran->do_codepoint = false; - tran->do_numbers = false; - if (tran->emit_type == BufferModelUnit_Codepoint){ - tran->codepoint = utf8_to_u32_length_unchecked(tran->fill_buffer, &tran->codepoint_length); - if ((tran->codepoint >= ' ' && tran->codepoint <= 255 && tran->codepoint != 127) || tran->codepoint == '\t' || tran->codepoint == '\n' || tran->codepoint == '\r'){ - tran->do_codepoint = true; +translating_select_emit_type(Translation_State *tran, Translation_Byte_Description desc, Translation_Emit_Type *type_out){ + type_out->byte_class = desc.byte_class; + type_out->rebuffer_current = desc.rebuffer_current; + type_out->emit_current_as_cp = desc.emit_current_as_cp; + + type_out->codepoint = 0; + type_out->codepoint_length = 0; + type_out->do_codepoint = false; + type_out->do_numbers = false; + if (desc.prelim_emit_type == BufferModelUnit_Codepoint){ + type_out->codepoint = utf8_to_u32_length_unchecked(tran->fill_buffer, &type_out->codepoint_length); + if ((type_out->codepoint >= ' ' && type_out->codepoint <= 255 && type_out->codepoint != 127) || type_out->codepoint == '\t' || type_out->codepoint == '\n' || type_out->codepoint == '\r'){ + type_out->do_codepoint = true; } else{ - tran->do_numbers = true; + type_out->do_numbers = true; } } - else if (tran->emit_type == BufferModelUnit_Numbers){ - tran->do_numbers = true; + else if (desc.prelim_emit_type == BufferModelUnit_Numbers){ + type_out->do_numbers = true; } - Assert((tran->do_codepoint + tran->do_numbers) <= 1); + Assert((type_out->do_codepoint + type_out->do_numbers) <= 1); } internal void -translating_generate_emits(Buffer_Translating_State *tran, u8 ch, u32 i, Buffer_Translating_Emits *emits_out){ +translating_generate_emits(Translation_State *tran, Translation_Emit_Type emit_types, u8 ch, u32 i, Translation_Emits *emits_out){ emits_out->step_count = 0; - if (tran->do_codepoint){ + if (emit_types.do_codepoint){ emits_out->steps[0].type = 1; - emits_out->steps[0].value = tran->codepoint; + emits_out->steps[0].value = emit_types.codepoint; emits_out->steps[0].i = tran->fill_start_i; - emits_out->steps[0].byte_length = tran->codepoint_length; + emits_out->steps[0].byte_length = emit_types.codepoint_length; emits_out->step_count = 1; } - else if (tran->do_numbers){ + else if (emit_types.do_numbers){ for (u32 j = 0; j < tran->fill_i; ++j){ emits_out->steps[j].type = 0; emits_out->steps[j].value = tran->fill_buffer[j]; @@ -480,22 +493,22 @@ translating_generate_emits(Buffer_Translating_State *tran, u8 ch, u32 i, Buffer_ emits_out->step_count = tran->fill_i; } - if (tran->do_codepoint || tran->do_numbers){ + if (emit_types.do_codepoint || emit_types.do_numbers){ tran->fill_start_i = 0; tran->fill_i = 0; tran->fill_expected = 0; } - if (tran->rebuffer_current){ - Assert(tran->do_codepoint || tran->do_numbers); + if (emit_types.rebuffer_current){ + Assert(emit_types.do_codepoint || emit_types.do_numbers); tran->fill_buffer[0] = ch; tran->fill_start_i = i; tran->fill_i = 1; - tran->fill_expected = tran->byte_class; + tran->fill_expected = emit_types.byte_class; } - else if (tran->emit_current_as_cp){ - Assert(tran->do_codepoint || tran->do_numbers); + else if (emit_types.emit_current_as_cp){ + Assert(emit_types.do_codepoint || emit_types.do_numbers); emits_out->steps[emits_out->step_count].type = 1; emits_out->steps[emits_out->step_count].value = ch; @@ -506,10 +519,12 @@ translating_generate_emits(Buffer_Translating_State *tran, u8 ch, u32 i, Buffer_ } internal void -translating_fully_process_byte(Buffer_Translating_State *tran, u8 ch, u32 i, u32 size, Buffer_Translating_Emits *emits_out){ - translating_consume_byte(tran, ch, i, size); - translating_select_emit_type(tran); - translating_generate_emits(tran, ch, i, emits_out); +translating_fully_process_byte(Translation_State *tran, u8 ch, u32 i, u32 size, Translation_Emits *emits_out){ + Translation_Byte_Description description = {0}; + translating_consume_byte(tran, ch, i, size, &description); + Translation_Emit_Type emit_type = {0}; + translating_select_emit_type(tran, description, &emit_type); + translating_generate_emits(tran, emit_type, ch, i, emits_out); } internal void @@ -523,7 +538,6 @@ translation_step_read(Buffer_Model_Step step, Buffer_Model_Behavior *behavior_ou { behavior_out->do_newline = true; }break; - default: { behavior_out->do_codepoint_advance = true; @@ -956,8 +970,8 @@ buffer_measure_character_starts(Gap_Buffer *buffer, i32 *character_starts, i32 m skipping_whitespace = 1; } - Buffer_Translating_State tran = {0}; - Buffer_Translating_Emits emits = {0}; + Translation_State tran = {0}; + Translation_Emits emits = {0}; stream.use_termination_character = 1; stream.terminator = '\n'; @@ -1037,8 +1051,8 @@ struct Buffer_Measure_Wrap_State{ b32 did_wrap; b32 first_of_the_line; - Buffer_Translating_State tran; - Buffer_Translating_Emits emits; + Translation_State tran; + Translation_Emits emits; u32 J; Buffer_Model_Step step; Buffer_Model_Behavior behavior; @@ -1297,8 +1311,8 @@ buffer_remeasure_character_starts(Gap_Buffer *buffer, i32 line_start, i32 line_e } // Translation - Buffer_Translating_State tran = {0}; - Buffer_Translating_Emits emits = {0}; + Translation_State tran = {0}; + Translation_Emits emits = {0}; stream.use_termination_character = 1; stream.terminator = '\n'; @@ -1596,8 +1610,8 @@ struct Buffer_Cursor_Seek_State{ i32 font_height; - Buffer_Translating_State tran; - Buffer_Translating_Emits emits; + Translation_State tran; + Translation_Emits emits; u32 J; Buffer_Model_Step step; Buffer_Model_Behavior behavior; @@ -2089,8 +2103,8 @@ struct Buffer_Render_State{ b32 first_of_the_line; i32 wrap_unit_end; - Buffer_Translating_State tran; - Buffer_Translating_Emits emits; + Translation_State tran; + Translation_Emits emits; u32 J; Buffer_Model_Step step; Buffer_Model_Behavior behavior; diff --git a/file/4coder_translation.cpp b/file/4coder_translation.cpp new file mode 100644 index 00000000..e31ed8d0 --- /dev/null +++ b/file/4coder_translation.cpp @@ -0,0 +1,225 @@ +/* + * Mr. 4th Dimention - Allen Webster + * + * 11.03.2017 + * + * Translation system for turning byte streams into a stream of buffer model steps. + * + */ + +// TOP + +struct Translation_State{ + u8 fill_buffer[4]; + u32 fill_start_i; + u32 fill_i; + u32 fill_expected; +}; +global_const Translation_State null_buffer_translating_state = {0}; + +struct Translation_Byte_Description{ + u32 byte_class; + b32 rebuffer_current; + b32 emit_current_as_cp; + + u32 prelim_emit_type; +}; + +struct Translation_Emit_Type{ + u32 byte_class; + b32 rebuffer_current; + b32 emit_current_as_cp; + + u32 codepoint; + u32 codepoint_length; + b32 do_codepoint; + b32 do_numbers; +}; + +struct Translation_Emits{ + Buffer_Model_Step steps[5]; + Buffer_Model_Step step_current; + u32 step_count; +}; + +internal void +translating_consume_byte(Translation_State *tran, u8 ch, u32 i, u32 size, Translation_Byte_Description *desc_out){ + desc_out->byte_class = 0; + if ((ch >= ' ' && ch < 0x7F) || ch == '\t' || ch == '\n' || ch == '\r'){ + desc_out->byte_class = 1; + } + else if (ch < 0xC0){ + desc_out->byte_class = 1000; + } + else if (ch < 0xE0){ + desc_out->byte_class = 2; + } + else if (ch < 0xF0){ + desc_out->byte_class = 3; + } + else{ + desc_out->byte_class = 4; + } + + desc_out->prelim_emit_type = BufferModelUnit_None; + desc_out->rebuffer_current = false; + desc_out->emit_current_as_cp = false; + if (tran->fill_expected == 0){ + tran->fill_buffer[0] = ch; + tran->fill_start_i = i; + tran->fill_i = 1; + + if (desc_out->byte_class == 1){ + desc_out->prelim_emit_type = BufferModelUnit_Codepoint; + } + else if (desc_out->byte_class == 0 || desc_out->byte_class == 1000){ + desc_out->prelim_emit_type = BufferModelUnit_Numbers; + } + else{ + tran->fill_expected = desc_out->byte_class; + } + } + else{ + if (desc_out->byte_class == 1000){ + tran->fill_buffer[tran->fill_i] = ch; + ++tran->fill_i; + + if (tran->fill_i == tran->fill_expected){ + desc_out->prelim_emit_type = BufferModelUnit_Codepoint; + } + } + else{ + if (desc_out->byte_class >= 2 && desc_out->byte_class <= 4){ + desc_out->rebuffer_current = true; + } + else if (desc_out->byte_class == 1){ + desc_out->emit_current_as_cp = true; + } + else{ + tran->fill_buffer[tran->fill_i] = ch; + ++tran->fill_i; + } + desc_out->prelim_emit_type = BufferModelUnit_Numbers; + } + } + + if (desc_out->prelim_emit_type == BufferModelUnit_None && i+1 == size){ + desc_out->prelim_emit_type = BufferModelUnit_Numbers; + } +} + +internal void +translating_select_emit_type(Translation_State *tran, Translation_Byte_Description desc, Translation_Emit_Type *type_out){ + type_out->byte_class = desc.byte_class; + type_out->rebuffer_current = desc.rebuffer_current; + type_out->emit_current_as_cp = desc.emit_current_as_cp; + + type_out->codepoint = 0; + type_out->codepoint_length = 0; + type_out->do_codepoint = false; + type_out->do_numbers = false; + if (desc.prelim_emit_type == BufferModelUnit_Codepoint){ + type_out->codepoint = utf8_to_u32_length_unchecked(tran->fill_buffer, &type_out->codepoint_length); + if ((type_out->codepoint >= ' ' && type_out->codepoint <= 255 && type_out->codepoint != 127) || type_out->codepoint == '\t' || type_out->codepoint == '\n' || type_out->codepoint == '\r'){ + type_out->do_codepoint = true; + } + else{ + type_out->do_numbers = true; + } + } + else if (desc.prelim_emit_type == BufferModelUnit_Numbers){ + type_out->do_numbers = true; + } + + Assert((type_out->do_codepoint + type_out->do_numbers) <= 1); +} + +internal void +translating_generate_emits(Translation_State *tran, Translation_Emit_Type emit_types, u8 ch, u32 i, Translation_Emits *emits_out){ + emits_out->step_count = 0; + if (emit_types.do_codepoint){ + emits_out->steps[0].type = 1; + emits_out->steps[0].value = emit_types.codepoint; + emits_out->steps[0].i = tran->fill_start_i; + emits_out->steps[0].byte_length = emit_types.codepoint_length; + emits_out->step_count = 1; + } + else if (emit_types.do_numbers){ + for (u32 j = 0; j < tran->fill_i; ++j){ + emits_out->steps[j].type = 0; + emits_out->steps[j].value = tran->fill_buffer[j]; + emits_out->steps[j].i = tran->fill_start_i + j; + emits_out->steps[j].byte_length = 1; + } + emits_out->step_count = tran->fill_i; + } + + if (emit_types.do_codepoint || emit_types.do_numbers){ + tran->fill_start_i = 0; + tran->fill_i = 0; + tran->fill_expected = 0; + } + + if (emit_types.rebuffer_current){ + Assert(emit_types.do_codepoint || emit_types.do_numbers); + + tran->fill_buffer[0] = ch; + tran->fill_start_i = i; + tran->fill_i = 1; + tran->fill_expected = emit_types.byte_class; + } + else if (emit_types.emit_current_as_cp){ + Assert(emit_types.do_codepoint || emit_types.do_numbers); + + emits_out->steps[emits_out->step_count].type = 1; + emits_out->steps[emits_out->step_count].value = ch; + emits_out->steps[emits_out->step_count].i = i; + emits_out->steps[emits_out->step_count].byte_length = 1; + ++emits_out->step_count; + } +} + +internal void +translating_fully_process_byte(Translation_State *tran, u8 ch, u32 i, u32 size, Translation_Emits *emits_out){ + Translation_Byte_Description description = {0}; + translating_consume_byte(tran, ch, i, size, &description); + Translation_Emit_Type emit_type = {0}; + translating_select_emit_type(tran, description, &emit_type); + translating_generate_emits(tran, emit_type, ch, i, emits_out); +} + +internal void +translation_step_read(Buffer_Model_Step step, Buffer_Model_Behavior *behavior_out){ + behavior_out->do_newline = false; + behavior_out->do_codepoint_advance = false; + behavior_out->do_number_advance = false; + if (step.type == 1){ + switch (step.value){ + case '\n': + { + behavior_out->do_newline = true; + }break; + default: + { + behavior_out->do_codepoint_advance = true; + }break; + } + } + else{ + behavior_out->do_number_advance = true; + } +} + +#define TRANSLATION_DECL_OUTPUT(_j,_emit) u32 _j = 0; _j < (_emit).step_count; ++_j +#define TRANSLATION_DECL_GET_STEP(_step,_behav,_j,_emit) \ +Buffer_Model_Step _step = _emit.steps[_j]; Buffer_Model_Behavior _behav; \ +translation_step_read(_step, &_behav) + +#define TRANSLATION_OUTPUT(_j,_emit) _j = 0; _j < (_emit).step_count; ++_j +#define TRANSLATION_GET_STEP(_step,_behav,_j,_emit)\ +(_step) = _emit.steps[_j]; translation_step_read((_step), &(_behav)) + +// BOTTOM + + +