finished cleaning up translation system

master
Allen Webster 2017-03-11 20:49:45 -05:00
parent dc90ec0c83
commit 5b0883f690
3 changed files with 309 additions and 70 deletions

View File

@ -1005,8 +1005,8 @@ struct Code_Wrap_State{
f32 tab_indent_amount; f32 tab_indent_amount;
f32 byte_advance; f32 byte_advance;
Buffer_Translating_State tran; Translation_State tran;
Buffer_Translating_Emits emits; Translation_Emits emits;
u32 J; u32 J;
Buffer_Model_Step step; Buffer_Model_Step step;
Buffer_Model_Behavior behavior; Buffer_Model_Behavior behavior;

View File

@ -348,14 +348,24 @@ enum{
BufferModelUnit_Numbers, BufferModelUnit_Numbers,
}; };
struct Buffer_Translating_State{ struct Translation_State{
u8 fill_buffer[4]; u8 fill_buffer[4];
u32 fill_start_i; u32 fill_start_i;
u32 fill_i; u32 fill_i;
u32 fill_expected; u32 fill_expected;
};
global_const Translation_State null_buffer_translating_state = {0};
struct Translation_Byte_Description{
u32 byte_class;
b32 rebuffer_current;
b32 emit_current_as_cp;
u32 prelim_emit_type;
};
struct Translation_Emit_Type{
u32 byte_class; u32 byte_class;
u32 emit_type;
b32 rebuffer_current; b32 rebuffer_current;
b32 emit_current_as_cp; b32 emit_current_as_cp;
@ -364,113 +374,116 @@ struct Buffer_Translating_State{
b32 do_codepoint; b32 do_codepoint;
b32 do_numbers; b32 do_numbers;
}; };
global_const Buffer_Translating_State null_buffer_translating_state = {0};
struct Buffer_Translating_Emits{ struct Translation_Emits{
Buffer_Model_Step steps[5]; Buffer_Model_Step steps[5];
Buffer_Model_Step step_current; Buffer_Model_Step step_current;
u32 step_count; u32 step_count;
}; };
internal void internal void
translating_consume_byte(Buffer_Translating_State *tran, u8 ch, u32 i, u32 size){ translating_consume_byte(Translation_State *tran, u8 ch, u32 i, u32 size, Translation_Byte_Description *desc_out){
tran->byte_class = 0; desc_out->byte_class = 0;
if ((ch >= ' ' && ch < 0x7F) || ch == '\t' || ch == '\n' || ch == '\r'){ if ((ch >= ' ' && ch < 0x7F) || ch == '\t' || ch == '\n' || ch == '\r'){
tran->byte_class = 1; desc_out->byte_class = 1;
} }
else if (ch < 0xC0){ else if (ch < 0xC0){
tran->byte_class = 1000; desc_out->byte_class = 1000;
} }
else if (ch < 0xE0){ else if (ch < 0xE0){
tran->byte_class = 2; desc_out->byte_class = 2;
} }
else if (ch < 0xF0){ else if (ch < 0xF0){
tran->byte_class = 3; desc_out->byte_class = 3;
} }
else{ else{
tran->byte_class = 4; desc_out->byte_class = 4;
} }
tran->emit_type = BufferModelUnit_None; desc_out->prelim_emit_type = BufferModelUnit_None;
tran->rebuffer_current = false; desc_out->rebuffer_current = false;
tran->emit_current_as_cp = false; desc_out->emit_current_as_cp = false;
if (tran->fill_expected == 0){ if (tran->fill_expected == 0){
tran->fill_buffer[0] = ch; tran->fill_buffer[0] = ch;
tran->fill_start_i = i; tran->fill_start_i = i;
tran->fill_i = 1; tran->fill_i = 1;
if (tran->byte_class == 1){ if (desc_out->byte_class == 1){
tran->emit_type = BufferModelUnit_Codepoint; desc_out->prelim_emit_type = BufferModelUnit_Codepoint;
} }
else if (tran->byte_class == 0 || tran->byte_class == 1000){ else if (desc_out->byte_class == 0 || desc_out->byte_class == 1000){
tran->emit_type = BufferModelUnit_Numbers; desc_out->prelim_emit_type = BufferModelUnit_Numbers;
} }
else{ else{
tran->fill_expected = tran->byte_class; tran->fill_expected = desc_out->byte_class;
} }
} }
else{ else{
if (tran->byte_class == 1000){ if (desc_out->byte_class == 1000){
tran->fill_buffer[tran->fill_i] = ch; tran->fill_buffer[tran->fill_i] = ch;
++tran->fill_i; ++tran->fill_i;
if (tran->fill_i == tran->fill_expected){ if (tran->fill_i == tran->fill_expected){
tran->emit_type = BufferModelUnit_Codepoint; desc_out->prelim_emit_type = BufferModelUnit_Codepoint;
} }
} }
else{ else{
if (tran->byte_class >= 2 && tran->byte_class <= 4){ if (desc_out->byte_class >= 2 && desc_out->byte_class <= 4){
tran->rebuffer_current = true; desc_out->rebuffer_current = true;
} }
else if (tran->byte_class == 1){ else if (desc_out->byte_class == 1){
tran->emit_current_as_cp = true; desc_out->emit_current_as_cp = true;
} }
else{ else{
tran->fill_buffer[tran->fill_i] = ch; tran->fill_buffer[tran->fill_i] = ch;
++tran->fill_i; ++tran->fill_i;
} }
tran->emit_type = BufferModelUnit_Numbers; desc_out->prelim_emit_type = BufferModelUnit_Numbers;
} }
} }
if (tran->emit_type == BufferModelUnit_None && i+1 == size){ if (desc_out->prelim_emit_type == BufferModelUnit_None && i+1 == size){
tran->emit_type = BufferModelUnit_Numbers; desc_out->prelim_emit_type = BufferModelUnit_Numbers;
} }
} }
internal void internal void
translating_select_emit_type(Buffer_Translating_State *tran){ translating_select_emit_type(Translation_State *tran, Translation_Byte_Description desc, Translation_Emit_Type *type_out){
tran->codepoint = 0; type_out->byte_class = desc.byte_class;
tran->codepoint_length = 0; type_out->rebuffer_current = desc.rebuffer_current;
tran->do_codepoint = false; type_out->emit_current_as_cp = desc.emit_current_as_cp;
tran->do_numbers = false;
if (tran->emit_type == BufferModelUnit_Codepoint){ type_out->codepoint = 0;
tran->codepoint = utf8_to_u32_length_unchecked(tran->fill_buffer, &tran->codepoint_length); type_out->codepoint_length = 0;
if ((tran->codepoint >= ' ' && tran->codepoint <= 255 && tran->codepoint != 127) || tran->codepoint == '\t' || tran->codepoint == '\n' || tran->codepoint == '\r'){ type_out->do_codepoint = false;
tran->do_codepoint = true; type_out->do_numbers = false;
if (desc.prelim_emit_type == BufferModelUnit_Codepoint){
type_out->codepoint = utf8_to_u32_length_unchecked(tran->fill_buffer, &type_out->codepoint_length);
if ((type_out->codepoint >= ' ' && type_out->codepoint <= 255 && type_out->codepoint != 127) || type_out->codepoint == '\t' || type_out->codepoint == '\n' || type_out->codepoint == '\r'){
type_out->do_codepoint = true;
} }
else{ else{
tran->do_numbers = true; type_out->do_numbers = true;
} }
} }
else if (tran->emit_type == BufferModelUnit_Numbers){ else if (desc.prelim_emit_type == BufferModelUnit_Numbers){
tran->do_numbers = true; type_out->do_numbers = true;
} }
Assert((tran->do_codepoint + tran->do_numbers) <= 1); Assert((type_out->do_codepoint + type_out->do_numbers) <= 1);
} }
internal void internal void
translating_generate_emits(Buffer_Translating_State *tran, u8 ch, u32 i, Buffer_Translating_Emits *emits_out){ translating_generate_emits(Translation_State *tran, Translation_Emit_Type emit_types, u8 ch, u32 i, Translation_Emits *emits_out){
emits_out->step_count = 0; emits_out->step_count = 0;
if (tran->do_codepoint){ if (emit_types.do_codepoint){
emits_out->steps[0].type = 1; emits_out->steps[0].type = 1;
emits_out->steps[0].value = tran->codepoint; emits_out->steps[0].value = emit_types.codepoint;
emits_out->steps[0].i = tran->fill_start_i; emits_out->steps[0].i = tran->fill_start_i;
emits_out->steps[0].byte_length = tran->codepoint_length; emits_out->steps[0].byte_length = emit_types.codepoint_length;
emits_out->step_count = 1; emits_out->step_count = 1;
} }
else if (tran->do_numbers){ else if (emit_types.do_numbers){
for (u32 j = 0; j < tran->fill_i; ++j){ for (u32 j = 0; j < tran->fill_i; ++j){
emits_out->steps[j].type = 0; emits_out->steps[j].type = 0;
emits_out->steps[j].value = tran->fill_buffer[j]; emits_out->steps[j].value = tran->fill_buffer[j];
@ -480,22 +493,22 @@ translating_generate_emits(Buffer_Translating_State *tran, u8 ch, u32 i, Buffer_
emits_out->step_count = tran->fill_i; emits_out->step_count = tran->fill_i;
} }
if (tran->do_codepoint || tran->do_numbers){ if (emit_types.do_codepoint || emit_types.do_numbers){
tran->fill_start_i = 0; tran->fill_start_i = 0;
tran->fill_i = 0; tran->fill_i = 0;
tran->fill_expected = 0; tran->fill_expected = 0;
} }
if (tran->rebuffer_current){ if (emit_types.rebuffer_current){
Assert(tran->do_codepoint || tran->do_numbers); Assert(emit_types.do_codepoint || emit_types.do_numbers);
tran->fill_buffer[0] = ch; tran->fill_buffer[0] = ch;
tran->fill_start_i = i; tran->fill_start_i = i;
tran->fill_i = 1; tran->fill_i = 1;
tran->fill_expected = tran->byte_class; tran->fill_expected = emit_types.byte_class;
} }
else if (tran->emit_current_as_cp){ else if (emit_types.emit_current_as_cp){
Assert(tran->do_codepoint || tran->do_numbers); Assert(emit_types.do_codepoint || emit_types.do_numbers);
emits_out->steps[emits_out->step_count].type = 1; emits_out->steps[emits_out->step_count].type = 1;
emits_out->steps[emits_out->step_count].value = ch; emits_out->steps[emits_out->step_count].value = ch;
@ -506,10 +519,12 @@ translating_generate_emits(Buffer_Translating_State *tran, u8 ch, u32 i, Buffer_
} }
internal void internal void
translating_fully_process_byte(Buffer_Translating_State *tran, u8 ch, u32 i, u32 size, Buffer_Translating_Emits *emits_out){ translating_fully_process_byte(Translation_State *tran, u8 ch, u32 i, u32 size, Translation_Emits *emits_out){
translating_consume_byte(tran, ch, i, size); Translation_Byte_Description description = {0};
translating_select_emit_type(tran); translating_consume_byte(tran, ch, i, size, &description);
translating_generate_emits(tran, ch, i, emits_out); Translation_Emit_Type emit_type = {0};
translating_select_emit_type(tran, description, &emit_type);
translating_generate_emits(tran, emit_type, ch, i, emits_out);
} }
internal void internal void
@ -523,7 +538,6 @@ translation_step_read(Buffer_Model_Step step, Buffer_Model_Behavior *behavior_ou
{ {
behavior_out->do_newline = true; behavior_out->do_newline = true;
}break; }break;
default: default:
{ {
behavior_out->do_codepoint_advance = true; behavior_out->do_codepoint_advance = true;
@ -956,8 +970,8 @@ buffer_measure_character_starts(Gap_Buffer *buffer, i32 *character_starts, i32 m
skipping_whitespace = 1; skipping_whitespace = 1;
} }
Buffer_Translating_State tran = {0}; Translation_State tran = {0};
Buffer_Translating_Emits emits = {0}; Translation_Emits emits = {0};
stream.use_termination_character = 1; stream.use_termination_character = 1;
stream.terminator = '\n'; stream.terminator = '\n';
@ -1037,8 +1051,8 @@ struct Buffer_Measure_Wrap_State{
b32 did_wrap; b32 did_wrap;
b32 first_of_the_line; b32 first_of_the_line;
Buffer_Translating_State tran; Translation_State tran;
Buffer_Translating_Emits emits; Translation_Emits emits;
u32 J; u32 J;
Buffer_Model_Step step; Buffer_Model_Step step;
Buffer_Model_Behavior behavior; Buffer_Model_Behavior behavior;
@ -1297,8 +1311,8 @@ buffer_remeasure_character_starts(Gap_Buffer *buffer, i32 line_start, i32 line_e
} }
// Translation // Translation
Buffer_Translating_State tran = {0}; Translation_State tran = {0};
Buffer_Translating_Emits emits = {0}; Translation_Emits emits = {0};
stream.use_termination_character = 1; stream.use_termination_character = 1;
stream.terminator = '\n'; stream.terminator = '\n';
@ -1596,8 +1610,8 @@ struct Buffer_Cursor_Seek_State{
i32 font_height; i32 font_height;
Buffer_Translating_State tran; Translation_State tran;
Buffer_Translating_Emits emits; Translation_Emits emits;
u32 J; u32 J;
Buffer_Model_Step step; Buffer_Model_Step step;
Buffer_Model_Behavior behavior; Buffer_Model_Behavior behavior;
@ -2089,8 +2103,8 @@ struct Buffer_Render_State{
b32 first_of_the_line; b32 first_of_the_line;
i32 wrap_unit_end; i32 wrap_unit_end;
Buffer_Translating_State tran; Translation_State tran;
Buffer_Translating_Emits emits; Translation_Emits emits;
u32 J; u32 J;
Buffer_Model_Step step; Buffer_Model_Step step;
Buffer_Model_Behavior behavior; Buffer_Model_Behavior behavior;

225
file/4coder_translation.cpp Normal file
View File

@ -0,0 +1,225 @@
/*
* Mr. 4th Dimention - Allen Webster
*
* 11.03.2017
*
* Translation system for turning byte streams into a stream of buffer model steps.
*
*/
// TOP
struct Translation_State{
u8 fill_buffer[4];
u32 fill_start_i;
u32 fill_i;
u32 fill_expected;
};
global_const Translation_State null_buffer_translating_state = {0};
struct Translation_Byte_Description{
u32 byte_class;
b32 rebuffer_current;
b32 emit_current_as_cp;
u32 prelim_emit_type;
};
struct Translation_Emit_Type{
u32 byte_class;
b32 rebuffer_current;
b32 emit_current_as_cp;
u32 codepoint;
u32 codepoint_length;
b32 do_codepoint;
b32 do_numbers;
};
struct Translation_Emits{
Buffer_Model_Step steps[5];
Buffer_Model_Step step_current;
u32 step_count;
};
internal void
translating_consume_byte(Translation_State *tran, u8 ch, u32 i, u32 size, Translation_Byte_Description *desc_out){
desc_out->byte_class = 0;
if ((ch >= ' ' && ch < 0x7F) || ch == '\t' || ch == '\n' || ch == '\r'){
desc_out->byte_class = 1;
}
else if (ch < 0xC0){
desc_out->byte_class = 1000;
}
else if (ch < 0xE0){
desc_out->byte_class = 2;
}
else if (ch < 0xF0){
desc_out->byte_class = 3;
}
else{
desc_out->byte_class = 4;
}
desc_out->prelim_emit_type = BufferModelUnit_None;
desc_out->rebuffer_current = false;
desc_out->emit_current_as_cp = false;
if (tran->fill_expected == 0){
tran->fill_buffer[0] = ch;
tran->fill_start_i = i;
tran->fill_i = 1;
if (desc_out->byte_class == 1){
desc_out->prelim_emit_type = BufferModelUnit_Codepoint;
}
else if (desc_out->byte_class == 0 || desc_out->byte_class == 1000){
desc_out->prelim_emit_type = BufferModelUnit_Numbers;
}
else{
tran->fill_expected = desc_out->byte_class;
}
}
else{
if (desc_out->byte_class == 1000){
tran->fill_buffer[tran->fill_i] = ch;
++tran->fill_i;
if (tran->fill_i == tran->fill_expected){
desc_out->prelim_emit_type = BufferModelUnit_Codepoint;
}
}
else{
if (desc_out->byte_class >= 2 && desc_out->byte_class <= 4){
desc_out->rebuffer_current = true;
}
else if (desc_out->byte_class == 1){
desc_out->emit_current_as_cp = true;
}
else{
tran->fill_buffer[tran->fill_i] = ch;
++tran->fill_i;
}
desc_out->prelim_emit_type = BufferModelUnit_Numbers;
}
}
if (desc_out->prelim_emit_type == BufferModelUnit_None && i+1 == size){
desc_out->prelim_emit_type = BufferModelUnit_Numbers;
}
}
internal void
translating_select_emit_type(Translation_State *tran, Translation_Byte_Description desc, Translation_Emit_Type *type_out){
type_out->byte_class = desc.byte_class;
type_out->rebuffer_current = desc.rebuffer_current;
type_out->emit_current_as_cp = desc.emit_current_as_cp;
type_out->codepoint = 0;
type_out->codepoint_length = 0;
type_out->do_codepoint = false;
type_out->do_numbers = false;
if (desc.prelim_emit_type == BufferModelUnit_Codepoint){
type_out->codepoint = utf8_to_u32_length_unchecked(tran->fill_buffer, &type_out->codepoint_length);
if ((type_out->codepoint >= ' ' && type_out->codepoint <= 255 && type_out->codepoint != 127) || type_out->codepoint == '\t' || type_out->codepoint == '\n' || type_out->codepoint == '\r'){
type_out->do_codepoint = true;
}
else{
type_out->do_numbers = true;
}
}
else if (desc.prelim_emit_type == BufferModelUnit_Numbers){
type_out->do_numbers = true;
}
Assert((type_out->do_codepoint + type_out->do_numbers) <= 1);
}
internal void
translating_generate_emits(Translation_State *tran, Translation_Emit_Type emit_types, u8 ch, u32 i, Translation_Emits *emits_out){
emits_out->step_count = 0;
if (emit_types.do_codepoint){
emits_out->steps[0].type = 1;
emits_out->steps[0].value = emit_types.codepoint;
emits_out->steps[0].i = tran->fill_start_i;
emits_out->steps[0].byte_length = emit_types.codepoint_length;
emits_out->step_count = 1;
}
else if (emit_types.do_numbers){
for (u32 j = 0; j < tran->fill_i; ++j){
emits_out->steps[j].type = 0;
emits_out->steps[j].value = tran->fill_buffer[j];
emits_out->steps[j].i = tran->fill_start_i + j;
emits_out->steps[j].byte_length = 1;
}
emits_out->step_count = tran->fill_i;
}
if (emit_types.do_codepoint || emit_types.do_numbers){
tran->fill_start_i = 0;
tran->fill_i = 0;
tran->fill_expected = 0;
}
if (emit_types.rebuffer_current){
Assert(emit_types.do_codepoint || emit_types.do_numbers);
tran->fill_buffer[0] = ch;
tran->fill_start_i = i;
tran->fill_i = 1;
tran->fill_expected = emit_types.byte_class;
}
else if (emit_types.emit_current_as_cp){
Assert(emit_types.do_codepoint || emit_types.do_numbers);
emits_out->steps[emits_out->step_count].type = 1;
emits_out->steps[emits_out->step_count].value = ch;
emits_out->steps[emits_out->step_count].i = i;
emits_out->steps[emits_out->step_count].byte_length = 1;
++emits_out->step_count;
}
}
internal void
translating_fully_process_byte(Translation_State *tran, u8 ch, u32 i, u32 size, Translation_Emits *emits_out){
Translation_Byte_Description description = {0};
translating_consume_byte(tran, ch, i, size, &description);
Translation_Emit_Type emit_type = {0};
translating_select_emit_type(tran, description, &emit_type);
translating_generate_emits(tran, emit_type, ch, i, emits_out);
}
internal void
translation_step_read(Buffer_Model_Step step, Buffer_Model_Behavior *behavior_out){
behavior_out->do_newline = false;
behavior_out->do_codepoint_advance = false;
behavior_out->do_number_advance = false;
if (step.type == 1){
switch (step.value){
case '\n':
{
behavior_out->do_newline = true;
}break;
default:
{
behavior_out->do_codepoint_advance = true;
}break;
}
}
else{
behavior_out->do_number_advance = true;
}
}
#define TRANSLATION_DECL_OUTPUT(_j,_emit) u32 _j = 0; _j < (_emit).step_count; ++_j
#define TRANSLATION_DECL_GET_STEP(_step,_behav,_j,_emit) \
Buffer_Model_Step _step = _emit.steps[_j]; Buffer_Model_Behavior _behav; \
translation_step_read(_step, &_behav)
#define TRANSLATION_OUTPUT(_j,_emit) _j = 0; _j < (_emit).step_count; ++_j
#define TRANSLATION_GET_STEP(_step,_behav,_j,_emit)\
(_step) = _emit.steps[_j]; translation_step_read((_step), &(_behav))
// BOTTOM