From c9af44f0e7f3737396a8b50f2366f84025872c40 Mon Sep 17 00:00:00 2001 From: Allen Webster Date: Sat, 16 Jul 2016 22:24:13 -0400 Subject: [PATCH] upgrade to the new lexer complete --- 4coder_default_bindings.cpp | 4 +- 4coder_default_include.cpp | 416 ++++++++-------- 4coder_jump_parsing.cpp | 34 +- 4cpp_lexer.h | 4 +- 4ed_app_target.cpp | 7 +- 4ed_file_view.cpp | 11 +- build.bat | 2 +- test/4cpp_new_lexer.h | 962 ++++++++++++++++++------------------ test/experiment.cpp | 67 ++- 9 files changed, 780 insertions(+), 727 deletions(-) diff --git a/4coder_default_bindings.cpp b/4coder_default_bindings.cpp index f8ee8e1a..69647741 100644 --- a/4coder_default_bindings.cpp +++ b/4coder_default_bindings.cpp @@ -378,8 +378,8 @@ default_keys(Bind_Helper *context){ bind(context, 's', MDFR_CTRL, cmdid_save); bind(context, 'u', MDFR_CTRL, to_uppercase); bind(context, 'U', MDFR_CTRL, rewrite_as_single_caps); - bind(context, 'v', MDFR_CTRL, paste); - bind(context, 'V', MDFR_CTRL, paste_next); + bind(context, 'v', MDFR_CTRL, paste_and_indent); + bind(context, 'V', MDFR_CTRL, paste_next_and_indent); bind(context, 'x', MDFR_CTRL, cut); bind(context, 'y', MDFR_CTRL, cmdid_redo); bind(context, 'z', MDFR_CTRL, cmdid_undo); diff --git a/4coder_default_include.cpp b/4coder_default_include.cpp index e574cf10..6c3e2843 100644 --- a/4coder_default_include.cpp +++ b/4coder_default_include.cpp @@ -15,6 +15,10 @@ # define DEFAULT_INDENT_FLAGS 0 #endif +#ifndef DEF_TAB_WIDTH +# define DEF_TAB_WIDTH 4 +#endif + // // Memory @@ -694,6 +698,205 @@ CUSTOM_COMMAND_SIG(move_right){ true); } +// +// Auto Indenting and Whitespace +// + +static int +seek_line_end(Application_Links *app, Buffer_Summary *buffer, int pos){ + char chunk[1024]; + int chunk_size = sizeof(chunk); + Stream_Chunk stream = {0}; + + int still_looping; + char at_pos; + + if (init_stream_chunk(&stream, app, buffer, pos, chunk, chunk_size)){ + still_looping = 1; + do{ + for (; pos < stream.end; ++pos){ + at_pos = stream.data[pos]; + if (at_pos == '\n'){ + goto double_break; + } + } + still_looping = forward_stream_chunk(&stream); + }while(still_looping); + double_break:; + + if (pos > buffer->size){ + pos = buffer->size; + } + } + + return(pos); +} + +static int +seek_line_beginning(Application_Links *app, Buffer_Summary *buffer, int pos){ + char chunk[1024]; + int chunk_size = sizeof(chunk); + Stream_Chunk stream = {0}; + + int still_looping; + char at_pos; + + --pos; + if (init_stream_chunk(&stream, app, buffer, pos, chunk, chunk_size)){ + still_looping = 1; + do{ + for (; pos >= stream.start; --pos){ + at_pos = stream.data[pos]; + if (at_pos == '\n'){ + goto double_break; + } + } + still_looping = backward_stream_chunk(&stream); + }while(still_looping); + double_break:; + + if (pos != 0){ + ++pos; + } + if (pos < 0){ + pos = 0; + } + } + + return(pos); +} + +static void +move_past_lead_whitespace(Application_Links *app, View_Summary *view, Buffer_Summary *buffer){ + refresh_view(app, view); + + int new_pos = seek_line_beginning(app, buffer, view->cursor.pos); + char space[1024]; + Stream_Chunk chunk = {0}; + int still_looping = false; + + int i = new_pos; + if (init_stream_chunk(&chunk, app, buffer, i, space, sizeof(space))){ + do{ + for (; i < chunk.end; ++i){ + char at_pos = chunk.data[i]; + if (at_pos == '\n' || !char_is_whitespace(at_pos)){ + goto break2; + } + } + still_looping = forward_stream_chunk(&chunk); + }while(still_looping); + break2:; + + if (i > view->cursor.pos){ + app->view_set_cursor(app, view, seek_pos(i), true); + } + } +} + +CUSTOM_COMMAND_SIG(auto_tab_line_at_cursor){ + unsigned int access = AccessOpen; + View_Summary view = app->get_active_view(app, access); + Buffer_Summary buffer = app->get_buffer(app, view.buffer_id, access); + + app->buffer_auto_indent(app, &buffer, + view.cursor.pos, view.cursor.pos, + DEF_TAB_WIDTH, + DEFAULT_INDENT_FLAGS); + move_past_lead_whitespace(app, &view, &buffer); +} + +CUSTOM_COMMAND_SIG(auto_tab_whole_file){ + unsigned int access = AccessOpen; + View_Summary view = app->get_active_view(app, access); + Buffer_Summary buffer = app->get_buffer(app, view.buffer_id, access); + + app->buffer_auto_indent(app, &buffer, + 0, buffer.size, + DEF_TAB_WIDTH, + DEFAULT_INDENT_FLAGS); +} + +CUSTOM_COMMAND_SIG(auto_tab_range){ + unsigned int access = AccessOpen; + View_Summary view = app->get_active_view(app, access); + Buffer_Summary buffer = app->get_buffer(app, view.buffer_id, access); + Range range = get_range(&view); + + app->buffer_auto_indent(app, &buffer, + range.min, range.max, + DEF_TAB_WIDTH, + DEFAULT_INDENT_FLAGS); + move_past_lead_whitespace(app, &view, &buffer); +} + +CUSTOM_COMMAND_SIG(write_and_auto_tab){ + exec_command(app, write_character); + exec_command(app, auto_tab_line_at_cursor); +} + +CUSTOM_COMMAND_SIG(clean_all_lines){ + // TODO(allen): This command always iterates accross the entire + // buffer, so streaming it is actually the wrong call. Rewrite this + // to minimize calls to app->buffer_read_range. + View_Summary view = app->get_active_view(app, AccessOpen); + Buffer_Summary buffer = app->get_buffer(app, view.buffer_id, AccessOpen); + + int line_count = buffer.line_count; + int edit_max = line_count; + + if (edit_max*sizeof(Buffer_Edit) < app->memory_size){ + Buffer_Edit *edits = (Buffer_Edit*)app->memory; + + char data[1024]; + Stream_Chunk chunk = {0}; + + int i = 0; + if (init_stream_chunk(&chunk, app, &buffer, + i, data, sizeof(data))){ + Buffer_Edit *edit = edits; + + int buffer_size = buffer.size; + int still_looping = true; + int last_hard = buffer_size; + do{ + for (; i < chunk.end; ++i){ + char at_pos = chunk.data[i]; + if (at_pos == '\n'){ + if (last_hard+1 < i){ + edit->str_start = 0; + edit->len = 0; + edit->start = last_hard+1; + edit->end = i; + ++edit; + } + last_hard = buffer_size; + } + else if (char_is_whitespace(at_pos)){ + // NOTE(allen): do nothing + } + else{ + last_hard = i; + } + } + + still_looping = forward_stream_chunk(&chunk); + }while(still_looping); + + if (last_hard+1 < buffer_size){ + edit->str_start = 0; + edit->len = 0; + edit->start = last_hard+1; + edit->end = buffer_size; + ++edit; + } + + int edit_count = (int)(edit - edits); + app->buffer_batch_edit(app, &buffer, 0, 0, edits, edit_count, BatchEdit_PreserveTokens); + } + } +} + // // Clipboard // @@ -843,6 +1046,16 @@ CUSTOM_COMMAND_SIG(paste_next){ } } +CUSTOM_COMMAND_SIG(paste_and_indent){ + exec_command(app, paste); + exec_command(app, auto_tab_range); +} + +CUSTOM_COMMAND_SIG(paste_next_and_indent){ + exec_command(app, paste_next); + exec_command(app, auto_tab_range); +} + // // Fancy Editing // @@ -1032,70 +1245,6 @@ CUSTOM_COMMAND_SIG(seek_whitespace_down){ true); } -static int -seek_line_end(Application_Links *app, Buffer_Summary *buffer, int pos){ - char chunk[1024]; - int chunk_size = sizeof(chunk); - Stream_Chunk stream = {0}; - - int still_looping; - char at_pos; - - if (init_stream_chunk(&stream, app, buffer, pos, chunk, chunk_size)){ - still_looping = 1; - do{ - for (; pos < stream.end; ++pos){ - at_pos = stream.data[pos]; - if (at_pos == '\n'){ - goto double_break; - } - } - still_looping = forward_stream_chunk(&stream); - }while(still_looping); - double_break:; - - if (pos > buffer->size){ - pos = buffer->size; - } - } - - return(pos); -} - -static int -seek_line_beginning(Application_Links *app, Buffer_Summary *buffer, int pos){ - char chunk[1024]; - int chunk_size = sizeof(chunk); - Stream_Chunk stream = {0}; - - int still_looping; - char at_pos; - - --pos; - if (init_stream_chunk(&stream, app, buffer, pos, chunk, chunk_size)){ - still_looping = 1; - do{ - for (; pos >= stream.start; --pos){ - at_pos = stream.data[pos]; - if (at_pos == '\n'){ - goto double_break; - } - } - still_looping = backward_stream_chunk(&stream); - }while(still_looping); - double_break:; - - if (pos != 0){ - ++pos; - } - if (pos < 0){ - pos = 0; - } - } - - return(pos); -} - CUSTOM_COMMAND_SIG(seek_end_of_line){ unsigned int access = AccessProtected; View_Summary view = app->get_active_view(app, access); @@ -1163,38 +1312,6 @@ CUSTOM_COMMAND_SIG(write_increment){ write_string(app, make_lit_string("++")); } -#ifndef DEF_TAB_WIDTH -# define DEF_TAB_WIDTH 4 -#endif - -static void -move_past_lead_whitespace(Application_Links *app, View_Summary *view, Buffer_Summary *buffer){ - refresh_view(app, view); - - int new_pos = seek_line_beginning(app, buffer, view->cursor.pos); - char space[1024]; - Stream_Chunk chunk = {0}; - int still_looping = false; - - int i = new_pos; - if (init_stream_chunk(&chunk, app, buffer, i, space, sizeof(space))){ - do{ - for (; i < chunk.end; ++i){ - char at_pos = chunk.data[i]; - if (at_pos == '\n' || !char_is_whitespace(at_pos)){ - goto break2; - } - } - still_looping = forward_stream_chunk(&chunk); - }while(still_looping); - break2:; - - if (i > view->cursor.pos){ - app->view_set_cursor(app, view, seek_pos(i), true); - } - } -} - static void long_braces(Application_Links *app, char *text, int size){ unsigned int access = AccessOpen; @@ -1873,113 +1990,6 @@ CUSTOM_COMMAND_SIG(execute_previous_cli){ } } -// -// Auto Indenting and Whitespace -// - -CUSTOM_COMMAND_SIG(auto_tab_line_at_cursor){ - unsigned int access = AccessOpen; - View_Summary view = app->get_active_view(app, access); - Buffer_Summary buffer = app->get_buffer(app, view.buffer_id, access); - - app->buffer_auto_indent(app, &buffer, - view.cursor.pos, view.cursor.pos, - DEF_TAB_WIDTH, - DEFAULT_INDENT_FLAGS); - move_past_lead_whitespace(app, &view, &buffer); -} - -CUSTOM_COMMAND_SIG(auto_tab_whole_file){ - unsigned int access = AccessOpen; - View_Summary view = app->get_active_view(app, access); - Buffer_Summary buffer = app->get_buffer(app, view.buffer_id, access); - - app->buffer_auto_indent(app, &buffer, - 0, buffer.size, - DEF_TAB_WIDTH, - DEFAULT_INDENT_FLAGS); -} - -CUSTOM_COMMAND_SIG(auto_tab_range){ - unsigned int access = AccessOpen; - View_Summary view = app->get_active_view(app, access); - Buffer_Summary buffer = app->get_buffer(app, view.buffer_id, access); - Range range = get_range(&view); - - app->buffer_auto_indent(app, &buffer, - range.min, range.max, - DEF_TAB_WIDTH, - DEFAULT_INDENT_FLAGS); - move_past_lead_whitespace(app, &view, &buffer); -} - -CUSTOM_COMMAND_SIG(write_and_auto_tab){ - exec_command(app, write_character); - exec_command(app, auto_tab_line_at_cursor); -} - -CUSTOM_COMMAND_SIG(clean_all_lines){ - // TODO(allen): This command always iterates accross the entire - // buffer, so streaming it is actually the wrong call. Rewrite this - // to minimize calls to app->buffer_read_range. - View_Summary view = app->get_active_view(app, AccessOpen); - Buffer_Summary buffer = app->get_buffer(app, view.buffer_id, AccessOpen); - - int line_count = buffer.line_count; - int edit_max = line_count; - - if (edit_max*sizeof(Buffer_Edit) < app->memory_size){ - Buffer_Edit *edits = (Buffer_Edit*)app->memory; - - char data[1024]; - Stream_Chunk chunk = {0}; - - int i = 0; - if (init_stream_chunk(&chunk, app, &buffer, - i, data, sizeof(data))){ - Buffer_Edit *edit = edits; - - int buffer_size = buffer.size; - int still_looping = true; - int last_hard = buffer_size; - do{ - for (; i < chunk.end; ++i){ - char at_pos = chunk.data[i]; - if (at_pos == '\n'){ - if (last_hard+1 < i){ - edit->str_start = 0; - edit->len = 0; - edit->start = last_hard+1; - edit->end = i; - ++edit; - } - last_hard = buffer_size; - } - else if (char_is_whitespace(at_pos)){ - // NOTE(allen): do nothing - } - else{ - last_hard = i; - } - } - - still_looping = forward_stream_chunk(&chunk); - }while(still_looping); - - if (last_hard+1 < buffer_size){ - edit->str_start = 0; - edit->len = 0; - edit->start = last_hard+1; - edit->end = buffer_size; - ++edit; - } - - int edit_count = (int)(edit - edits); - app->buffer_batch_edit(app, &buffer, 0, 0, edits, edit_count, BatchEdit_PreserveTokens); - } - } -} - // // Default Building Stuff // diff --git a/4coder_jump_parsing.cpp b/4coder_jump_parsing.cpp index 376e6e71..7bf3e502 100644 --- a/4coder_jump_parsing.cpp +++ b/4coder_jump_parsing.cpp @@ -14,15 +14,6 @@ jump_to_location(Application_Links *app, View_Summary *view, Jump_Location *l){ app->view_set_cursor(app, view, seek_line_char(l->line, l->column), true); } -static int -gcc_style_verify(String line, int colon_pos){ - int result = false; - if (colon_pos < line.size){ - result = true; - } - return(result); -} - static int ms_style_verify(String line, int paren_pos){ int result = false; @@ -43,13 +34,16 @@ parse_error(String line, Jump_Location *location, int skip_sub_errors, int *colon_char){ int result = false; + String original_line = line; + line = skip_chop_whitespace(line); + int colon_pos = find(line, 0, ')'); if (ms_style_verify(line, colon_pos)){ colon_pos = find(line, colon_pos, ':'); if (colon_pos < line.size){ String location_str = substr(line, 0, colon_pos); - if (!(skip_sub_errors && line.str[0] == ' ')){ + if (!(skip_sub_errors && original_line.str[0] == ' ')){ location_str = skip_chop_whitespace(location_str); int paren_pos = find(location_str, 0, '('); @@ -92,18 +86,16 @@ parse_error(String line, Jump_Location *location, else{ int colon_pos1 = find(line, 0, ':'); - if (colon_pos1 == 1){ - if (line.size > colon_pos1+1){ - if (char_is_slash(line.str[colon_pos1+1])){ - colon_pos1 = find(line, colon_pos1+1, ':'); - } + if (line.size > colon_pos1+1){ + if (char_is_slash(line.str[colon_pos1+1])){ + colon_pos1 = find(line, colon_pos1+1, ':'); } } int colon_pos2 = find(line, colon_pos1+1, ':'); int colon_pos3 = find(line, colon_pos2+1, ':'); - if (gcc_style_verify(line, colon_pos3)){ + if (colon_pos3 < line.size){ String filename = substr(line, 0, colon_pos1); String line_number = substr(line, colon_pos1+1, colon_pos2 - colon_pos1 - 1); String column_number = substr(line, colon_pos2+1, colon_pos3 - colon_pos2 - 1); @@ -119,8 +111,14 @@ parse_error(String line, Jump_Location *location, } } else{ - int colon_pos1 = find(line, 0, ':'); - int colon_pos2 = find(line, colon_pos1+1, ':'); + colon_pos1 = find(line, 0, ':'); + if (line.size > colon_pos1+1){ + if (char_is_slash(line.str[colon_pos1+1])){ + colon_pos1 = find(line, colon_pos1+1, ':'); + } + } + + colon_pos2 = find(line, colon_pos1+1, ':'); if (colon_pos2 < line.size){ String filename = substr(line, 0, colon_pos1); diff --git a/4cpp_lexer.h b/4cpp_lexer.h index 486c587f..3c9b3852 100644 --- a/4cpp_lexer.h +++ b/4cpp_lexer.h @@ -862,18 +862,18 @@ FCPP_LINK Cpp_Read_Result cpp_read_block_comment(char *data, int size, int pos){ Cpp_Read_Result result = {}; result.token.start = pos; - + _Assert(data[pos] == '/' && data[pos + 1] == '*'); pos += 2; while (pos < size){ if (data[pos] == '*' && pos + 1 < size && data[pos+1] == '/'){ + pos += 2; break; } ++pos; } - pos += 2; result.token.size = pos - result.token.start; result.token.type = CPP_TOKEN_COMMENT; result.pos = pos; diff --git a/4ed_app_target.cpp b/4ed_app_target.cpp index ee787c2e..844fc632 100644 --- a/4ed_app_target.cpp +++ b/4ed_app_target.cpp @@ -27,9 +27,14 @@ #include "4coder_table.cpp" +#define USE_NEW_LEXER 1 + +#if USE_NEW_LEXER +#include "test/4cpp_new_lexer.h" +#else #define FCPP_LEXER_IMPLEMENTATION -//#include "test/4cpp_new_lexer.h" #include "4cpp_lexer.h" +#endif #include "4ed_template.cpp" diff --git a/4ed_file_view.cpp b/4ed_file_view.cpp index d308f94e..d20cc1df 100644 --- a/4ed_file_view.cpp +++ b/4ed_file_view.cpp @@ -1183,7 +1183,7 @@ Job_Callback_Sig(job_full_lex){ i32 buffer_size = file->state.buffer.size; buffer_size = (buffer_size + 3)&(~3); -#if 0 +#if USE_NEW_LEXER while (memory->size < buffer_size*2){ system->grow_thread_memory(memory); } @@ -1202,7 +1202,7 @@ Job_Callback_Sig(job_full_lex){ do{ i32 result = cpp_lex_size_nonalloc(&lex, - cpp_file.data, cpp_file.size, cpp_file.size, + text_data, text_size, text_size, &tokens, 2048); switch (result){ @@ -1376,9 +1376,12 @@ file_relex_parallel(System_Functions *system, relex_space.max_count = state.space_request; relex_space.tokens = push_array(part, Cpp_Token, relex_space.max_count); - // char *spare = push_array(part, char, cpp_file.size); - // if (cpp_relex_nonalloc_main(&state, &relex_space, &relex_end, spare)){ +#if USE_NEW_LEXER + char *spare = push_array(part, char, size); + if (cpp_relex_nonalloc_main(&state, &relex_space, &relex_end, spare)){ +#else if (cpp_relex_nonalloc_main(&state, &relex_space, &relex_end)){ +#endif inline_lex = 0; } else{ diff --git a/build.bat b/build.bat index d4d32294..c266190f 100644 --- a/build.bat +++ b/build.bat @@ -1,6 +1,6 @@ @echo off -REM "build_exp.bat" /Zi +REM "build_exp.bat" /O2 "build_all.bat" /DFRED_SUPER /DFRED_INTERNAL /Zi REM "build_all.bat" /DFRED_INTERNAL /Zi REM "build_all.bat" /O2 /Zi diff --git a/test/4cpp_new_lexer.h b/test/4cpp_new_lexer.h index d5d2f8e0..2eaf6eb0 100644 --- a/test/4cpp_new_lexer.h +++ b/test/4cpp_new_lexer.h @@ -220,7 +220,6 @@ cpp_shift_token_starts(Cpp_Token_Stack *stack, int from_token_i, int shift_amoun enum Pos_Update_Rule{ PUR_none, PUR_back_one, - PUR_unget_whitespace, }; lexer_link Lex_PP_State @@ -259,7 +258,7 @@ cpp_pp_directive_to_state(Cpp_Token_Type type){ case CPP_PP_ERROR: result = LSPP_error; break; - + case CPP_PP_UNKNOWN: case CPP_PP_ELSE: case CPP_PP_ENDIF: @@ -385,6 +384,7 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, DrCase(4); DrCase(5); DrCase(6); + DrCase(7); } for (;;){ @@ -410,6 +410,8 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, S.pp_state -= LSPP_count; } + S.token.state_flags = S.pp_state; + S.token_start = S.pos; S.tb_pos = 0; S.fsm = zero_lex_fsm(); @@ -438,522 +440,530 @@ cpp_lex_nonalloc(Lex_Data *S_ptr, Assert(S.fsm.emit_token == 1); - if (c != 0){ - if (S.fsm.state >= LS_count) S.fsm.state -= LS_count; - pos_update_rule = PUR_none; - if (S.pp_state == LSPP_include){ - switch (S.fsm.state){ - case LSINC_default:break; - - case LSINC_quotes: - case LSINC_pointy: - S.token.type = CPP_TOKEN_INCLUDE_FILE; - S.token.flags = 0; - break; - - case LSINC_junk: - S.token.type = CPP_TOKEN_JUNK; - S.token.flags = 0; - break; - } + if (c == 0){ + S.completed = 1; + } + + if (S.fsm.state >= LS_count) S.fsm.state -= LS_count; + pos_update_rule = PUR_none; + if (S.pp_state == LSPP_include){ + switch (S.fsm.state){ + case LSINC_default:break; + + case LSINC_quotes: + case LSINC_pointy: + S.token.type = CPP_TOKEN_INCLUDE_FILE; + S.token.flags = 0; + break; + + case LSINC_junk: + S.token.type = CPP_TOKEN_JUNK; + S.token.flags = 0; + break; } - else{ - switch (S.fsm.state){ - case LS_default: - switch (c){ + } + else{ + switch (S.fsm.state){ + case LS_default: + switch (c){ + case 0: S.fsm.emit_token = 0; break; + #define OperCase(op,t) case op: S.token.type = t; break; - OperCase('{', CPP_TOKEN_BRACE_OPEN); - OperCase('}', CPP_TOKEN_BRACE_CLOSE); - - OperCase('[', CPP_TOKEN_BRACKET_OPEN); - OperCase(']', CPP_TOKEN_BRACKET_CLOSE); - - OperCase('(', CPP_TOKEN_PARENTHESE_OPEN); - OperCase(')', CPP_TOKEN_PARENTHESE_CLOSE); - - OperCase('~', CPP_TOKEN_TILDE); - OperCase(',', CPP_TOKEN_COMMA); - OperCase(';', CPP_TOKEN_SEMICOLON); - OperCase('?', CPP_TOKEN_TERNARY_QMARK); - - OperCase('@', CPP_TOKEN_JUNK); - OperCase('$', CPP_TOKEN_JUNK); + OperCase('{', CPP_TOKEN_BRACE_OPEN); + OperCase('}', CPP_TOKEN_BRACE_CLOSE); + + OperCase('[', CPP_TOKEN_BRACKET_OPEN); + OperCase(']', CPP_TOKEN_BRACKET_CLOSE); + + OperCase('(', CPP_TOKEN_PARENTHESE_OPEN); + OperCase(')', CPP_TOKEN_PARENTHESE_CLOSE); + + OperCase('~', CPP_TOKEN_TILDE); + OperCase(',', CPP_TOKEN_COMMA); + OperCase(';', CPP_TOKEN_SEMICOLON); + OperCase('?', CPP_TOKEN_TERNARY_QMARK); + + OperCase('@', CPP_TOKEN_JUNK); + OperCase('$', CPP_TOKEN_JUNK); #undef OperCase - - case '\\': - if (S.pp_state == LSPP_default){ - S.token.type = CPP_TOKEN_JUNK; - } - else{ - S.pos_overide = S.pos; - S.wfsm.white_done = 0; - for (;;){ - for (; S.wfsm.white_done == 0 && S.pos < end_pos;){ - c = chunk[S.pos++]; - if (!(c == ' ' || c == '\t' || c == '\r' || c == '\v' || c == '\f')) S.wfsm.white_done = 1; - } - - if (S.wfsm.white_done == 0){ - S.chunk_pos += size; - DrYield(1, LexNeedChunk); - } - else break; - } - - if (c == '\n'){ - S.fsm.emit_token = 0; - S.pos_overide = 0; - } - else{ - S.token.type = CPP_TOKEN_JUNK; - } - } - break; - } - if (c != '@' && c != '$' && c != '\\'){ - S.token.flags = CPP_TFLAG_IS_OPERATOR; - } - break; - case LS_identifier: - { - --S.pos; - - int word_size = S.pos - S.token_start; - - if (S.pp_state == LSPP_body_if){ - if (match(make_string(S.tb, word_size), make_lit_string("defined"))){ - S.token.type = CPP_TOKEN_DEFINED; - S.token.flags = CPP_TFLAG_IS_OPERATOR | CPP_TFLAG_IS_KEYWORD; - break; - } - } - - Sub_Match_List_Result sub_match; - sub_match = sub_match_list(S.tb, S.tb_pos, 0, bool_lits, word_size); - - if (sub_match.index != -1){ - S.token.type = CPP_TOKEN_BOOLEAN_CONSTANT; - S.token.flags = CPP_TFLAG_IS_KEYWORD; - } - else{ - sub_match = sub_match_list(S.tb, S.tb_pos, 0, keywords, word_size); - - if (sub_match.index != -1){ - String_And_Flag data = keywords.data[sub_match.index]; - S.token.type = (Cpp_Token_Type)data.flags; - S.token.flags = CPP_TFLAG_IS_KEYWORD; - } - else{ - S.token.type = CPP_TOKEN_IDENTIFIER; - S.token.flags = 0; - } - } - }break; - - case LS_pound: - S.token.flags = 0; - switch (c){ - case '#': S.token.type = CPP_PP_CONCAT; break; - default: - S.token.type = CPP_PP_STRINGIFY; - pos_update_rule = PUR_back_one; - break; + case '\\': + if (S.pp_state == LSPP_default){ + S.token.type = CPP_TOKEN_JUNK; } - break; - - case LS_pp: - { - S.fsm.directive_state = LSDIR_default; - S.fsm.emit_token = 0; + else{ + S.pos_overide = S.pos; + S.wfsm.white_done = 0; for (;;){ - for (; S.fsm.directive_state < LSDIR_count && S.pos < end_pos;){ + for (; S.wfsm.white_done == 0 && S.pos < end_pos;){ c = chunk[S.pos++]; - S.fsm.directive_state = pp_directive_table[S.fsm.directive_state + pp_directive_eq_classes[c]]; + if (!(c == ' ' || c == '\t' || c == '\r' || c == '\v' || c == '\f')) S.wfsm.white_done = 1; } - S.fsm.emit_token = (S.fsm.int_state >= LSDIR_count); - if (S.fsm.emit_token == 0){ + if (S.wfsm.white_done == 0){ S.chunk_pos += size; - DrYield(6, LexNeedChunk); + DrYield(1, LexNeedChunk); } else break; } - --S.pos; - Cpp_Token_Type type = (Cpp_Token_Type)(S.fsm.directive_state - pp_directive_terminal_base); - S.token.type = type; - if (type == CPP_TOKEN_JUNK){ - S.token.flags = 0; + if (c == '\n'){ + S.fsm.emit_token = 0; + S.pos_overide = 0; } else{ - S.token.flags = CPP_TFLAG_PP_DIRECTIVE; - S.pp_state = (unsigned char)cpp_pp_directive_to_state(S.token.type); + S.token.type = CPP_TOKEN_JUNK; } - }break; - - case LS_number: - case LS_number0: - case LS_hex: - S.fsm.int_state = LSINT_default; - S.fsm.emit_token = 0; + } + break; + } + if (c != '@' && c != '$' && c != '\\'){ + S.token.flags = CPP_TFLAG_IS_OPERATOR; + } + break; + + case LS_identifier: + { --S.pos; - for (;;){ - for (; S.fsm.int_state < LSINT_count && S.pos < end_pos;){ - c = chunk[S.pos++]; - S.fsm.int_state = int_fsm_table[S.fsm.int_state + int_fsm_eq_classes[c]]; + + int word_size = S.pos - S.token_start; + + if (S.pp_state == LSPP_body_if){ + if (match(make_string(S.tb, word_size), make_lit_string("defined"))){ + S.token.type = CPP_TOKEN_DEFINED; + S.token.flags = CPP_TFLAG_IS_OPERATOR | CPP_TFLAG_IS_KEYWORD; + break; } - S.fsm.emit_token = (S.fsm.int_state >= LSINT_count); + } + + Sub_Match_List_Result sub_match; + sub_match = sub_match_list(S.tb, S.tb_pos, 0, bool_lits, word_size); + + if (sub_match.index != -1){ + S.token.type = CPP_TOKEN_BOOLEAN_CONSTANT; + S.token.flags = CPP_TFLAG_IS_KEYWORD; + } + else{ + sub_match = sub_match_list(S.tb, S.tb_pos, 0, keywords, word_size); + + if (sub_match.index != -1){ + String_And_Flag data = keywords.data[sub_match.index]; + S.token.type = (Cpp_Token_Type)data.flags; + S.token.flags = CPP_TFLAG_IS_KEYWORD; + } + else{ + S.token.type = CPP_TOKEN_IDENTIFIER; + S.token.flags = 0; + } + } + }break; + + case LS_pound: + S.token.flags = 0; + switch (c){ + case '#': S.token.type = CPP_PP_CONCAT; break; + default: + S.token.type = CPP_PP_STRINGIFY; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_pp: + { + S.fsm.directive_state = LSDIR_default; + S.fsm.emit_token = 0; + for (;;){ + for (; S.fsm.directive_state < LSDIR_count && S.pos < end_pos;){ + c = chunk[S.pos++]; + S.fsm.directive_state = pp_directive_table[S.fsm.directive_state + pp_directive_eq_classes[c]]; + } + S.fsm.emit_token = (S.fsm.int_state >= LSDIR_count); if (S.fsm.emit_token == 0){ S.chunk_pos += size; - DrYield(5, LexNeedChunk); + DrYield(6, LexNeedChunk); } else break; } --S.pos; - S.token.type = CPP_TOKEN_INTEGER_CONSTANT; - S.token.flags = 0; - break; - - case LS_float: - case LS_crazy_float0: - case LS_crazy_float1: - S.token.type = CPP_TOKEN_FLOATING_CONSTANT; - S.token.flags = 0; - switch (c){ - case 'f': case 'F': - case 'l': case 'L':break; - default: - pos_update_rule = PUR_back_one; - break; + Cpp_Token_Type type = (Cpp_Token_Type)(S.fsm.directive_state - pp_directive_terminal_base); + S.token.type = type; + if (type == CPP_TOKEN_JUNK){ + S.token.flags = 0; } - break; - - case LS_char: - case LS_char_slashed: - S.token.type = CPP_TOKEN_CHARACTER_CONSTANT; - S.token.flags = 0; - break; - - case LS_char_multiline: - S.token.type = CPP_TOKEN_CHARACTER_CONSTANT; - S.token.flags = CPP_TFLAG_MULTILINE; - break; - - case LS_string: - case LS_string_slashed: - S.token.type = CPP_TOKEN_STRING_CONSTANT; - S.token.flags = 0; - break; - - case LS_string_multiline: - S.token.type = CPP_TOKEN_STRING_CONSTANT; - S.token.flags = CPP_TFLAG_MULTILINE; - break; - - case LS_comment_pre: - S.token.flags = CPP_TFLAG_IS_OPERATOR; - switch (c){ - case '=': S.token.type = CPP_TOKEN_DIVEQ; break; - default: - S.token.type = CPP_TOKEN_DIV; - pos_update_rule = PUR_back_one; - break; + else{ + S.token.flags = CPP_TFLAG_PP_DIRECTIVE; + S.pp_state = (unsigned char)cpp_pp_directive_to_state(S.token.type); } - break; - - case LS_comment: - case LS_comment_slashed: - case LS_comment_block: - case LS_comment_block_ending: - S.token.type = CPP_TOKEN_COMMENT; - S.token.flags = 0; - pos_update_rule = PUR_unget_whitespace; - break; - - case LS_error_message: - S.token.type = CPP_TOKEN_ERROR_MESSAGE; - S.token.flags = 0; - pos_update_rule = PUR_unget_whitespace; - break; - - case LS_dot: - S.token.flags = CPP_TFLAG_IS_OPERATOR; - switch (c){ - case '*': S.token.type = CPP_TOKEN_PTRDOT; break; - default: - S.token.type = CPP_TOKEN_DOT; - pos_update_rule = PUR_back_one; - break; - } - break; - - case LS_ellipsis: - switch (c){ - case '.': - S.token.flags = CPP_TFLAG_IS_OPERATOR; - S.token.type = CPP_TOKEN_ELLIPSIS; - break; - - default: - S.token.type = CPP_TOKEN_JUNK; - pos_update_rule = PUR_back_one; - break; - } - break; - - case LS_less: - S.token.flags = CPP_TFLAG_IS_OPERATOR; - switch (c){ - case '=': S.token.type = CPP_TOKEN_LESSEQ; break; - default: - S.token.type = CPP_TOKEN_LESS; - pos_update_rule = PUR_back_one; - break; - } - break; - - case LS_less_less: - S.token.flags = CPP_TFLAG_IS_OPERATOR; - switch (c){ - case '=': S.token.type = CPP_TOKEN_LSHIFTEQ; break; - default: - S.token.type = CPP_TOKEN_LSHIFT; - pos_update_rule = PUR_back_one; - break; - } - break; - - case LS_more: - S.token.flags = CPP_TFLAG_IS_OPERATOR; - switch (c){ - case '=': S.token.type = CPP_TOKEN_GRTREQ; break; - default: - S.token.type = CPP_TOKEN_GRTR; - pos_update_rule = PUR_back_one; - break; - } - break; - - case LS_more_more: - S.token.flags = CPP_TFLAG_IS_OPERATOR; - switch (c){ - case '=': S.token.type = CPP_TOKEN_RSHIFTEQ; break; - default: - S.token.type = CPP_TOKEN_RSHIFT; - pos_update_rule = PUR_back_one; - break; - } - break; - - case LS_minus: - S.token.flags = CPP_TFLAG_IS_OPERATOR; - switch (c){ - case '-': S.token.type = CPP_TOKEN_DECREMENT; break; - case '=': S.token.type = CPP_TOKEN_SUBEQ; break; - default: - S.token.type = CPP_TOKEN_MINUS; - pos_update_rule = PUR_back_one; - break; - } - break; - - case LS_arrow: - S.token.flags = CPP_TFLAG_IS_OPERATOR; - switch (c){ - case '*': S.token.type = CPP_TOKEN_PTRARROW; break; - default: - S.token.type = CPP_TOKEN_ARROW; - pos_update_rule = PUR_back_one; - break; - } - break; - - case LS_and: - S.token.flags = CPP_TFLAG_IS_OPERATOR; - switch (c){ - case '&': S.token.type = CPP_TOKEN_AND; break; - case '=': S.token.type = CPP_TOKEN_ANDEQ; break; - default: - S.token.type = CPP_TOKEN_AMPERSAND; - pos_update_rule = PUR_back_one; - break; - } - break; - - case LS_or: - S.token.flags = CPP_TFLAG_IS_OPERATOR; - switch (c){ - case '|': S.token.type = CPP_TOKEN_OR; break; - case '=': S.token.type = CPP_TOKEN_OREQ; break; - default: - S.token.type = CPP_TOKEN_BIT_OR; - pos_update_rule = PUR_back_one; - break; - } - break; - - case LS_plus: - S.token.flags = CPP_TFLAG_IS_OPERATOR; - switch (c){ - case '+': S.token.type = CPP_TOKEN_INCREMENT; break; - case '=': S.token.type = CPP_TOKEN_ADDEQ; break; - default: - S.token.type = CPP_TOKEN_PLUS; - pos_update_rule = PUR_back_one; - break; - } - break; - - case LS_colon: - S.token.flags = CPP_TFLAG_IS_OPERATOR; - switch (c){ - case ':': S.token.type = CPP_TOKEN_SCOPE; break; - default: - S.token.type = CPP_TOKEN_COLON; - pos_update_rule = PUR_back_one; - break; - } - break; - - case LS_star: - S.token.flags = CPP_TFLAG_IS_OPERATOR; - switch (c){ - case '=': S.token.type = CPP_TOKEN_MULEQ; break; - default: - S.token.type = CPP_TOKEN_STAR; - pos_update_rule = PUR_back_one; - break; - } - break; - - case LS_modulo: - S.token.flags = CPP_TFLAG_IS_OPERATOR; - switch (c){ - case '=': S.token.type = CPP_TOKEN_MODEQ; break; - default: - S.token.type = CPP_TOKEN_MOD; - pos_update_rule = PUR_back_one; - break; - } - break; - - case LS_caret: - S.token.flags = CPP_TFLAG_IS_OPERATOR; - switch (c){ - case '=': S.token.type = CPP_TOKEN_XOREQ; break; - default: - S.token.type = CPP_TOKEN_BIT_XOR; - pos_update_rule = PUR_back_one; - break; - } - break; - - case LS_eq: - S.token.flags = CPP_TFLAG_IS_OPERATOR; - switch (c){ - case '=': S.token.type = CPP_TOKEN_EQEQ; break; - default: - S.token.type = CPP_TOKEN_EQ; - pos_update_rule = PUR_back_one; - break; - } - break; - - case LS_bang: - S.token.flags = CPP_TFLAG_IS_OPERATOR; - switch (c){ - case '=': S.token.type = CPP_TOKEN_NOTEQ; break; - default: - S.token.type = CPP_TOKEN_NOT; - pos_update_rule = PUR_back_one; - break; - } - break; - } + }break; - switch (pos_update_rule){ - case PUR_back_one: - --S.pos; + case LS_number: + case LS_number0: + case LS_hex: + S.fsm.int_state = LSINT_default; + S.fsm.emit_token = 0; + --S.pos; + for (;;){ + for (; S.fsm.int_state < LSINT_count && S.pos < end_pos;){ + c = chunk[S.pos++]; + S.fsm.int_state = int_fsm_table[S.fsm.int_state + int_fsm_eq_classes[c]]; + } + S.fsm.emit_token = (S.fsm.int_state >= LSINT_count); + + if (S.fsm.emit_token == 0){ + S.chunk_pos += size; + DrYield(5, LexNeedChunk); + } + else break; + } + --S.pos; + + S.token.type = CPP_TOKEN_INTEGER_CONSTANT; + S.token.flags = 0; + break; + + case LS_float: + case LS_crazy_float0: + case LS_crazy_float1: + S.token.type = CPP_TOKEN_FLOATING_CONSTANT; + S.token.flags = 0; + switch (c){ + case 'f': case 'F': + case 'l': case 'L':break; + default: + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_char: + case LS_char_slashed: + S.token.type = CPP_TOKEN_CHARACTER_CONSTANT; + S.token.flags = 0; + break; + + case LS_char_multiline: + S.token.type = CPP_TOKEN_CHARACTER_CONSTANT; + S.token.flags = CPP_TFLAG_MULTILINE; + break; + + case LS_string: + case LS_string_slashed: + S.token.type = CPP_TOKEN_STRING_CONSTANT; + S.token.flags = 0; + break; + + case LS_string_multiline: + S.token.type = CPP_TOKEN_STRING_CONSTANT; + S.token.flags = CPP_TFLAG_MULTILINE; + break; + + case LS_comment_pre: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '=': S.token.type = CPP_TOKEN_DIVEQ; break; + default: + S.token.type = CPP_TOKEN_DIV; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_comment: + case LS_comment_slashed: + S.token.type = CPP_TOKEN_COMMENT; + S.token.flags = 0; + pos_update_rule = PUR_back_one; + break; + + case LS_comment_block: + case LS_comment_block_ending: + S.token.type = CPP_TOKEN_COMMENT; + S.token.flags = 0; + break; + + case LS_error_message: + S.token.type = CPP_TOKEN_ERROR_MESSAGE; + S.token.flags = 0; + pos_update_rule = PUR_back_one; + break; + + case LS_dot: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '*': S.token.type = CPP_TOKEN_PTRDOT; break; + default: + S.token.type = CPP_TOKEN_DOT; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_ellipsis: + switch (c){ + case '.': + S.token.flags = CPP_TFLAG_IS_OPERATOR; + S.token.type = CPP_TOKEN_ELLIPSIS; break; - case PUR_unget_whitespace: - c = chunk[--S.pos]; - while (c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\v' || c == '\f'){ - c = chunk[--S.pos]; - } - ++S.pos; + default: + S.token.type = CPP_TOKEN_JUNK; + pos_update_rule = PUR_back_one; break; } + break; - if ((S.token.flags & CPP_TFLAG_PP_DIRECTIVE) == 0){ - switch (S.pp_state){ - case LSPP_include: - if (S.token.type != CPP_TOKEN_INCLUDE_FILE){ - S.token.type = CPP_TOKEN_JUNK; - } - S.pp_state = LSPP_junk; - break; - - case LSPP_macro_identifier: - if (S.fsm.state != LS_identifier){ - S.token.type = CPP_TOKEN_JUNK; - S.pp_state = LSPP_junk; - } - else{ - S.pp_state = LSPP_body; - } - break; - - case LSPP_identifier: - if (S.fsm.state != LS_identifier){ - S.token.type = CPP_TOKEN_JUNK; - } - S.pp_state = LSPP_junk; - break; - - case LSPP_number: - if (S.token.type != CPP_TOKEN_INTEGER_CONSTANT){ - S.token.type = CPP_TOKEN_JUNK; - S.pp_state = LSPP_junk; - } - else{ - S.pp_state = LSPP_include; - } - break; - - case LSPP_junk: - S.token.type = CPP_TOKEN_JUNK; - break; - } + case LS_less: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '=': S.token.type = CPP_TOKEN_LESSEQ; break; + default: + S.token.type = CPP_TOKEN_LESS; + pos_update_rule = PUR_back_one; + break; } + break; + + case LS_less_less: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '=': S.token.type = CPP_TOKEN_LSHIFTEQ; break; + default: + S.token.type = CPP_TOKEN_LSHIFT; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_more: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '=': S.token.type = CPP_TOKEN_GRTREQ; break; + default: + S.token.type = CPP_TOKEN_GRTR; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_more_more: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '=': S.token.type = CPP_TOKEN_RSHIFTEQ; break; + default: + S.token.type = CPP_TOKEN_RSHIFT; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_minus: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '-': S.token.type = CPP_TOKEN_DECREMENT; break; + case '=': S.token.type = CPP_TOKEN_SUBEQ; break; + default: + S.token.type = CPP_TOKEN_MINUS; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_arrow: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '*': S.token.type = CPP_TOKEN_PTRARROW; break; + default: + S.token.type = CPP_TOKEN_ARROW; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_and: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '&': S.token.type = CPP_TOKEN_AND; break; + case '=': S.token.type = CPP_TOKEN_ANDEQ; break; + default: + S.token.type = CPP_TOKEN_AMPERSAND; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_or: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '|': S.token.type = CPP_TOKEN_OR; break; + case '=': S.token.type = CPP_TOKEN_OREQ; break; + default: + S.token.type = CPP_TOKEN_BIT_OR; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_plus: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '+': S.token.type = CPP_TOKEN_INCREMENT; break; + case '=': S.token.type = CPP_TOKEN_ADDEQ; break; + default: + S.token.type = CPP_TOKEN_PLUS; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_colon: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case ':': S.token.type = CPP_TOKEN_SCOPE; break; + default: + S.token.type = CPP_TOKEN_COLON; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_star: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '=': S.token.type = CPP_TOKEN_MULEQ; break; + default: + S.token.type = CPP_TOKEN_STAR; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_modulo: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '=': S.token.type = CPP_TOKEN_MODEQ; break; + default: + S.token.type = CPP_TOKEN_MOD; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_caret: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '=': S.token.type = CPP_TOKEN_XOREQ; break; + default: + S.token.type = CPP_TOKEN_BIT_XOR; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_eq: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '=': S.token.type = CPP_TOKEN_EQEQ; break; + default: + S.token.type = CPP_TOKEN_EQ; + pos_update_rule = PUR_back_one; + break; + } + break; + + case LS_bang: + S.token.flags = CPP_TFLAG_IS_OPERATOR; + switch (c){ + case '=': S.token.type = CPP_TOKEN_NOTEQ; break; + default: + S.token.type = CPP_TOKEN_NOT; + pos_update_rule = PUR_back_one; + break; + } + break; } - if (S.fsm.emit_token){ - S.token.start = S.token_start; - if (S.pos_overide){ - S.token.size = S.pos_overide - S.token_start; - S.pos_overide = 0; - } - else{ - S.token.size = S.pos - S.token_start; - } - if ((S.token.flags & CPP_TFLAG_PP_DIRECTIVE) == 0){ - S.token.flags |= (S.pp_state != LSPP_default)?(CPP_TFLAG_PP_BODY):(0); - } - S.token.state_flags = S.pp_state; + switch (pos_update_rule){ + case PUR_back_one: + --S.pos; + break; - token_i = cpp_place_token_nonalloc(out_tokens, token_i, S.token); - if (token_i == max_token_i){ - DrYield(2, LexNeedTokenMemory); + default: + if (chunk[S.pos-1] == 0){ + --S.pos; + } + break; + } + + if ((S.token.flags & CPP_TFLAG_PP_DIRECTIVE) == 0){ + switch (S.pp_state){ + case LSPP_include: + if (S.token.type != CPP_TOKEN_INCLUDE_FILE){ + S.token.type = CPP_TOKEN_JUNK; + } + S.pp_state = LSPP_junk; + break; + + case LSPP_macro_identifier: + if (S.fsm.state != LS_identifier){ + S.token.type = CPP_TOKEN_JUNK; + S.pp_state = LSPP_junk; + } + else{ + S.pp_state = LSPP_body; + } + break; + + case LSPP_identifier: + if (S.fsm.state != LS_identifier){ + S.token.type = CPP_TOKEN_JUNK; + } + S.pp_state = LSPP_junk; + break; + + case LSPP_number: + if (S.token.type != CPP_TOKEN_INTEGER_CONSTANT){ + S.token.type = CPP_TOKEN_JUNK; + S.pp_state = LSPP_junk; + } + else{ + S.pp_state = LSPP_include; + } + break; + + case LSPP_junk: + S.token.type = CPP_TOKEN_JUNK; + break; } } } - // NOTE(allen): else case for "if (c != 0) {...} - else{ - S.completed = 1; + + if (S.fsm.emit_token){ + S.token.start = S.token_start; + if (S.pos_overide){ + S.token.size = S.pos_overide - S.token_start; + S.pos_overide = 0; + } + else{ + S.token.size = S.pos - S.token_start; + } + if ((S.token.flags & CPP_TFLAG_PP_DIRECTIVE) == 0){ + S.token.flags |= (S.pp_state != LSPP_default)?(CPP_TFLAG_PP_BODY):(0); + } + + token_i = cpp_place_token_nonalloc(out_tokens, token_i, S.token); + if (token_i == max_token_i){ + if (S.pos == end_pos){ + S.chunk_pos += size; + DrYield(7, LexNeedChunk); + } + DrYield(2, LexNeedTokenMemory); + } + } + + if (S.completed){ break; } } diff --git a/test/experiment.cpp b/test/experiment.cpp index c2c8483e..e2b9005b 100644 --- a/test/experiment.cpp +++ b/test/experiment.cpp @@ -206,6 +206,19 @@ end_t(Times *t){ *t = time; } +static int +equivalent_comments(Cpp_Token *a, Cpp_Token *b, char *data){ + String s_a = make_string(data + a->start, a->size); + String s_b = make_string(data + b->start, b->size); + + s_a = skip_chop_whitespace(s_a); + s_b = skip_chop_whitespace(s_b); + + int result = match(s_a, s_b); + + return(result); +} + static void run_experiment(Experiment *exp, char *filename, int verbose, int chunks, int max_tokens){ @@ -266,16 +279,17 @@ run_experiment(Experiment *exp, char *filename, int verbose, } else{ start = __rdtsc(); - new_lex::cpp_lex_nonalloc(&ld, - (char*)file_data.data, file_data.size, - &exp->testing_stack); + new_lex::cpp_lex_size_nonalloc( + &ld, (char*)file_data.data, file_data.size, + file_data.size, &exp->testing_stack); + time.fsm += (__rdtsc() - start); } } else{ if (chunks){ start = __rdtsc(); - int relevant_size = file_data.size + 1; + int relevant_size = file_data.size; is_last = 0; for (k = 0; k < relevant_size; k += chunks){ chunk_size = chunks; @@ -289,7 +303,8 @@ run_experiment(Experiment *exp, char *filename, int verbose, do{ result = new_lex::cpp_lex_size_nonalloc(&ld, - (char*)file_data.data + k, chunk_size, file_data.size, + (char*)file_data.data + k, chunk_size, + file_data.size, &exp->testing_stack, max_tokens); if (result == new_lex::LexFinished || @@ -348,15 +363,27 @@ run_experiment(Experiment *exp, char *filename, int verbose, } if (correct->start != testing->start || correct->size != testing->size){ - pass = 0; - if (verbose >= 1){ - printf("token range mismatch at token %d\n" - " %d:%d original %d:%d testing\n" - " %.*s original %.*s testing\n", - j, - correct->start, correct->size, testing->start, testing->size, - correct->size, data + correct->start, - testing->size, data + testing->start); + + int mismatch = 1; + if (correct->type == testing->type && + (correct->type == CPP_TOKEN_COMMENT || + correct->type == CPP_TOKEN_ERROR_MESSAGE)){ + if (equivalent_comments(correct, testing, data)){ + mismatch = 0; + } + } + + if (mismatch){ + pass = 0; + if (verbose >= 1){ + printf("token range mismatch at token %d\n" + " %d:%d original %d:%d testing\n" + " %.*s original %.*s testing\n", + j, + correct->start, correct->size, testing->start, testing->size, + correct->size, data + correct->start, + testing->size, data + testing->start); + } } } @@ -402,12 +429,12 @@ show_time(Times t, int repeats, char *type){ int main(){ int repeats = 1; - int verbose_level = 1; - int chunk_start = 64; - int chunk_end = 64; -#define TEST_FILE "lexer_test2.cpp" -#define SINGLE_ITEM 1 - int token_limit = 1; + int verbose_level = -1; + int chunk_start = 0; + int chunk_end = 16; +#define TEST_FILE "parser_test_gcc.cpp" +#define SINGLE_ITEM 0 + int token_limit = 0; int chunks = (chunk_start > 0 && chunk_start <= chunk_end); int c = 0;