From 37c3422603ecad731595e3fdfc6afdb890c1c850 Mon Sep 17 00:00:00 2001 From: Allen Webster Date: Tue, 2 May 2017 11:51:35 -0400 Subject: [PATCH] Fixed lexer bug --- 4coder_default_bindings.cpp | 1 + 4cpp/4cpp_lexer.h | 22 +++++++++------------- 4cpp/4cpp_lexer_tables.c | 27 ++++++++++++++------------- meta/fsm_table_generator.cpp | 9 ++++++++- 4 files changed, 32 insertions(+), 27 deletions(-) diff --git a/4coder_default_bindings.cpp b/4coder_default_bindings.cpp index 7ebc67b9..c66c2445 100644 --- a/4coder_default_bindings.cpp +++ b/4coder_default_bindings.cpp @@ -220,3 +220,4 @@ get_bindings(void *data, int32_t size){ // BOTTOM +R" \ No newline at end of file diff --git a/4cpp/4cpp_lexer.h b/4cpp/4cpp_lexer.h index 8418dc0e..075743b4 100644 --- a/4cpp/4cpp_lexer.h +++ b/4cpp/4cpp_lexer.h @@ -266,9 +266,7 @@ FCPP_LINK Cpp_Lex_PP_State cpp_pp_directive_to_state(Cpp_Token_Type type){ Cpp_Lex_PP_State result = LSPP_default; switch (type){ - case CPP_PP_INCLUDE: - case CPP_PP_IMPORT: - case CPP_PP_USING: + case CPP_PP_INCLUDE: case CPP_PP_IMPORT: case CPP_PP_USING: result = LSPP_include; break; @@ -276,14 +274,11 @@ cpp_pp_directive_to_state(Cpp_Token_Type type){ result = LSPP_macro_identifier; break; - case CPP_PP_UNDEF: - case CPP_PP_IFDEF: - case CPP_PP_IFNDEF: + case CPP_PP_UNDEF: case CPP_PP_IFDEF: case CPP_PP_IFNDEF: result = LSPP_identifier; break; - case CPP_PP_IF: - case CPP_PP_ELIF: + case CPP_PP_IF: case CPP_PP_ELIF: result = LSPP_body_if; break; @@ -291,8 +286,7 @@ cpp_pp_directive_to_state(Cpp_Token_Type type){ result = LSPP_body; break; - case CPP_PP_VERSION: - case CPP_PP_LINE: + case CPP_PP_VERSION: case CPP_PP_LINE: result = LSPP_number; break; @@ -300,9 +294,7 @@ cpp_pp_directive_to_state(Cpp_Token_Type type){ result = LSPP_error; break; - case CPP_PP_UNKNOWN: - case CPP_PP_ELSE: - case CPP_PP_ENDIF: + case CPP_PP_UNKNOWN: case CPP_PP_ELSE: case CPP_PP_ENDIF: result = LSPP_junk; break; } @@ -621,6 +613,8 @@ cpp_lex_nonalloc_null_end_no_limit(Cpp_Lex_Data *S_ptr, char *chunk, i32_4tech s case LS_string_raw: { + Assert(c != 0); + S.tb_pos = 0; S.delim_length = 0; @@ -701,6 +695,8 @@ cpp_lex_nonalloc_null_end_no_limit(Cpp_Lex_Data *S_ptr, char *chunk, i32_4tech s case LS_string_normal: { + Assert(c != 0); + S.fsm.state = LSSTR_default; S.fsm.flags = 0; for (;;){ diff --git a/4cpp/4cpp_lexer_tables.c b/4cpp/4cpp_lexer_tables.c index e77ca43a..4741e8b6 100644 --- a/4cpp/4cpp_lexer_tables.c +++ b/4cpp/4cpp_lexer_tables.c @@ -56,7 +56,7 @@ u8_4tech int_fsm_table[] = { u16_4tech raw_str_eq_classes[] = { 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 6, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 9, 3, 0, 3, 3, 3, 3, 3, 12, 9, 3, 3, 3, 3, 3, 3, + 9, 3, 12, 3, 3, 3, 3, 3, 15, 9, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, @@ -72,13 +72,14 @@ u16_4tech raw_str_eq_classes[] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, }; -const i32_4tech num_raw_str_eq_classes = 5; +const i32_4tech num_raw_str_eq_classes = 6; u8_4tech raw_str_table[] = { - 0, 6, 6, + 3, 6, 6, 0, 1, 2, 3, 2, 2, 3, 1, 2, + 0, 6, 6, 4, 1, 2, }; @@ -201,7 +202,7 @@ u16_4tech main_fsm_eq_classes[] = { const i32_4tech num_main_fsm_eq_classes = 32; u8_4tech main_fsm_table[] = { - 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, + 31, 32, 33, 34, 35, 32, 32, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 32, 33, 34, 35, 32, 32, 32, 39, 40, 41, 42, 43, 44, 45, 15, 15, 17, 17, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 30, 0, 32, 33, 34, 35, 32, 32, 32, 39, 40, 41, 42, 43, 44, 45, 46, 15, 17, 17, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 30, 0, 32, 33, 3, 35, 32, 32, 32, 39, 40, 41, 42, 43, 44, 45, 15, 16, 17, 17, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 30, @@ -257,7 +258,7 @@ u16_4tech pp_include_fsm_eq_classes[] = { const i32_4tech num_pp_include_fsm_eq_classes = 32; u8_4tech pp_include_fsm_table[] = { - 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, + 31, 32, 33, 34, 35, 32, 32, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 32, 33, 34, 35, 32, 32, 32, 39, 40, 41, 42, 43, 44, 45, 15, 15, 17, 17, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 30, 0, 32, 33, 34, 35, 32, 32, 32, 39, 40, 41, 42, 43, 44, 45, 46, 15, 17, 17, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 30, 0, 32, 33, 3, 35, 32, 32, 32, 39, 40, 41, 42, 43, 44, 45, 15, 16, 17, 17, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 30, @@ -313,7 +314,7 @@ u16_4tech pp_macro_fsm_eq_classes[] = { const i32_4tech num_pp_macro_fsm_eq_classes = 32; u8_4tech pp_macro_fsm_table[] = { - 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, + 31, 32, 33, 34, 35, 32, 32, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 32, 33, 34, 35, 32, 32, 32, 39, 40, 41, 42, 43, 44, 45, 15, 15, 17, 17, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 30, 0, 32, 33, 34, 35, 32, 32, 32, 39, 40, 41, 42, 43, 44, 45, 46, 15, 17, 17, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 30, 0, 32, 33, 3, 35, 32, 32, 32, 39, 40, 41, 42, 43, 44, 45, 15, 16, 17, 17, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 30, @@ -369,7 +370,7 @@ u16_4tech pp_identifier_fsm_eq_classes[] = { const i32_4tech num_pp_identifier_fsm_eq_classes = 32; u8_4tech pp_identifier_fsm_table[] = { - 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, + 31, 32, 33, 34, 35, 32, 32, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 32, 33, 34, 35, 32, 32, 32, 39, 40, 41, 42, 43, 44, 45, 15, 15, 17, 17, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 30, 0, 32, 33, 34, 35, 32, 32, 32, 39, 40, 41, 42, 43, 44, 45, 46, 15, 17, 17, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 30, 0, 32, 33, 3, 35, 32, 32, 32, 39, 40, 41, 42, 43, 44, 45, 15, 16, 17, 17, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 30, @@ -425,7 +426,7 @@ u16_4tech pp_body_if_fsm_eq_classes[] = { const i32_4tech num_pp_body_if_fsm_eq_classes = 32; u8_4tech pp_body_if_fsm_table[] = { - 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, + 31, 32, 33, 34, 35, 32, 32, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 32, 33, 34, 35, 32, 32, 32, 39, 40, 41, 42, 43, 44, 45, 15, 15, 17, 17, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 30, 0, 32, 33, 34, 35, 32, 32, 32, 39, 40, 41, 42, 43, 44, 45, 46, 15, 17, 17, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 30, 0, 32, 33, 3, 35, 32, 32, 32, 39, 40, 41, 42, 43, 44, 45, 15, 16, 17, 17, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 30, @@ -481,7 +482,7 @@ u16_4tech pp_body_fsm_eq_classes[] = { const i32_4tech num_pp_body_fsm_eq_classes = 32; u8_4tech pp_body_fsm_table[] = { - 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, + 31, 32, 33, 34, 35, 32, 32, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 32, 33, 34, 35, 32, 32, 32, 39, 40, 41, 42, 43, 44, 45, 15, 15, 17, 17, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 30, 0, 32, 33, 34, 35, 32, 32, 32, 39, 40, 41, 42, 43, 44, 45, 46, 15, 17, 17, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 30, 0, 32, 33, 3, 35, 32, 32, 32, 39, 40, 41, 42, 43, 44, 45, 15, 16, 17, 17, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 30, @@ -537,7 +538,7 @@ u16_4tech pp_number_fsm_eq_classes[] = { const i32_4tech num_pp_number_fsm_eq_classes = 32; u8_4tech pp_number_fsm_table[] = { - 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, + 31, 32, 33, 34, 35, 32, 32, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 32, 33, 34, 35, 32, 32, 32, 39, 40, 41, 42, 43, 44, 45, 15, 15, 17, 17, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 30, 0, 32, 33, 34, 35, 32, 32, 32, 39, 40, 41, 42, 43, 44, 45, 46, 15, 17, 17, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 30, 0, 32, 33, 3, 35, 32, 32, 32, 39, 40, 41, 42, 43, 44, 45, 15, 16, 17, 17, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 30, @@ -593,7 +594,7 @@ u16_4tech pp_error_fsm_eq_classes[] = { const i32_4tech num_pp_error_fsm_eq_classes = 3; u8_4tech pp_error_fsm_table[] = { - 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, + 31, 32, 33, 34, 35, 32, 32, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, }; @@ -620,7 +621,7 @@ u16_4tech pp_junk_fsm_eq_classes[] = { const i32_4tech num_pp_junk_fsm_eq_classes = 32; u8_4tech pp_junk_fsm_table[] = { - 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, + 31, 32, 33, 34, 35, 32, 32, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 32, 33, 34, 35, 32, 32, 32, 39, 40, 41, 42, 43, 44, 45, 15, 15, 17, 17, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 30, 0, 32, 33, 34, 35, 32, 32, 32, 39, 40, 41, 42, 43, 44, 45, 46, 15, 17, 17, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 30, 0, 32, 33, 3, 35, 32, 32, 32, 39, 40, 41, 42, 43, 44, 45, 15, 16, 17, 17, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 30, @@ -676,7 +677,7 @@ u16_4tech no_string_fsm_eq_classes[] = { const i32_4tech num_no_string_fsm_eq_classes = 29; u8_4tech no_string_fsm_table[] = { - 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, + 31, 32, 33, 34, 35, 32, 32, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 32, 33, 34, 35, 62, 62, 62, 39, 40, 41, 42, 43, 44, 45, 15, 15, 17, 17, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 30, 0, 32, 33, 34, 35, 62, 62, 62, 39, 40, 41, 42, 43, 44, 45, 46, 15, 17, 17, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 30, 0, 32, 33, 3, 35, 62, 62, 62, 39, 40, 41, 42, 43, 44, 45, 15, 16, 17, 17, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 30, diff --git a/meta/fsm_table_generator.cpp b/meta/fsm_table_generator.cpp index 2a5009f9..ef322002 100644 --- a/meta/fsm_table_generator.cpp +++ b/meta/fsm_table_generator.cpp @@ -218,7 +218,7 @@ FSM_SIG(raw_str_fsm){ case LSSTR_default: { switch (c){ - case ')': case '\\': case ' ': case '\n': fsm.emit_token = true; break; + case 0: case ')': case '\\': case ' ': case '\n': fsm.emit_token = true; break; case '(': fsm.state = LSSTR_get_delim; fsm.emit_token = true; break; default: break; } @@ -282,6 +282,13 @@ is_identifier_char_non_numeric(u8_4tech c, b32_4tech ignore_string_delims){ Cpp_Lex_FSM main_fsm(Cpp_Lex_FSM fsm, uint8_t pp_state, uint8_t c, bool32 ignore_string_delims){ if (c == 0){ + switch (fsm.state){ + case LS_string_R: + case LS_string_LUu8: + { + fsm.state = LS_identifier; + }break; + } fsm.emit_token = true; } else{