/* "4cpp" Open C++ Parser v0.1: Lexer no warranty implied; use at your own risk NOTES ON USE: OPTIONS: Set options by defining macros before including this file. FCPP_LEXER_IMPLEMENTATION - causes this file to output function implementations - this option is unset after use so that future includes of this file in the same unit do not continue to output implementations FCPP_NO_MALLOC - prevent including FCPP_NO_ASSERT - prevent including FCPP_NO_STRING - prevent including FCPP_NO_CRT - FCPP_NO_MALLOC & FCPP_NO_ASSERT & FCPP_NO_STRING FCPP_FORBID_MALLOC - one step above *NO_MALLOC with this set 4cpp functions that do allocations are not allowed to be declared or defined at all, forcing the user to handle allocation themselves - implies FCPP_NO_MALLOC FCPP_GET_MEMORY - defines how to make allocations, interface of malloc, defaults to malloc FCPP_FREE_MEMORY - defines how to free memory, interface of ree, defaults to free (The above must be defined if FCPP_NO_MALLOC is set, unless FCPP_FORBID_MALLOC is set) FCPP_ASSERT - defines how to make assertions, interface of assert, defaults to assert FCPP_MEM_COPY - defines how to copy blocks of memory, interface of memcpy, defaults to memcpy FCPP_MEM_MOVE - defines how to move blocks of memory, interface of memmove, defaults to memmove (The above must be defined if FCPP_NO_STRING is set) FCPP_LINK - defines linkage of non-inline functions, defaults to static FCPP_EXTERN - changes FCPP_LINK default to extern, this option is ignored if FCPP_LINK is defined include the file "4cpp_clear_config.h" if you want to undefine all options for some reason HIDDDEN DEPENDENCIES: 4cpp is not a single file include library, there are dependencies between the files. Be sure to include these dependencies before 4cpp_lexer.h: 4cpp_types.h 4cpp_string.h */ // TOP // TODO(allen): // // EASE OF USE AND DEPLOYMENT // - make it easier to locate the list of function declarations // - more C compatibility // // POTENTIAL // - Experiment with optimizations. Sean's State machine? // - Reserve 0th token for null? Put a EOF token at the end? // - Pass Cpp_File and Cpp_Token_Stack by value instead of by pointer? // // CURRENT // - lex in chunks // #include "4cpp_config.h" #ifndef FCPP_LEXER_INC #define FCPP_LEXER_INC enum Cpp_Token_Type{ CPP_TOKEN_JUNK, CPP_TOKEN_COMMENT, CPP_TOKEN_KEY_TYPE, CPP_TOKEN_KEY_MODIFIER, CPP_TOKEN_KEY_QUALIFIER, CPP_TOKEN_KEY_OPERATOR, // NOTE(allen): This type is not actually stored in tokens CPP_TOKEN_KEY_CONTROL_FLOW, CPP_TOKEN_KEY_CAST, CPP_TOKEN_KEY_TYPE_DECLARATION, CPP_TOKEN_KEY_ACCESS, CPP_TOKEN_KEY_LINKAGE, CPP_TOKEN_KEY_OTHER, CPP_TOKEN_IDENTIFIER, CPP_TOKEN_INTEGER_CONSTANT, CPP_TOKEN_CHARACTER_CONSTANT, CPP_TOKEN_FLOATING_CONSTANT, CPP_TOKEN_STRING_CONSTANT, CPP_TOKEN_BOOLEAN_CONSTANT, CPP_TOKEN_STATIC_ASSERT, CPP_TOKEN_BRACKET_OPEN, CPP_TOKEN_BRACKET_CLOSE, CPP_TOKEN_PARENTHESE_OPEN, CPP_TOKEN_PARENTHESE_CLOSE, CPP_TOKEN_BRACE_OPEN, CPP_TOKEN_BRACE_CLOSE, CPP_TOKEN_SEMICOLON, CPP_TOKEN_ELLIPSIS, // NOTE(allen): Ambiguous tokens, lexer only, // parser figures out the real meaning CPP_TOKEN_STAR, CPP_TOKEN_AMPERSAND, CPP_TOKEN_TILDE, CPP_TOKEN_PLUS, CPP_TOKEN_MINUS, CPP_TOKEN_INCREMENT, CPP_TOKEN_DECREMENT, // NOTE(allen): Precedence 1, LtoR CPP_TOKEN_SCOPE, // NOTE(allen): Precedence 2, LtoR CPP_TOKEN_POSTINC, // from increment, parser only CPP_TOKEN_POSTDEC, // from decrement, parser only CPP_TOKEN_FUNC_STYLE_CAST, // parser only CPP_TOKEN_CPP_STYLE_CAST, CPP_TOKEN_CALL, // from open paren, parser only CPP_TOKEN_INDEX, // from bracket open, parser only CPP_TOKEN_DOT, CPP_TOKEN_ARROW, // NOTE(allen): Precedence 3, RtoL CPP_TOKEN_PREINC, // from increment, parser only CPP_TOKEN_PREDEC, // from decrement, parser only CPP_TOKEN_POSITIVE, // from plus, parser only CPP_TOKEN_NEGAITVE, // from minus, parser only CPP_TOKEN_NOT, CPP_TOKEN_BIT_NOT, // from tilde, direct from 'compl' CPP_TOKEN_CAST, // from open paren, parser only CPP_TOKEN_DEREF, // from star, parser only CPP_TOKEN_TYPE_PTR, // from star, parser only CPP_TOKEN_ADDRESS, // from ampersand, parser only CPP_TOKEN_TYPE_REF, // from ampersand, parser only CPP_TOKEN_SIZEOF, CPP_TOKEN_ALIGNOF, CPP_TOKEN_DECLTYPE, CPP_TOKEN_TYPEID, CPP_TOKEN_NEW, CPP_TOKEN_DELETE, CPP_TOKEN_NEW_ARRAY, // from new and bracket open, parser only CPP_TOKEN_DELETE_ARRAY, // from delete and bracket open, parser only // NOTE(allen): Precedence 4, LtoR CPP_TOKEN_PTRDOT, CPP_TOKEN_PTRARROW, // NOTE(allen): Precedence 5, LtoR CPP_TOKEN_MUL, // from start, parser only CPP_TOKEN_DIV, CPP_TOKEN_MOD, // NOTE(allen): Precedence 6, LtoR CPP_TOKEN_ADD, // from plus, parser only CPP_TOKEN_SUB, // from minus, parser only // NOTE(allen): Precedence 7, LtoR CPP_TOKEN_LSHIFT, CPP_TOKEN_RSHIFT, // NOTE(allen): Precedence 8, LtoR CPP_TOKEN_LESS, CPP_TOKEN_GRTR, CPP_TOKEN_GRTREQ, CPP_TOKEN_LESSEQ, // NOTE(allen): Precedence 9, LtoR CPP_TOKEN_EQEQ, CPP_TOKEN_NOTEQ, // NOTE(allen): Precedence 10, LtoR CPP_TOKEN_BIT_AND, // from ampersand, direct from 'bitand' // NOTE(allen): Precedence 11, LtoR CPP_TOKEN_BIT_XOR, // NOTE(allen): Precedence 12, LtoR CPP_TOKEN_BIT_OR, // NOTE(allen): Precedence 13, LtoR CPP_TOKEN_AND, // NOTE(allen): Precedence 14, LtoR CPP_TOKEN_OR, // NOTE(allen): Precedence 15, RtoL CPP_TOKEN_TERNARY_QMARK, CPP_TOKEN_COLON, CPP_TOKEN_THROW, CPP_TOKEN_EQ, CPP_TOKEN_ADDEQ, CPP_TOKEN_SUBEQ, CPP_TOKEN_MULEQ, CPP_TOKEN_DIVEQ, CPP_TOKEN_MODEQ, CPP_TOKEN_LSHIFTEQ, CPP_TOKEN_RSHIFTEQ, CPP_TOKEN_ANDEQ, CPP_TOKEN_OREQ, CPP_TOKEN_XOREQ, // NOTE(allen): Precedence 16, LtoR CPP_TOKEN_COMMA, CPP_PP_INCLUDE, CPP_PP_DEFINE, CPP_PP_UNDEF, CPP_PP_IF, CPP_PP_IFDEF, CPP_PP_IFNDEF, CPP_PP_ELSE, CPP_PP_ELIF, CPP_PP_ENDIF, CPP_PP_ERROR, CPP_PP_IMPORT, CPP_PP_USING, CPP_PP_LINE, CPP_PP_PRAGMA, CPP_PP_STRINGIFY, CPP_PP_CONCAT, CPP_PP_UNKNOWN, CPP_TOKEN_DEFINED, CPP_TOKEN_INCLUDE_FILE, // NOTE(allen): used in the parser CPP_TOKEN_EOF }; struct Cpp_File{ char *data; int size; }; struct Cpp_Token{ Cpp_Token_Type type; fcpp_i32 start, size; fcpp_u16 state_flags; fcpp_u16 flags; }; enum Cpp_Token_Flag{ CPP_TFLAG_IGNORE = 1 << 0, CPP_TFLAG_PP_DIRECTIVE = 1 << 1, CPP_TFLAG_PP_BODY = 1 << 2, CPP_TFLAG_BAD_ENDING = 1 << 3, CPP_TFLAG_MULTILINE = 1 << 4, CPP_TFLAG_PARAMETERIZED = 1 << 5, CPP_TFLAG_IS_OPERATOR = 1 << 6, CPP_TFLAG_IS_KEYWORD = 1 << 7 }; enum Cpp_Preprocessor_State{ CPP_LEX_PP_DEFAULT, CPP_LEX_PP_IDENTIFIER, CPP_LEX_PP_MACRO_IDENTIFIER, CPP_LEX_PP_INCLUDE, CPP_LEX_PP_BODY, CPP_LEX_PP_BODY_IF, CPP_LEX_PP_NUMBER, CPP_LEX_PP_JUNK, // NEVER ADD BELOW THIS CPP_LEX_PP_COUNT }; struct Cpp_Lex_Data{ Cpp_Preprocessor_State pp_state; fcpp_i32 pos; fcpp_bool32 complete; }; struct Cpp_Read_Result{ Cpp_Token token; fcpp_i32 pos; fcpp_bool8 newline; fcpp_bool8 has_result; }; struct Cpp_Token_Stack{ Cpp_Token *tokens; int count, max_count; }; struct Cpp_Token_Merge{ Cpp_Token new_token; fcpp_bool32 did_merge; }; struct Seek_Result{ fcpp_i32 pos; fcpp_bool32 new_line; }; struct Cpp_Get_Token_Result{ fcpp_i32 token_index; fcpp_bool32 in_whitespace; }; // TODO(allen): revisit this keyword data declaration system struct String_And_Flag{ char *str; fcpp_u32 flags; }; struct String_List{ String_And_Flag *data; int count; }; struct Sub_Match_List_Result{ int index; fcpp_i32 new_pos; }; inline fcpp_u16 cpp_token_set_pp_state(fcpp_u16 bitfield, Cpp_Preprocessor_State state_value){ return (fcpp_u16)state_value; } inline Cpp_Preprocessor_State cpp_token_get_pp_state(fcpp_u16 bitfield){ return (Cpp_Preprocessor_State)(bitfield); } inline String cpp_get_lexeme(char *str, Cpp_Token *token){ String result; result.str = str + token->start; result.size = token->size; return result; } inline bool is_keyword(Cpp_Token_Type type){ return (type >= CPP_TOKEN_KEY_TYPE && type <= CPP_TOKEN_KEY_OTHER); } FCPP_LINK Sub_Match_List_Result sub_match_list(Cpp_File file, int pos, String_List list, int sub_size); FCPP_LINK Seek_Result seek_unescaped_eol(char *data, int size, int pos); FCPP_LINK Seek_Result seek_unescaped_delim(char *data, int size, int pos, char delim); FCPP_LINK Seek_Result seek_block_comment_end(char *data, int size, int pos); FCPP_LINK Cpp_Read_Result cpp_read_whitespace(Cpp_File file, int pos); FCPP_LINK Cpp_Read_Result cpp_read_junk_line(Cpp_File file, int pos); FCPP_LINK Cpp_Read_Result cpp_read_operator(Cpp_File file, int pos); FCPP_LINK Cpp_Read_Result cpp_read_pp_operator(Cpp_File file, int pos); FCPP_LINK Cpp_Read_Result cpp_read_alpha_numeric(Cpp_File file, int pos, bool in_if_body); inline Cpp_Read_Result cpp_read_alpha_numeric(Cpp_File file, int pos) { return cpp_read_alpha_numeric(file, pos, 0); } FCPP_LINK Cpp_Read_Result cpp_read_number(Cpp_File file, int pos); FCPP_LINK Cpp_Read_Result cpp_read_string_litteral(Cpp_File file, int pos); FCPP_LINK Cpp_Read_Result cpp_read_character_litteral(Cpp_File file, int pos); FCPP_LINK Cpp_Read_Result cpp_read_line_comment(Cpp_File file, int pos); FCPP_LINK Cpp_Read_Result cpp_read_block_comment(Cpp_File file, int pos); FCPP_LINK Cpp_Read_Result cpp_read_preprocessor(Cpp_File file, int pos); FCPP_LINK Cpp_Read_Result cpp_read_pp_include_file(Cpp_File file, int pos); FCPP_LINK Cpp_Read_Result cpp_read_pp_default_mode(Cpp_File file, int pos, bool in_if_body); inline Cpp_Read_Result cpp_read_pp_default_mode(Cpp_File file, int pos) { return cpp_read_pp_default_mode(file, pos, 0); } FCPP_LINK Cpp_Token_Merge cpp_attempt_token_merge(Cpp_Token prev, Cpp_Token next); FCPP_LINK bool cpp_push_token_no_merge(Cpp_Token_Stack *stack, Cpp_Token token); FCPP_LINK bool cpp_push_token_nonalloc(Cpp_Token_Stack *stack, Cpp_Token token); FCPP_LINK Cpp_Read_Result cpp_lex_step(Cpp_File file, Cpp_Lex_Data *lex); FCPP_LINK int cpp_lex_file_token_count(Cpp_File file); FCPP_LINK Cpp_Lex_Data cpp_lex_file_nonalloc(Cpp_File file, Cpp_Token_Stack *stack, Cpp_Lex_Data data); inline Cpp_Lex_Data cpp_lex_file_nonalloc(Cpp_File file, Cpp_Token_Stack *stack) { return cpp_lex_file_nonalloc(file, stack, {}); } FCPP_LINK Cpp_Get_Token_Result cpp_get_token(Cpp_Token_Stack *stack, int pos); FCPP_LINK int cpp_get_end_token(Cpp_Token_Stack *stack, int end); FCPP_LINK void cpp_shift_token_starts(Cpp_Token_Stack *stack, int from_token, int amount); struct Cpp_Relex_State{ Cpp_File file; Cpp_Token_Stack *stack; int start, end, amount; int start_token_i; int end_token_i; int relex_start; int tolerance; int space_request; }; FCPP_LINK Cpp_Relex_State cpp_relex_nonalloc_start(Cpp_File file, Cpp_Token_Stack *stack, int start, int end, int amount, int tolerance); FCPP_LINK bool cpp_relex_nonalloc_main(Cpp_Relex_State state, Cpp_Token_Stack *stack); #ifndef FCPP_FORBID_MALLOC FCPP_LINK Cpp_Token_Stack cpp_make_token_stack(int max); FCPP_LINK void cpp_free_token_stack(Cpp_Token_Stack stack); FCPP_LINK void cpp_resize_token_stack(Cpp_Token_Stack *stack, int new_max); FCPP_LINK void cpp_push_token(Cpp_Token_Stack *stack, Cpp_Token token); FCPP_LINK void cpp_lex_file(Cpp_File file, Cpp_Token_Stack *stack); FCPP_LINK bool cpp_relex_file_limited(Cpp_File file, Cpp_Token_Stack *stack, int start_i, int end_i, int amount, int extra_tolerance); inline void cpp_relex_file(Cpp_File file, Cpp_Token_Stack *stack, int start_i, int end_i, int amount) { cpp_relex_file_limited(file, stack, start_i, end_i, amount, -1); } #endif #define FCPP_STRING_LIST(x) {x, FCPP_COUNT(x)} // TODO(allen): shift towards storing in a context FCPP_GLOBAL String_And_Flag int_suf_strings[] = { {"ull"}, {"ULL"}, {"llu"}, {"LLU"}, {"ll"}, {"LL"}, {"l"}, {"L"}, {"u"}, {"U"} }; FCPP_GLOBAL String_List int_sufs = FCPP_STRING_LIST(int_suf_strings); FCPP_GLOBAL String_And_Flag float_suf_strings[] = { {"f"}, {"F"}, {"l"}, {"L"} }; FCPP_GLOBAL String_List float_sufs = FCPP_STRING_LIST(float_suf_strings); FCPP_GLOBAL String_And_Flag bool_lit_strings[] = { {"true"}, {"false"} }; FCPP_GLOBAL String_List bool_lits = FCPP_STRING_LIST(bool_lit_strings); FCPP_GLOBAL String_And_Flag keyword_strings[] = { {"and", CPP_TOKEN_AND}, {"and_eq", CPP_TOKEN_ANDEQ}, {"bitand", CPP_TOKEN_BIT_AND}, {"bitor", CPP_TOKEN_BIT_OR}, {"or", CPP_TOKEN_OR}, {"or_eq", CPP_TOKEN_OREQ}, {"sizeof", CPP_TOKEN_SIZEOF}, {"alignof", CPP_TOKEN_ALIGNOF}, {"decltype", CPP_TOKEN_DECLTYPE}, {"throw", CPP_TOKEN_THROW}, {"new", CPP_TOKEN_NEW}, {"delete", CPP_TOKEN_DELETE}, {"xor", CPP_TOKEN_BIT_XOR}, {"xor_eq", CPP_TOKEN_XOREQ}, {"not", CPP_TOKEN_NOT}, {"not_eq", CPP_TOKEN_NOTEQ}, {"typeid", CPP_TOKEN_TYPEID}, {"compl", CPP_TOKEN_BIT_NOT}, {"void", CPP_TOKEN_KEY_TYPE}, {"bool", CPP_TOKEN_KEY_TYPE}, {"char", CPP_TOKEN_KEY_TYPE}, {"int", CPP_TOKEN_KEY_TYPE}, {"float", CPP_TOKEN_KEY_TYPE}, {"double", CPP_TOKEN_KEY_TYPE}, {"long", CPP_TOKEN_KEY_MODIFIER}, {"short", CPP_TOKEN_KEY_MODIFIER}, {"const", CPP_TOKEN_KEY_QUALIFIER}, {"volatile", CPP_TOKEN_KEY_QUALIFIER}, {"asm", CPP_TOKEN_KEY_CONTROL_FLOW}, {"break", CPP_TOKEN_KEY_CONTROL_FLOW}, {"case", CPP_TOKEN_KEY_CONTROL_FLOW}, {"catch", CPP_TOKEN_KEY_CONTROL_FLOW}, {"continue", CPP_TOKEN_KEY_CONTROL_FLOW}, {"default", CPP_TOKEN_KEY_CONTROL_FLOW}, {"do", CPP_TOKEN_KEY_CONTROL_FLOW}, {"else", CPP_TOKEN_KEY_CONTROL_FLOW}, {"for", CPP_TOKEN_KEY_CONTROL_FLOW}, {"goto", CPP_TOKEN_KEY_CONTROL_FLOW}, {"if", CPP_TOKEN_KEY_CONTROL_FLOW}, {"return", CPP_TOKEN_KEY_CONTROL_FLOW}, {"switch", CPP_TOKEN_KEY_CONTROL_FLOW}, {"try", CPP_TOKEN_KEY_CONTROL_FLOW}, {"while", CPP_TOKEN_KEY_CONTROL_FLOW}, {"static_assert", CPP_TOKEN_KEY_CONTROL_FLOW}, {"const_cast", CPP_TOKEN_KEY_CAST}, {"dynamic_cast", CPP_TOKEN_KEY_CAST}, {"reinterpret_cast", CPP_TOKEN_KEY_CAST}, {"static_cast", CPP_TOKEN_KEY_CAST}, {"class", CPP_TOKEN_KEY_TYPE_DECLARATION}, {"enum", CPP_TOKEN_KEY_TYPE_DECLARATION}, {"struct", CPP_TOKEN_KEY_TYPE_DECLARATION}, {"typedef", CPP_TOKEN_KEY_TYPE_DECLARATION}, {"union", CPP_TOKEN_KEY_TYPE_DECLARATION}, {"template", CPP_TOKEN_KEY_TYPE_DECLARATION}, {"typename", CPP_TOKEN_KEY_TYPE_DECLARATION}, {"friend", CPP_TOKEN_KEY_ACCESS}, {"namespace", CPP_TOKEN_KEY_ACCESS}, {"private", CPP_TOKEN_KEY_ACCESS}, {"protected", CPP_TOKEN_KEY_ACCESS}, {"public", CPP_TOKEN_KEY_ACCESS}, {"using", CPP_TOKEN_KEY_ACCESS}, {"extern", CPP_TOKEN_KEY_LINKAGE}, {"export", CPP_TOKEN_KEY_LINKAGE}, {"inline", CPP_TOKEN_KEY_LINKAGE}, {"static", CPP_TOKEN_KEY_LINKAGE}, {"virtual", CPP_TOKEN_KEY_LINKAGE}, {"alignas", CPP_TOKEN_KEY_OTHER}, {"explicit", CPP_TOKEN_KEY_OTHER}, {"noexcept", CPP_TOKEN_KEY_OTHER}, {"nullptr", CPP_TOKEN_KEY_OTHER}, {"operator", CPP_TOKEN_KEY_OTHER}, {"register", CPP_TOKEN_KEY_OTHER}, {"this", CPP_TOKEN_KEY_OTHER}, {"thread_local", CPP_TOKEN_KEY_OTHER}, }; FCPP_GLOBAL String_List keywords = FCPP_STRING_LIST(keyword_strings); FCPP_GLOBAL String_And_Flag op_strings[] = { {"...", CPP_TOKEN_ELLIPSIS}, {"<<=", CPP_TOKEN_LSHIFTEQ}, {">>=", CPP_TOKEN_RSHIFTEQ}, {"->*", CPP_TOKEN_PTRARROW}, {"<<", CPP_TOKEN_LSHIFT}, {">>", CPP_TOKEN_RSHIFT}, {"&&", CPP_TOKEN_AND}, {"||", CPP_TOKEN_OR}, {"->", CPP_TOKEN_ARROW}, {"++", CPP_TOKEN_INCREMENT}, {"--", CPP_TOKEN_DECREMENT}, {"::", CPP_TOKEN_SCOPE}, {"+=", CPP_TOKEN_ADDEQ}, {"-=", CPP_TOKEN_SUBEQ}, {"*=", CPP_TOKEN_MULEQ}, {"/=", CPP_TOKEN_DIVEQ}, {"%=", CPP_TOKEN_MODEQ}, {"&=", CPP_TOKEN_ANDEQ}, {"|=", CPP_TOKEN_OREQ}, {"^=", CPP_TOKEN_XOREQ}, {"==", CPP_TOKEN_EQEQ}, {">=", CPP_TOKEN_GRTREQ}, {"<=", CPP_TOKEN_LESSEQ}, {"!=", CPP_TOKEN_NOTEQ}, {".*", CPP_TOKEN_PTRDOT}, {"{", CPP_TOKEN_BRACE_OPEN}, {"}", CPP_TOKEN_BRACE_CLOSE}, {"[", CPP_TOKEN_BRACKET_OPEN}, {"]", CPP_TOKEN_BRACKET_CLOSE}, {"(", CPP_TOKEN_PARENTHESE_OPEN}, {")", CPP_TOKEN_PARENTHESE_CLOSE}, {"<", CPP_TOKEN_LESS}, {">", CPP_TOKEN_GRTR}, {"+", CPP_TOKEN_PLUS}, {"-", CPP_TOKEN_MINUS}, {"!", CPP_TOKEN_NOT}, {"~", CPP_TOKEN_TILDE}, {"*", CPP_TOKEN_STAR}, {"&", CPP_TOKEN_AMPERSAND}, {"|", CPP_TOKEN_BIT_OR}, {"^", CPP_TOKEN_BIT_XOR}, {"=", CPP_TOKEN_EQ}, {",", CPP_TOKEN_COMMA}, {":", CPP_TOKEN_COLON}, {";", CPP_TOKEN_SEMICOLON}, {"/", CPP_TOKEN_DIV}, {"?", CPP_TOKEN_TERNARY_QMARK}, {"%", CPP_TOKEN_MOD}, {".", CPP_TOKEN_DOT}, }; FCPP_GLOBAL String_List ops = FCPP_STRING_LIST(op_strings); FCPP_GLOBAL String_And_Flag pp_op_strings[] = { {"##", CPP_PP_CONCAT}, {"#", CPP_PP_STRINGIFY}, }; FCPP_GLOBAL String_List pp_ops = FCPP_STRING_LIST(pp_op_strings); FCPP_GLOBAL String_And_Flag preprop_strings[] = { {"include", CPP_PP_INCLUDE}, {"INCLUDE", CPP_PP_INCLUDE}, {"ifndef", CPP_PP_IFNDEF}, {"IFNDEF", CPP_PP_IFNDEF}, {"define", CPP_PP_DEFINE}, {"DEFINE", CPP_PP_DEFINE}, {"import", CPP_PP_IMPORT}, {"IMPORT", CPP_PP_IMPORT}, {"pragma", CPP_PP_PRAGMA}, {"PRAGMA", CPP_PP_PRAGMA}, {"undef", CPP_PP_UNDEF}, {"UNDEF", CPP_PP_UNDEF}, {"endif", CPP_PP_ENDIF}, {"ENDIF", CPP_PP_ENDIF}, {"error", CPP_PP_ERROR}, {"ERROR", CPP_PP_ERROR}, {"ifdef", CPP_PP_IFDEF}, {"IFDEF", CPP_PP_IFDEF}, {"using", CPP_PP_USING}, {"USING", CPP_PP_USING}, {"else", CPP_PP_ELSE}, {"ELSE", CPP_PP_ELSE}, {"elif", CPP_PP_ELIF}, {"ELIF", CPP_PP_ELIF}, {"line", CPP_PP_LINE}, {"LINE", CPP_PP_LINE}, {"if", CPP_PP_IF}, {"IF", CPP_PP_IF}, }; FCPP_GLOBAL String_List preprops = FCPP_STRING_LIST(preprop_strings); #undef FCPP_STRING_LIST #endif // #ifndef FCPP_CPP_LEXER #ifdef FCPP_LEXER_IMPLEMENTATION #define _Assert FCPP_ASSERT #define _TentativeAssert FCPP_ASSERT FCPP_LINK Sub_Match_List_Result sub_match_list(Cpp_File file, int pos, String_List list, int sub_size){ Sub_Match_List_Result result; String str_main; char *str_check; int i,l; result.index = -1; result.new_pos = pos; str_main = make_string(file.data + pos, file.size - pos); if (sub_size > 0){ str_main = substr(str_main, 0, sub_size); for (i = 0; i < list.count; ++i){ str_check = list.data[i].str; if (match(str_main, str_check)){ result.index = i; result.new_pos = pos + sub_size; break; } } } else{ for (i = 0; i < list.count; ++i){ str_check = list.data[i].str; if (match_part(str_main, str_check, &l)){ result.index = i; result.new_pos = pos + l; break; } } } return result; } FCPP_LINK Seek_Result seek_unescaped_eol(char *data, int size, int pos){ Seek_Result result = {}; ++pos; while (pos < size){ if (data[pos] == '\\'){ if (pos + 1 < size && data[pos+1] == '\n'){ result.new_line = 1; ++pos; } else if (pos + 1 < size && data[pos+1] == '\r' && pos + 2 < size && data[pos+2] == '\n'){ result.new_line = 1; pos += 2; } } else if (data[pos] == '\n'){ break; } ++pos; } ++pos; result.pos = pos; return result; } FCPP_LINK Seek_Result seek_unescaped_delim(char *data, int size, int pos, char delim){ Seek_Result result = {}; bool escape = 0; ++pos; while (pos < size){ if (data[pos] == '\n'){ result.new_line = 1; } if (escape){ escape = 0; } else{ if (data[pos] == '\\'){ escape = 1; } else if (data[pos] == delim){ break; } } ++pos; } ++pos; result.pos = pos; return result; } FCPP_LINK Seek_Result seek_block_comment_end(char *data, int size, int pos){ Seek_Result result = {}; pos += 2; while (pos < size){ if (data[pos] == '*' && pos + 1 < size && data[pos+1] == '/'){ break; } if (data[pos] == '\n'){ result.new_line = 1; } ++pos; } pos += 2; result.pos = pos; return result; } FCPP_LINK Cpp_Read_Result cpp_read_whitespace(Cpp_File file, int pos){ Cpp_Read_Result result = {}; while (pos < file.size && char_is_whitespace(file.data[pos])){ if (file.data[pos] == '\n'){ result.newline = 1; } ++pos; } result.pos = pos; return result; } FCPP_LINK Cpp_Read_Result cpp_read_junk_line(Cpp_File file, int pos){ Cpp_Read_Result result = {}; result.token.start = pos; result.token.type = CPP_TOKEN_JUNK; bool comment_end = 0; while (pos < file.size && file.data[pos] != '\n'){ if (file.data[pos] == '/' && pos + 1 < file.size){ if (file.data[pos + 1] == '/' || file.data[pos + 1] == '*'){ comment_end = 1; break; } } ++pos; } if (comment_end){ result.pos = pos; result.token.size = pos - result.token.start; } else{ while (pos > 0 && file.data[pos - 1] == '\r'){ --pos; } if (pos > 0 && file.data[pos - 1] == '\\'){ --pos; } result.pos = pos; result.token.size = pos - result.token.start - 1; } return result; } FCPP_LINK Cpp_Read_Result cpp_read_operator(Cpp_File file, int pos){ Cpp_Read_Result result = {}; result.pos = pos; result.token.start = pos; Sub_Match_List_Result match; match = sub_match_list(file, result.token.start, ops, -1); if (match.index != -1){ result.pos = match.new_pos; result.token.size = result.pos - result.token.start; result.token.type = (Cpp_Token_Type)ops.data[match.index].flags; result.token.flags |= CPP_TFLAG_IS_OPERATOR; } else{ result.token.size = 1; result.token.type = CPP_TOKEN_JUNK; result.pos = pos + 1; } return result; } FCPP_LINK Cpp_Read_Result cpp_read_pp_operator(Cpp_File file, int pos){ Cpp_Read_Result result = {}; result.pos = pos; result.token.start = pos; Sub_Match_List_Result match; match = sub_match_list(file, result.token.start, pp_ops, -1); _Assert(match.index != -1); result.pos = match.new_pos; result.token.size = result.pos - result.token.start; result.token.type = (Cpp_Token_Type)pp_ops.data[match.index].flags; return result; } FCPP_LINK Cpp_Read_Result cpp_read_alpha_numeric(Cpp_File file, int pos, bool in_if_body){ Cpp_Read_Result result = {}; result.pos = pos; result.token.start = pos; while (result.pos < file.size && char_is_alpha_numeric(file.data[result.pos])){ ++result.pos; } result.token.size = result.pos - result.token.start; // TODO(allen): do better if (in_if_body){ String word; word.size = result.token.size; word.str = file.data + result.token.start; if (match(word, "defined")){ result.token.type = CPP_TOKEN_DEFINED; result.token.flags |= CPP_TFLAG_IS_OPERATOR; result.token.flags |= CPP_TFLAG_IS_KEYWORD; } } if (result.token.type == CPP_TOKEN_JUNK){ Sub_Match_List_Result match; match = sub_match_list(file, result.token.start, bool_lits, result.token.size); if (match.index != -1){ result.token.type = CPP_TOKEN_BOOLEAN_CONSTANT; result.token.flags |= CPP_TFLAG_IS_KEYWORD; } else{ match = sub_match_list(file, result.token.start, keywords, result.token.size); if (match.index != -1){ String_And_Flag data = keywords.data[match.index]; result.token.type = (Cpp_Token_Type)data.flags; result.token.flags |= CPP_TFLAG_IS_KEYWORD; } else{ result.token.type = CPP_TOKEN_IDENTIFIER; } } } return result; } FCPP_LINK Cpp_Read_Result cpp_read_number(Cpp_File file, int pos){ Cpp_Read_Result result = {}; result.pos = pos; result.token.start = pos; bool is_float = 0; bool is_integer = 0; bool is_oct = 0; bool is_hex = 0; bool is_zero = 0; if (file.data[pos] == '0'){ if (pos+1 < file.size){ char next = file.data[pos+1]; if (next == 'x'){ is_hex = 1; is_integer = 1; } else if (next == '.'){ is_float = 1; ++result.pos; } else if (next >= '0' && next <= '9'){ is_oct = 1; is_integer = 1; } else{ is_zero = 1; is_integer = 1; } } else{ is_zero = 1; is_integer = 1; } } else if (file.data[pos] == '.'){ is_float = 1; } if (is_zero){ ++result.pos; } else if (is_hex){ ++result.pos; char character; do{ ++result.pos; if (result.pos >= file.size){ break; } character = file.data[result.pos]; } while(char_is_hex(character)); } else if (is_oct){ char character; do{ ++result.pos; if (result.pos >= file.size){ break; } character = file.data[result.pos]; }while(char_is_numeric(character)); } else{ if (!is_float){ is_integer = 1; while (1){ ++result.pos; if (result.pos >= file.size){ break; } bool is_good = 0; char character = file.data[result.pos]; if (character >= '0' && character <= '9'){ is_good = 1; } else if (character == '.'){ is_integer = 0; is_float = 1; } if (!is_good){ break; } } } if (is_float){ bool e_mode = 0; bool e_minus = 0; bool is_good = 0; char character; while (1){ ++result.pos; if (result.pos >= file.size){ break; } is_good = 0; character = file.data[result.pos]; if (character >= '0' && character <= '9'){ is_good = 1; } else{ if (character == 'e' && !e_mode){ e_mode = 1; is_good = 1; } else if (character == '-' && e_mode && !e_minus){ e_minus = 1; is_good = 1; } } if (!is_good){ break; } } } } if (is_integer){ Sub_Match_List_Result match = sub_match_list(file, result.pos, int_sufs, -1); if (match.index != -1){ result.pos = match.new_pos; } result.token.type = CPP_TOKEN_INTEGER_CONSTANT; result.token.size = result.pos - result.token.start; } else if (is_float){ Sub_Match_List_Result match = sub_match_list(file, result.pos, float_sufs, -1); if (match.index != -1){ result.pos = match.new_pos; } result.token.type = CPP_TOKEN_FLOATING_CONSTANT; result.token.size = result.pos - result.token.start; } else{ _Assert(!"This shouldn't happen!"); } return result; } FCPP_LINK Cpp_Read_Result cpp_read_string_litteral(Cpp_File file, int pos){ Cpp_Read_Result result = {}; result.token.start = pos; _Assert(file.data[pos] == '"'); Seek_Result seek = seek_unescaped_delim(file.data, file.size, pos, '"'); pos = seek.pos; if (seek.new_line){ result.token.flags |= CPP_TFLAG_MULTILINE; } result.token.size = pos - result.token.start; result.token.type = CPP_TOKEN_STRING_CONSTANT; result.pos = pos; return result; } FCPP_LINK Cpp_Read_Result cpp_read_character_litteral(Cpp_File file, int pos){ Cpp_Read_Result result = {}; result.token.start = pos; _Assert(file.data[pos] == '\''); Seek_Result seek = seek_unescaped_delim(file.data, file.size, pos, '\''); pos = seek.pos; if (seek.new_line){ result.token.flags |= CPP_TFLAG_MULTILINE; } result.token.size = pos - result.token.start; result.token.type = CPP_TOKEN_CHARACTER_CONSTANT; result.pos = pos; return result; } FCPP_LINK Cpp_Read_Result cpp_read_line_comment(Cpp_File file, int pos){ Cpp_Read_Result result = {}; result.token.start = pos; _Assert(file.data[pos] == '/' && file.data[pos + 1] == '/'); pos += 2; while (pos < file.size){ if (file.data[pos] == '\n'){ break; } if (file.data[pos] == '\\'){ if (pos + 1 < file.size && file.data[pos + 1] == '\n'){ ++pos; } else if (pos + 2 < file.size && file.data[pos + 1] == '\r' && file.data[pos + 2] == '\n'){ pos += 2; } } ++pos; } if (pos > 0 && file.data[pos-1] == '\r'){ --pos; } result.token.size = pos - result.token.start; result.token.type = CPP_TOKEN_COMMENT; result.pos = pos; return result; } FCPP_LINK Cpp_Read_Result cpp_read_block_comment(Cpp_File file, int pos){ Cpp_Read_Result result = {}; result.token.start = pos; _Assert(file.data[pos] == '/' && file.data[pos + 1] == '*'); pos += 2; while (pos < file.size){ if (file.data[pos] == '*' && pos + 1 < file.size && file.data[pos+1] == '/'){ break; } ++pos; } pos += 2; result.token.size = pos - result.token.start; result.token.type = CPP_TOKEN_COMMENT; result.pos = pos; return result; } FCPP_LINK Cpp_Read_Result cpp_read_preprocessor(Cpp_File file, int pos){ _Assert(file.data[pos] == '#'); Cpp_Read_Result result = {}; result.token.start = pos; result.token.type = CPP_PP_UNKNOWN; result.token.flags |= CPP_TFLAG_PP_DIRECTIVE; ++pos; while (pos < file.size && (file.data[pos] == ' ' || file.data[pos] == '\t')){ ++pos; } Sub_Match_List_Result match = sub_match_list(file, pos, preprops, -1); if (match.index != -1){ result.token.size = match.new_pos - result.token.start; result.token.type = (Cpp_Token_Type)preprops.data[match.index].flags; result.pos = match.new_pos; } else{ while (pos < file.size && !char_is_whitespace(file.data[pos])){ ++pos; } result.token.size = pos - result.token.start; result.pos = pos; } return result; } FCPP_LINK Cpp_Read_Result cpp_read_pp_include_file(Cpp_File file, int pos){ char start = file.data[pos]; _Assert(start == '<' || start == '"'); Cpp_Read_Result result = {}; result.token.start = pos; result.token.type = CPP_TOKEN_INCLUDE_FILE; result.token.flags |= CPP_TFLAG_PP_BODY; char end; if (start == '<'){ end = '>'; } else{ end = '"'; } ++pos; while (pos < file.size && file.data[pos] != end){ if (file.data[pos] == '\n'){ result.token.type = CPP_TOKEN_JUNK; result.token.flags |= CPP_TFLAG_BAD_ENDING; break; } if (file.data[pos] == '\\'){ // TODO(allen): Not sure that this is 100% correct. if (pos + 1 < file.size && file.data[pos + 1] == '\n'){ ++pos; result.token.flags |= CPP_TFLAG_MULTILINE; } else if (pos + 2 < file.size && file.data[pos + 1] == '\r' && file.data[pos + 2] == '\n'){ pos += 2; result.token.flags |= CPP_TFLAG_MULTILINE; } } ++pos; } if (result.token.type != CPP_TOKEN_JUNK){ if (pos < file.size){ ++pos; } } result.token.size = pos - result.token.start; result.pos = pos; return result; } FCPP_LINK Cpp_Read_Result cpp_read_pp_default_mode(Cpp_File file, int pos, bool in_if_body){ char current = file.data[pos]; Cpp_Read_Result result; if (char_is_numeric(current)){ result = cpp_read_number(file, pos); } else if (char_is_alpha(current)){ result = cpp_read_alpha_numeric(file, pos, in_if_body); } else if (current == '.'){ if (pos + 1 < file.size){ char next = file.data[pos + 1]; if (char_is_numeric(next)){ result = cpp_read_number(file, pos); } else{ result = cpp_read_operator(file, pos); } } else{ result = cpp_read_operator(file, pos); } } else if (current == '/'){ if (pos + 1 < file.size){ char next = file.data[pos + 1]; if (next == '/'){ result = cpp_read_line_comment(file, pos); } else if (next == '*'){ result = cpp_read_block_comment(file, pos); } else{ result = cpp_read_operator(file, pos); } } else{ result = cpp_read_operator(file, pos); } } else if (current == '"'){ result = cpp_read_string_litteral(file, pos); } else if (current == '\''){ result = cpp_read_character_litteral(file, pos); } else{ result = cpp_read_operator(file, pos); } return result; } FCPP_LINK Cpp_Token_Merge cpp_attempt_token_merge(Cpp_Token prev_token, Cpp_Token next_token){ Cpp_Token_Merge result = {}; if (next_token.type == CPP_TOKEN_COMMENT && prev_token.type == CPP_TOKEN_COMMENT && next_token.flags == prev_token.flags && next_token.state_flags == prev_token.state_flags){ result.did_merge = 1; prev_token.size = next_token.start + next_token.size - prev_token.start; result.new_token = prev_token; } else if (next_token.type == CPP_TOKEN_JUNK && prev_token.type == CPP_TOKEN_JUNK && next_token.flags == prev_token.flags && next_token.state_flags == prev_token.state_flags){ result.did_merge = 1; prev_token.size = next_token.start + next_token.size - prev_token.start; result.new_token = prev_token; } return result; } FCPP_LINK bool cpp_push_token_no_merge(Cpp_Token_Stack *token_stack, Cpp_Token token){ if (token_stack->count >= token_stack->max_count){ return 0; } token_stack->tokens[token_stack->count++] = token; return 1; } FCPP_LINK bool cpp_push_token_nonalloc(Cpp_Token_Stack *token_stack, Cpp_Token token){ Cpp_Token_Merge merge = {}; if (token_stack->count > 0){ Cpp_Token prev_token = token_stack->tokens[token_stack->count - 1]; merge = cpp_attempt_token_merge(prev_token, token); if (merge.did_merge){ token_stack->tokens[token_stack->count - 1] = merge.new_token; } } if (!merge.did_merge){ if (token_stack->count >= token_stack->max_count){ return 0; } token_stack->tokens[token_stack->count++] = token; } return 1; } FCPP_LINK Cpp_Read_Result cpp_lex_step(Cpp_File file, Cpp_Lex_Data *lex_data){ Cpp_Lex_Data lex = *lex_data; Cpp_Read_Result result = {}; bool has_result = 1; fcpp_u16 state_flags = cpp_token_set_pp_state(0, lex.pp_state); char current = file.data[lex.pos]; if (char_is_whitespace(current)){ result = cpp_read_whitespace(file, lex.pos); lex.pos = result.pos; if (result.newline && lex.pp_state != CPP_LEX_PP_DEFAULT){ lex.pp_state = CPP_LEX_PP_DEFAULT; } has_result = 0; } else{ if (lex.pp_state == CPP_LEX_PP_DEFAULT){ // TODO(allen): Not first hard of the line? Then it's junk. if (current == '#'){ result = cpp_read_preprocessor(file, lex.pos); lex.pos = result.pos; switch (result.token.type){ case CPP_PP_INCLUDE: case CPP_PP_IMPORT: case CPP_PP_USING: lex.pp_state = CPP_LEX_PP_INCLUDE; break; case CPP_PP_DEFINE: lex.pp_state = CPP_LEX_PP_MACRO_IDENTIFIER; break; case CPP_PP_UNDEF: case CPP_PP_IFDEF: case CPP_PP_IFNDEF: lex.pp_state = CPP_LEX_PP_IDENTIFIER; break; case CPP_PP_IF: case CPP_PP_ELIF: lex.pp_state = CPP_LEX_PP_BODY_IF; break; case CPP_PP_PRAGMA: lex.pp_state = CPP_LEX_PP_BODY; break; case CPP_PP_LINE: lex.pp_state = CPP_LEX_PP_NUMBER; break; case CPP_PP_ERROR: case CPP_PP_UNKNOWN: case CPP_PP_ELSE: case CPP_PP_ENDIF: lex.pp_state = CPP_LEX_PP_JUNK; break; } } else{ result = cpp_read_pp_default_mode(file, lex.pos); lex.pos = result.pos; } } else{ if (current == '\\'){ fcpp_i32 seek = lex.pos; ++seek; while (seek < file.size && file.data[seek] == '\r'){ ++seek; } if ((seek < file.size && file.data[seek] == '\n') || seek >= file.size){ lex.pos = seek + 1; has_result = 0; } else{ lex.pp_state = CPP_LEX_PP_JUNK; result.token.type = CPP_TOKEN_JUNK; result.token.start = lex.pos; result.token.size = 1; result.token.flags |= CPP_TFLAG_PP_BODY; lex.pos = seek; } } else{ if (lex.pp_state == CPP_LEX_PP_IDENTIFIER){ if (!char_is_alpha_numeric(current)){ has_result = 0; lex.pp_state = CPP_LEX_PP_JUNK; } else{ result = cpp_read_alpha_numeric(file, lex.pos); result.token.flags |= CPP_TFLAG_PP_BODY; lex.pos = result.pos; lex.pp_state = CPP_LEX_PP_JUNK; } } else if (lex.pp_state == CPP_LEX_PP_MACRO_IDENTIFIER){ if (!char_is_alpha_numeric(current)){ has_result = 0; lex.pp_state = CPP_LEX_PP_JUNK; } else{ result = cpp_read_alpha_numeric(file, lex.pos); result.token.flags |= CPP_TFLAG_PP_BODY; lex.pos = result.pos; lex.pp_state = CPP_LEX_PP_BODY; } } else if (lex.pp_state == CPP_LEX_PP_INCLUDE){ if (current != '"' && current != '<'){ has_result = 0; lex.pp_state = CPP_LEX_PP_JUNK; } else{ result = cpp_read_pp_include_file(file, lex.pos); lex.pos = result.pos; lex.pp_state = CPP_LEX_PP_JUNK; } } else if (lex.pp_state == CPP_LEX_PP_BODY){ if (current == '#'){ result = cpp_read_pp_operator(file, lex.pos); } else{ result = cpp_read_pp_default_mode(file, lex.pos); } lex.pos = result.pos; result.token.flags |= CPP_TFLAG_PP_BODY; } else if (lex.pp_state == CPP_LEX_PP_BODY_IF){ if (current == '#'){ result = cpp_read_pp_operator(file, lex.pos); } else{ result = cpp_read_pp_default_mode(file, lex.pos, 1); } lex.pos = result.pos; result.token.flags |= CPP_TFLAG_PP_BODY; } else if (lex.pp_state == CPP_LEX_PP_NUMBER){ if (!char_is_numeric(current)){ has_result = 0; lex.pp_state = CPP_LEX_PP_JUNK; } else{ result = cpp_read_number(file, lex.pos); lex.pos = result.pos; result.token.flags |= CPP_TFLAG_PP_BODY; lex.pp_state = CPP_LEX_PP_INCLUDE; } } else{ bool took_comment = 0; if (current == '/' && lex.pos + 1 < file.size){ if (file.data[lex.pos + 1] == '/'){ result = cpp_read_line_comment(file, lex.pos); lex.pp_state = CPP_LEX_PP_DEFAULT; lex.pos = result.pos; took_comment = 1; }else if (file.data[lex.pos + 1] == '*'){ result = cpp_read_block_comment(file, lex.pos); lex.pos = result.pos; took_comment = 1; } } if (!took_comment){ result = cpp_read_junk_line(file, lex.pos); lex.pos = result.pos; result.token.flags |= CPP_TFLAG_PP_BODY; } } } } } result.token.state_flags = state_flags; result.has_result = has_result; *lex_data = lex; return result; } FCPP_LINK int cpp_lex_file_token_count(Cpp_File file){ int count = 0; Cpp_Lex_Data lex = {}; Cpp_Token token = {}; while (lex.pos < file.size){ Cpp_Read_Result step_result = cpp_lex_step(file, &lex); if (step_result.has_result){ if (count > 0){ Cpp_Token_Merge merge = cpp_attempt_token_merge(token, step_result.token); if (merge.did_merge){ token = merge.new_token; } else{ token = step_result.token; ++count; } } else{ token = step_result.token; ++count; } } } return count; } FCPP_LINK Cpp_Lex_Data cpp_lex_file_nonalloc(Cpp_File file, Cpp_Token_Stack *token_stack_out, Cpp_Lex_Data data){ while (data.pos < file.size){ Cpp_Lex_Data prev_lex = data; Cpp_Read_Result step_result = cpp_lex_step(file, &data); if (step_result.has_result){ if (!cpp_push_token_nonalloc(token_stack_out, step_result.token)){ data = prev_lex; return data; } } } data.complete = 1; return data; } FCPP_LINK Cpp_Get_Token_Result cpp_get_token(Cpp_Token_Stack *token_stack, int pos){ int first, last; first = 0; last = token_stack->count; Cpp_Get_Token_Result result = {}; while (1){ result.token_index = (first + last)/2; int this_start = token_stack->tokens[result.token_index].start; int next_start; if (result.token_index + 1 < token_stack->count){ next_start = token_stack->tokens[result.token_index+1].start; } else{ next_start = this_start + token_stack->tokens[result.token_index].size; } if (this_start <= pos && pos < next_start){ break; } else if (pos < this_start){ last = result.token_index; } else{ first = result.token_index + 1; } if (first == last){ result.token_index = first; break; } } if (result.token_index == token_stack->count){ --result.token_index; result.in_whitespace = 1; } else{ Cpp_Token *token = token_stack->tokens + result.token_index; if (token->start + token->size <= pos){ result.in_whitespace = 1; } } return result; } FCPP_LINK int cpp_get_end_token(Cpp_Token_Stack *stack, int end){ Cpp_Get_Token_Result result = cpp_get_token(stack, end); if (result.token_index < 0) result.token_index = 0; else if (end > stack->tokens[result.token_index].start) ++result.token_index; return result.token_index; } FCPP_LINK void cpp_shift_token_starts(Cpp_Token_Stack *stack, int from_token_i, int amount){ int count = stack->count; Cpp_Token *token = stack->tokens + from_token_i; for (int i = from_token_i; i < count; ++i, ++token){ token->start += amount; } } FCPP_LINK Cpp_Relex_State cpp_relex_nonalloc_start(Cpp_File file, Cpp_Token_Stack *stack, int start, int end, int amount, int tolerance){ Cpp_Relex_State state; state.file = file; state.stack = stack; state.start = start; state.end = end; state.amount = amount; state.tolerance = tolerance; Cpp_Get_Token_Result result = cpp_get_token(stack, start); if (result.token_index <= 0){ state.start_token_i = 0; } else{ state.start_token_i = result.token_index-1; } result = cpp_get_token(stack, end); if (result.token_index < 0) result.token_index = 0; else if (end > stack->tokens[result.token_index].start) ++result.token_index; state.end_token_i = result.token_index; state.relex_start = stack->tokens[state.start_token_i].start; if (start < state.relex_start) state.relex_start = start; state.space_request = state.end_token_i - state.start_token_i + tolerance + 1; return state; } inline Cpp_Token cpp__get_token(Cpp_Token_Stack *stack, Cpp_Token *tokens, int size, int index){ Cpp_Token result; if (index < stack->count){ result = tokens[index]; } else{ result.start = size; result.size = 0; result.type = CPP_TOKEN_EOF; result.flags = 0; result.state_flags = 0; } return result; } FCPP_LINK bool cpp_relex_nonalloc_main(Cpp_Relex_State *state, Cpp_Token_Stack *relex_stack, int *relex_end){ Cpp_Token_Stack *stack = state->stack; Cpp_Token *tokens = stack->tokens; cpp_shift_token_starts(stack, state->end_token_i, state->amount); Cpp_Lex_Data lex = {}; lex.pp_state = cpp_token_get_pp_state(tokens[state->start_token_i].state_flags); lex.pos = state->relex_start; int relex_end_i = state->end_token_i; Cpp_Token match_token = cpp__get_token(stack, tokens, state->file.size, relex_end_i); Cpp_Token end_token = match_token; bool went_too_far = 0; for (;;){ Cpp_Read_Result read = cpp_lex_step(state->file, &lex); if (read.has_result){ if (read.token.start == match_token.start && read.token.size == match_token.size && read.token.flags == match_token.flags && read.token.state_flags == match_token.state_flags){ break; } cpp_push_token_nonalloc(relex_stack, read.token); while (lex.pos > end_token.start && relex_end_i < stack->count){ ++relex_end_i; end_token = cpp__get_token(stack, tokens, state->file.size, relex_end_i); } if (relex_stack->count == relex_stack->max_count){ went_too_far = 1; break; } } if (lex.pos >= state->file.size) break; } if (!went_too_far){ if (relex_stack->count > 0){ if (state->start_token_i > 0){ Cpp_Token_Merge merge = cpp_attempt_token_merge(tokens[state->start_token_i - 1], relex_stack->tokens[0]); if (merge.did_merge){ --state->start_token_i; relex_stack->tokens[0] = merge.new_token; } } if (relex_end_i < state->stack->count){ Cpp_Token_Merge merge = cpp_attempt_token_merge(relex_stack->tokens[relex_stack->count-1], tokens[relex_end_i]); if (merge.did_merge){ ++relex_end_i; relex_stack->tokens[relex_stack->count-1] = merge.new_token; } } } *relex_end = relex_end_i; } else{ cpp_shift_token_starts(stack, state->end_token_i, -state->amount); } return went_too_far; } #ifndef FCPP_FORBID_MALLOC FCPP_LINK Cpp_Token_Stack cpp_make_token_stack(int starting_max){ Cpp_Token_Stack token_stack; token_stack.count = 0; token_stack.max_count = starting_max; token_stack.tokens = (Cpp_Token*)FCPP_GET_MEMORY(sizeof(Cpp_Token)*starting_max); return token_stack; } FCPP_LINK void cpp_free_token_stack(Cpp_Token_Stack token_stack){ FCPP_FREE_MEMORY(token_stack.tokens); } FCPP_LINK void cpp_resize_token_stack(Cpp_Token_Stack *token_stack, int new_max){ Cpp_Token *new_tokens = (Cpp_Token*)FCPP_GET_MEMORY(sizeof(Cpp_Token)*new_max); if (new_tokens){ FCPP_MEM_COPY(new_tokens, token_stack->tokens, sizeof(Cpp_Token)*token_stack->count); FCPP_FREE_MEMORY(token_stack->tokens); token_stack->tokens = new_tokens; token_stack->max_count = new_max; } } FCPP_LINK void cpp_push_token(Cpp_Token_Stack *token_stack, Cpp_Token token){ if (!cpp_push_token_nonalloc(token_stack, token)){ int new_max = 2*token_stack->max_count + 1; cpp_resize_token_stack(token_stack, new_max); bool result = cpp_push_token_nonalloc(token_stack, token); _Assert(result); } } FCPP_LINK void cpp_lex_file(Cpp_File file, Cpp_Token_Stack *token_stack_out){ Cpp_Lex_Data lex = {}; while (lex.pos < file.size){ Cpp_Read_Result step_result = cpp_lex_step(file, &lex); if (step_result.has_result){ cpp_push_token(token_stack_out, step_result.token); } } } FCPP_LINK bool cpp_relex_file_limited(Cpp_File file, Cpp_Token_Stack *stack, int start, int end, int amount, int tolerance){ #if 0 int start_token_i, end_token_i; Cpp_Get_Token_Result get_result = cpp_get_token(token_stack, start_i); start_token_i = get_result.token_index; get_result = cpp_get_token(token_stack, end_i); end_token_i = get_result.token_index; if (end_token_i == -1){ end_token_i = 0; } else if (end > token_stack->tokens[end_token_i].start){ ++end_token_i; } cpp_shift_token_starts(token_stack, end_token_i, amount); int relex_start_i = start_token_i - 1; if (relex_start_i < 0){ relex_start_i = 0; } int end_guess_i = end_token_i + 1; if (end_guess_i > token_stack->count){ --end_guess_i; } #endif int relex_start_i; int end_token_i, end_guess_i; { Cpp_Get_Token_Result result = cpp_get_token(stack, start); if (result.token_index <= 0){ relex_start_i = 0; } else{ relex_start_i = result.token_index-1; } result = cpp_get_token(stack, end); if (result.token_index < 0) result.token_index = 0; else if (end > stack->tokens[result.token_index].start) ++result.token_index; end_token_i = result.token_index; end_guess_i = result.token_index+1; } int relex_start = stack->tokens[relex_start_i].start; if (start < relex_start) relex_start = start; cpp_shift_token_starts(stack, end_token_i, amount); Cpp_Token_Stack relex_stack = cpp_make_token_stack((end_guess_i - relex_start_i + 1) * 3 / 2); Cpp_Lex_Data lex = {}; lex.pp_state = cpp_token_get_pp_state(stack->tokens[relex_start_i].state_flags); lex.pos = relex_start; bool went_too_far = 0; while (1){ Cpp_Read_Result result = cpp_lex_step(file, &lex); if (result.has_result){ if (end_guess_i < stack->count && result.token.start == stack->tokens[end_guess_i].start && result.token.size == stack->tokens[end_guess_i].size && result.token.flags == stack->tokens[end_guess_i].flags && result.token.state_flags == stack->tokens[end_guess_i].state_flags){ break; } else{ cpp_push_token(&relex_stack, result.token); while (lex.pos > stack->tokens[end_guess_i].start && end_guess_i < stack->count){ ++end_guess_i; } } } if (lex.pos >= file.size){ break; } if (tolerance >= 0 && relex_stack.count + relex_start_i >= end_guess_i + tolerance){ went_too_far = 1; break; } } if (!went_too_far){ int relex_end_i = end_guess_i; if (relex_stack.count > 0){ if (relex_start_i > 0){ Cpp_Token_Merge merge = cpp_attempt_token_merge(stack->tokens[relex_start_i - 1], relex_stack.tokens[0]); if (merge.did_merge){ --relex_start_i; relex_stack.tokens[0] = merge.new_token; } } if (relex_end_i < stack->count){ Cpp_Token_Merge merge = cpp_attempt_token_merge(relex_stack.tokens[relex_stack.count - 1], stack->tokens[relex_end_i]); if (merge.did_merge){ ++relex_end_i; relex_stack.tokens[relex_stack.count - 1] = merge.new_token; } } } int token_delete_amount = relex_end_i - relex_start_i; int token_shift_amount = relex_stack.count - token_delete_amount; if (token_shift_amount != 0){ int new_token_count = stack->count + token_shift_amount; if (new_token_count > stack->max_count){ int new_max = 2*stack->max_count + 1; while (new_token_count > new_max){ new_max = 2*new_max + 1; } cpp_resize_token_stack(stack, new_max); } if (relex_end_i < stack->count){ FCPP_MEM_MOVE(stack->tokens + relex_end_i + token_shift_amount, stack->tokens + relex_end_i, sizeof(Cpp_Token)*(stack->count - relex_end_i)); } stack->count += token_shift_amount; } FCPP_MEM_COPY(stack->tokens + relex_start_i, relex_stack.tokens, sizeof(Cpp_Token)*relex_stack.count); cpp_free_token_stack(relex_stack); } else{ cpp_shift_token_starts(stack, end_token_i, -amount); cpp_free_token_stack(relex_stack); } return went_too_far; } #endif #undef _Assert #undef _TentativeAssert #undef FCPP_LEXER_IMPLEMENTATION #endif // #ifdef FCPP_LEXER_IMPLEMENTATION // BOTTOM