bug fixes and new lexer started

master
Allen Webster 2016-03-09 21:59:58 -05:00
parent 6dabe0a995
commit 19dd5af51a
9 changed files with 795 additions and 303 deletions

View File

@ -445,8 +445,10 @@ isearch(Application_Links *app, int start_reversed){
made_change = 1;
}
else if (in.key.keycode == key_back){
--bar.string.size;
made_change = 1;
if (bar.string.size > 0){
--bar.string.size;
made_change = 1;
}
}
int step_forward = 0;

View File

@ -50,6 +50,8 @@ inline bool char_is_slash(char c) { return (c == '\\' || c == '/'); }
// Converts an ASCII lowercase letter to uppercase; every other character is
// returned unchanged.
inline char char_to_upper(char c){
    if (c < 'a' || c > 'z'){
        return c;
    }
    return (char)(c - ('a' - 'A'));
}
// Converts an ASCII uppercase letter to lowercase; every other character is
// returned unchanged.
inline char char_to_lower(char c){
    if (c < 'A' || c > 'Z'){
        return c;
    }
    return (char)(c + ('a' - 'A'));
}
// Returns the offset of c from the character '0'. No range check is done, so
// non-digit input yields a value outside 0..9.
inline int char_to_int(char c){
    int digit = c - '0';
    return digit;
}
// Returns the character x positions after '0'. No range check is done; the
// result is only a digit character when x is in 0..9.
inline char int_to_char(int x){
    return (char)('0' + x);
}
// True for space, newline, carriage return and tab; false for everything else
// (including form feed and vertical tab).
inline bool char_is_whitespace(char c){
    switch (c){
        case ' ':
        case '\n':
        case '\r':
        case '\t':
            return true;
        default:
            return false;
    }
}
// Same test as char_is_whitespace except that carriage return is NOT counted:
// true only for space, newline and tab.
inline bool char_is_white_not_r(char c){
    switch (c){
        case ' ':
        case '\n':
        case '\t':
            return true;
        default:
            return false;
    }
}
@ -173,8 +175,10 @@ FCPP_LINK int compare(String a, String b);
FCPP_LINK int reverse_seek_slash(String str);
FCPP_LINK int reverse_seek_slash(String str, int start_pos);
inline bool get_front_of_directory(String *dest, String dir) { return append_checked(dest, substr(dir, reverse_seek_slash(dir) + 1)); }
inline bool get_path_of_directory(String *dest, String dir) { return append_checked(dest, substr(dir, 0, reverse_seek_slash(dir) + 1)); }
// Returns the part of dir that follows the position reported by
// reverse_seek_slash (i.e. everything after that slash).
inline String front_of_directory(String dir){
    int slash_pos = reverse_seek_slash(dir);
    return substr(dir, slash_pos + 1);
}

// Returns the leading part of dir up to and including the position reported by
// reverse_seek_slash.
inline String path_of_directory(String dir){
    int slash_pos = reverse_seek_slash(dir);
    return substr(dir, 0, slash_pos + 1);
}

// Appends front_of_directory(dir) to *dest via append_checked and forwards its
// result.
inline bool get_front_of_directory(String *dest, String dir){
    String front = front_of_directory(dir);
    return append_checked(dest, front);
}

// Appends path_of_directory(dir) to *dest via append_checked and forwards its
// result.
inline bool get_path_of_directory(String *dest, String dir){
    String path = path_of_directory(dir);
    return append_checked(dest, path);
}
FCPP_LINK bool set_last_folder(String *dir, char *folder_name, char slash);
FCPP_LINK bool set_last_folder(String *dir, String folder_name, char slash);
FCPP_LINK String file_extension(String str);

View File

@ -63,175 +63,7 @@ NOTES ON USE:
#ifndef FCPP_LEXER_INC
#define FCPP_LEXER_INC
enum Cpp_Token_Type{
CPP_TOKEN_JUNK,
CPP_TOKEN_COMMENT,
CPP_TOKEN_KEY_TYPE,
CPP_TOKEN_KEY_MODIFIER,
CPP_TOKEN_KEY_QUALIFIER,
CPP_TOKEN_KEY_OPERATOR, // NOTE(allen): This type is not actually stored in tokens
CPP_TOKEN_KEY_CONTROL_FLOW,
CPP_TOKEN_KEY_CAST,
CPP_TOKEN_KEY_TYPE_DECLARATION,
CPP_TOKEN_KEY_ACCESS,
CPP_TOKEN_KEY_LINKAGE,
CPP_TOKEN_KEY_OTHER,
CPP_TOKEN_IDENTIFIER,
CPP_TOKEN_INTEGER_CONSTANT,
CPP_TOKEN_CHARACTER_CONSTANT,
CPP_TOKEN_FLOATING_CONSTANT,
CPP_TOKEN_STRING_CONSTANT,
CPP_TOKEN_BOOLEAN_CONSTANT,
CPP_TOKEN_STATIC_ASSERT,
CPP_TOKEN_BRACKET_OPEN,
CPP_TOKEN_BRACKET_CLOSE,
CPP_TOKEN_PARENTHESE_OPEN,
CPP_TOKEN_PARENTHESE_CLOSE,
CPP_TOKEN_BRACE_OPEN,
CPP_TOKEN_BRACE_CLOSE,
CPP_TOKEN_SEMICOLON,
CPP_TOKEN_ELLIPSIS,
// NOTE(allen): Ambiguous tokens, lexer only,
// parser figures out the real meaning
CPP_TOKEN_STAR,
CPP_TOKEN_AMPERSAND,
CPP_TOKEN_TILDE,
CPP_TOKEN_PLUS,
CPP_TOKEN_MINUS,
CPP_TOKEN_INCREMENT,
CPP_TOKEN_DECREMENT,
// NOTE(allen): Precedence 1, LtoR
CPP_TOKEN_SCOPE,
// NOTE(allen): Precedence 2, LtoR
CPP_TOKEN_POSTINC, // from increment, parser only
CPP_TOKEN_POSTDEC, // from decrement, parser only
CPP_TOKEN_FUNC_STYLE_CAST, // parser only
CPP_TOKEN_CPP_STYLE_CAST,
CPP_TOKEN_CALL, // from open paren, parser only
CPP_TOKEN_INDEX, // from bracket open, parser only
CPP_TOKEN_DOT,
CPP_TOKEN_ARROW,
// NOTE(allen): Precedence 3, RtoL
CPP_TOKEN_PREINC, // from increment, parser only
CPP_TOKEN_PREDEC, // from decrement, parser only
CPP_TOKEN_POSITIVE, // from plus, parser only
CPP_TOKEN_NEGAITVE, // from minus, parser only
CPP_TOKEN_NOT,
CPP_TOKEN_BIT_NOT, // from tilde, direct from 'compl'
CPP_TOKEN_CAST, // from open paren, parser only
CPP_TOKEN_DEREF, // from star, parser only
CPP_TOKEN_TYPE_PTR, // from star, parser only
CPP_TOKEN_ADDRESS, // from ampersand, parser only
CPP_TOKEN_TYPE_REF, // from ampersand, parser only
CPP_TOKEN_SIZEOF,
CPP_TOKEN_ALIGNOF,
CPP_TOKEN_DECLTYPE,
CPP_TOKEN_TYPEID,
CPP_TOKEN_NEW,
CPP_TOKEN_DELETE,
CPP_TOKEN_NEW_ARRAY, // from new and bracket open, parser only
CPP_TOKEN_DELETE_ARRAY, // from delete and bracket open, parser only
// NOTE(allen): Precedence 4, LtoR
CPP_TOKEN_PTRDOT,
CPP_TOKEN_PTRARROW,
// NOTE(allen): Precedence 5, LtoR
CPP_TOKEN_MUL, // from start, parser only
CPP_TOKEN_DIV,
CPP_TOKEN_MOD,
// NOTE(allen): Precedence 6, LtoR
CPP_TOKEN_ADD, // from plus, parser only
CPP_TOKEN_SUB, // from minus, parser only
// NOTE(allen): Precedence 7, LtoR
CPP_TOKEN_LSHIFT,
CPP_TOKEN_RSHIFT,
// NOTE(allen): Precedence 8, LtoR
CPP_TOKEN_LESS,
CPP_TOKEN_GRTR,
CPP_TOKEN_GRTREQ,
CPP_TOKEN_LESSEQ,
// NOTE(allen): Precedence 9, LtoR
CPP_TOKEN_EQEQ,
CPP_TOKEN_NOTEQ,
// NOTE(allen): Precedence 10, LtoR
CPP_TOKEN_BIT_AND, // from ampersand, direct from 'bitand'
// NOTE(allen): Precedence 11, LtoR
CPP_TOKEN_BIT_XOR,
// NOTE(allen): Precedence 12, LtoR
CPP_TOKEN_BIT_OR,
// NOTE(allen): Precedence 13, LtoR
CPP_TOKEN_AND,
// NOTE(allen): Precedence 14, LtoR
CPP_TOKEN_OR,
// NOTE(allen): Precedence 15, RtoL
CPP_TOKEN_TERNARY_QMARK,
CPP_TOKEN_COLON,
CPP_TOKEN_THROW,
CPP_TOKEN_EQ,
CPP_TOKEN_ADDEQ,
CPP_TOKEN_SUBEQ,
CPP_TOKEN_MULEQ,
CPP_TOKEN_DIVEQ,
CPP_TOKEN_MODEQ,
CPP_TOKEN_LSHIFTEQ,
CPP_TOKEN_RSHIFTEQ,
CPP_TOKEN_ANDEQ,
CPP_TOKEN_OREQ,
CPP_TOKEN_XOREQ,
// NOTE(allen): Precedence 16, LtoR
CPP_TOKEN_COMMA,
CPP_PP_INCLUDE,
CPP_PP_DEFINE,
CPP_PP_UNDEF,
CPP_PP_IF,
CPP_PP_IFDEF,
CPP_PP_IFNDEF,
CPP_PP_ELSE,
CPP_PP_ELIF,
CPP_PP_ENDIF,
CPP_PP_ERROR,
CPP_PP_IMPORT,
CPP_PP_USING,
CPP_PP_LINE,
CPP_PP_PRAGMA,
CPP_PP_STRINGIFY,
CPP_PP_CONCAT,
CPP_PP_UNKNOWN,
CPP_TOKEN_DEFINED,
CPP_TOKEN_INCLUDE_FILE,
CPP_TOKEN_ERROR_MESSAGE,
// NOTE(allen): used in the parser
CPP_TOKEN_EOF
};
// TODO(allen): This is a dumb redundant type... probably just
// move towards using String for this everywhere eventually.
struct Cpp_File{
char *data;
int size;
};
#include "4cpp_lexer_types.h"
Cpp_File
data_as_cpp_file(Data data){
@ -241,71 +73,6 @@ data_as_cpp_file(Data data){
return(result);
}
struct Cpp_Token{
Cpp_Token_Type type;
fcpp_i32 start, size;
fcpp_u16 state_flags;
fcpp_u16 flags;
};
enum Cpp_Token_Flag{
CPP_TFLAG_IGNORE = 1 << 0,
CPP_TFLAG_PP_DIRECTIVE = 1 << 1,
CPP_TFLAG_PP_BODY = 1 << 2,
CPP_TFLAG_BAD_ENDING = 1 << 3,
CPP_TFLAG_MULTILINE = 1 << 4,
CPP_TFLAG_PARAMETERIZED = 1 << 5,
CPP_TFLAG_IS_OPERATOR = 1 << 6,
CPP_TFLAG_IS_KEYWORD = 1 << 7
};
enum Cpp_Preprocessor_State{
CPP_LEX_PP_DEFAULT,
CPP_LEX_PP_IDENTIFIER,
CPP_LEX_PP_MACRO_IDENTIFIER,
CPP_LEX_PP_INCLUDE,
CPP_LEX_PP_BODY,
CPP_LEX_PP_BODY_IF,
CPP_LEX_PP_NUMBER,
CPP_LEX_PP_ERROR,
CPP_LEX_PP_JUNK,
// NEVER ADD BELOW THIS
CPP_LEX_PP_COUNT
};
struct Cpp_Lex_Data{
Cpp_Preprocessor_State pp_state;
fcpp_i32 pos;
fcpp_bool32 complete;
};
struct Cpp_Read_Result{
Cpp_Token token;
fcpp_i32 pos;
fcpp_bool8 newline;
fcpp_bool8 has_result;
};
struct Cpp_Token_Stack{
Cpp_Token *tokens;
int count, max_count;
};
struct Cpp_Token_Merge{
Cpp_Token new_token;
fcpp_bool32 did_merge;
};
struct Seek_Result{
fcpp_i32 pos;
fcpp_bool32 new_line;
};
struct Cpp_Get_Token_Result{
fcpp_i32 token_index;
fcpp_bool32 in_whitespace;
};
// TODO(allen): revisit this keyword data declaration system
struct String_And_Flag{
char *str;
@ -383,17 +150,6 @@ FCPP_LINK Cpp_Get_Token_Result cpp_get_token(Cpp_Token_Stack *stack, int pos);
FCPP_LINK int cpp_get_end_token(Cpp_Token_Stack *stack, int end);
FCPP_LINK void cpp_shift_token_starts(Cpp_Token_Stack *stack, int from_token, int amount);
struct Cpp_Relex_State{
Cpp_File file;
Cpp_Token_Stack *stack;
int start, end, amount;
int start_token_i;
int end_token_i;
int relex_start;
int tolerance;
int space_request;
};
FCPP_LINK Cpp_Relex_State cpp_relex_nonalloc_start(Cpp_File file, Cpp_Token_Stack *stack, int start, int end, int amount, int tolerance);
FCPP_LINK bool cpp_relex_nonalloc_main(Cpp_Relex_State state, Cpp_Token_Stack *stack);
@ -1614,14 +1370,6 @@ cpp_get_token(Cpp_Token_Stack *token_stack, int pos){
return result;
}
FCPP_LINK int
cpp_get_end_token(Cpp_Token_Stack *stack, int end){
Cpp_Get_Token_Result result = cpp_get_token(stack, end);
if (result.token_index < 0) result.token_index = 0;
else if (end > stack->tokens[result.token_index].start) ++result.token_index;
return result.token_index;
}
FCPP_LINK void
cpp_shift_token_starts(Cpp_Token_Stack *stack, int from_token_i, int amount){
int count = stack->count;

256
4cpp_lexer_types.h Normal file
View File

@ -0,0 +1,256 @@
// TOP
#ifndef FCPP_LEXER_TYPES_INC
#define FCPP_LEXER_TYPES_INC
// Every kind of token used by the lexer and parser. Enumerators carry no
// explicit values, so their numeric value is their position in this list;
// inserting or reordering entries changes those values.
// The inline notes mark which kinds the lexer emits directly and which are only
// assigned later by the parser ("parser only"). Operator kinds are grouped by
// C++ precedence level and associativity (LtoR / RtoL).
enum Cpp_Token_Type{
CPP_TOKEN_JUNK,
CPP_TOKEN_COMMENT,
CPP_TOKEN_KEY_TYPE,
CPP_TOKEN_KEY_MODIFIER,
CPP_TOKEN_KEY_QUALIFIER,
CPP_TOKEN_KEY_OPERATOR, // NOTE(allen): This type is not actually stored in tokens
CPP_TOKEN_KEY_CONTROL_FLOW,
CPP_TOKEN_KEY_CAST,
CPP_TOKEN_KEY_TYPE_DECLARATION,
CPP_TOKEN_KEY_ACCESS,
CPP_TOKEN_KEY_LINKAGE,
CPP_TOKEN_KEY_OTHER,
CPP_TOKEN_IDENTIFIER,
CPP_TOKEN_INTEGER_CONSTANT,
CPP_TOKEN_CHARACTER_CONSTANT,
CPP_TOKEN_FLOATING_CONSTANT,
CPP_TOKEN_STRING_CONSTANT,
CPP_TOKEN_BOOLEAN_CONSTANT,
CPP_TOKEN_STATIC_ASSERT,
CPP_TOKEN_BRACKET_OPEN,
CPP_TOKEN_BRACKET_CLOSE,
CPP_TOKEN_PARENTHESE_OPEN,
CPP_TOKEN_PARENTHESE_CLOSE,
CPP_TOKEN_BRACE_OPEN,
CPP_TOKEN_BRACE_CLOSE,
CPP_TOKEN_SEMICOLON,
CPP_TOKEN_ELLIPSIS,
// NOTE(allen): Ambiguous tokens, lexer only,
// parser figures out the real meaning
CPP_TOKEN_STAR,
CPP_TOKEN_AMPERSAND,
CPP_TOKEN_TILDE,
CPP_TOKEN_PLUS,
CPP_TOKEN_MINUS,
CPP_TOKEN_INCREMENT,
CPP_TOKEN_DECREMENT,
// NOTE(allen): Precedence 1, LtoR
CPP_TOKEN_SCOPE,
// NOTE(allen): Precedence 2, LtoR
CPP_TOKEN_POSTINC, // from increment, parser only
CPP_TOKEN_POSTDEC, // from decrement, parser only
CPP_TOKEN_FUNC_STYLE_CAST, // parser only
CPP_TOKEN_CPP_STYLE_CAST,
CPP_TOKEN_CALL, // from open paren, parser only
CPP_TOKEN_INDEX, // from bracket open, parser only
CPP_TOKEN_DOT,
CPP_TOKEN_ARROW,
// NOTE(allen): Precedence 3, RtoL
CPP_TOKEN_PREINC, // from increment, parser only
CPP_TOKEN_PREDEC, // from decrement, parser only
CPP_TOKEN_POSITIVE, // from plus, parser only
CPP_TOKEN_NEGAITVE, // from minus, parser only (identifier is misspelled "NEGAITVE"; kept as-is because renaming it would break users of the name)
CPP_TOKEN_NOT,
CPP_TOKEN_BIT_NOT, // from tilde, direct from 'compl'
CPP_TOKEN_CAST, // from open paren, parser only
CPP_TOKEN_DEREF, // from star, parser only
CPP_TOKEN_TYPE_PTR, // from star, parser only
CPP_TOKEN_ADDRESS, // from ampersand, parser only
CPP_TOKEN_TYPE_REF, // from ampersand, parser only
CPP_TOKEN_SIZEOF,
CPP_TOKEN_ALIGNOF,
CPP_TOKEN_DECLTYPE,
CPP_TOKEN_TYPEID,
CPP_TOKEN_NEW,
CPP_TOKEN_DELETE,
CPP_TOKEN_NEW_ARRAY, // from new and bracket open, parser only
CPP_TOKEN_DELETE_ARRAY, // from delete and bracket open, parser only
// NOTE(allen): Precedence 4, LtoR
CPP_TOKEN_PTRDOT,
CPP_TOKEN_PTRARROW,
// NOTE(allen): Precedence 5, LtoR
CPP_TOKEN_MUL, // from star, parser only
CPP_TOKEN_DIV,
CPP_TOKEN_MOD,
// NOTE(allen): Precedence 6, LtoR
CPP_TOKEN_ADD, // from plus, parser only
CPP_TOKEN_SUB, // from minus, parser only
// NOTE(allen): Precedence 7, LtoR
CPP_TOKEN_LSHIFT,
CPP_TOKEN_RSHIFT,
// NOTE(allen): Precedence 8, LtoR
CPP_TOKEN_LESS,
CPP_TOKEN_GRTR,
CPP_TOKEN_GRTREQ,
CPP_TOKEN_LESSEQ,
// NOTE(allen): Precedence 9, LtoR
CPP_TOKEN_EQEQ,
CPP_TOKEN_NOTEQ,
// NOTE(allen): Precedence 10, LtoR
CPP_TOKEN_BIT_AND, // from ampersand, direct from 'bitand'
// NOTE(allen): Precedence 11, LtoR
CPP_TOKEN_BIT_XOR,
// NOTE(allen): Precedence 12, LtoR
CPP_TOKEN_BIT_OR,
// NOTE(allen): Precedence 13, LtoR
CPP_TOKEN_AND,
// NOTE(allen): Precedence 14, LtoR
CPP_TOKEN_OR,
// NOTE(allen): Precedence 15, RtoL
CPP_TOKEN_TERNARY_QMARK,
CPP_TOKEN_COLON,
CPP_TOKEN_THROW,
CPP_TOKEN_EQ,
CPP_TOKEN_ADDEQ,
CPP_TOKEN_SUBEQ,
CPP_TOKEN_MULEQ,
CPP_TOKEN_DIVEQ,
CPP_TOKEN_MODEQ,
CPP_TOKEN_LSHIFTEQ,
CPP_TOKEN_RSHIFTEQ,
CPP_TOKEN_ANDEQ,
CPP_TOKEN_OREQ,
CPP_TOKEN_XOREQ,
// NOTE(allen): Precedence 16, LtoR
CPP_TOKEN_COMMA,
// Preprocessor directive kinds.
CPP_PP_INCLUDE,
CPP_PP_DEFINE,
CPP_PP_UNDEF,
CPP_PP_IF,
CPP_PP_IFDEF,
CPP_PP_IFNDEF,
CPP_PP_ELSE,
CPP_PP_ELIF,
CPP_PP_ENDIF,
CPP_PP_ERROR,
CPP_PP_IMPORT,
CPP_PP_USING,
CPP_PP_LINE,
CPP_PP_PRAGMA,
CPP_PP_STRINGIFY,
CPP_PP_CONCAT,
CPP_PP_UNKNOWN,
CPP_TOKEN_DEFINED,
CPP_TOKEN_INCLUDE_FILE,
CPP_TOKEN_ERROR_MESSAGE,
// NOTE(allen): used in the parser
CPP_TOKEN_EOF
};
// TODO(allen): This is a dumb redundant type... probably just
// move towards using String for this everywhere eventually.
//
// Pointer + length pair describing the text buffer handed to the lexer.
struct Cpp_File{
    char *data; // first byte of the text
    int size;   // number of bytes at data
};
// One lexed token: its kind plus the byte range it covers in the file.
struct Cpp_Token{
    Cpp_Token_Type type;  // kind of token
    fcpp_i32 start;       // absolute position of the token's first byte
    fcpp_i32 size;        // length of the token in bytes
    fcpp_u16 state_flags; // preprocessor state value in effect when the token was lexed
    fcpp_u16 flags;       // combination of Cpp_Token_Flag bits
};
// Bit flags that can be combined in Cpp_Token::flags. Each enumerator occupies
// its own bit (bit 0 through bit 7).
enum Cpp_Token_Flag{
    CPP_TFLAG_IGNORE        = 0x01,
    CPP_TFLAG_PP_DIRECTIVE  = 0x02,
    CPP_TFLAG_PP_BODY       = 0x04,
    CPP_TFLAG_BAD_ENDING    = 0x08,
    CPP_TFLAG_MULTILINE     = 0x10,
    CPP_TFLAG_PARAMETERIZED = 0x20,
    CPP_TFLAG_IS_OPERATOR   = 0x40,
    CPP_TFLAG_IS_KEYWORD    = 0x80
};
// States of the lexer's preprocessor handling. Values are consecutive starting
// at zero so that CPP_LEX_PP_COUNT equals the number of real states.
enum Cpp_Preprocessor_State{
    CPP_LEX_PP_DEFAULT = 0,
    CPP_LEX_PP_IDENTIFIER,
    CPP_LEX_PP_MACRO_IDENTIFIER,
    CPP_LEX_PP_INCLUDE,
    CPP_LEX_PP_BODY,
    CPP_LEX_PP_BODY_IF,
    CPP_LEX_PP_NUMBER,
    CPP_LEX_PP_ERROR,
    CPP_LEX_PP_JUNK,
    // NEVER ADD BELOW THIS
    CPP_LEX_PP_COUNT
};
// Lexer progress record: returned by a lex call and passed back in to continue
// it. Callers keep re-invoking the lexer until `complete` is non-zero.
struct Cpp_Lex_Data{
    Cpp_Preprocessor_State pp_state; // preprocessor state to carry into the next call
    fcpp_i32 pos;                    // NOTE(review): presumably the position to resume from - confirm
    fcpp_bool32 complete;            // non-zero once lexing has finished
};
// Result of reading a single token.
// NOTE(review): no use of this struct is visible in this header; the field
// notes below are inferred from the names - confirm against the lexer.
struct Cpp_Read_Result{
    Cpp_Token token;       // the token that was read
    fcpp_i32 pos;          // presumably the position after the read
    fcpp_bool8 newline;    // presumably set when a newline was encountered
    fcpp_bool8 has_result; // presumably set when `token` holds a valid token
};
// Caller-owned array of tokens that the lexer appends to.
struct Cpp_Token_Stack{
    Cpp_Token *tokens; // token storage
    int count;         // number of tokens currently stored
    int max_count;     // capacity of `tokens`, in tokens
};
// Outcome of an attempt to merge tokens.
// NOTE(review): the merge routine is not visible here; field meanings are
// inferred from the names - confirm.
struct Cpp_Token_Merge{
    Cpp_Token new_token;   // presumably the merged token when did_merge is set
    fcpp_bool32 did_merge; // presumably non-zero when a merge happened
};
// Result of a seek over the text.
// NOTE(review): the seek routines are not visible here; field meanings are
// inferred from the names - confirm.
struct Seek_Result{
    fcpp_i32 pos;         // presumably the position the seek stopped at
    fcpp_bool32 new_line; // presumably non-zero when a newline was crossed
};
// Result of looking up the token at a text position (see cpp_get_token).
struct Cpp_Get_Token_Result{
    fcpp_i32 token_index;      // index into the token array; -1 when the array is empty
    fcpp_bool32 in_whitespace; // non-zero when the position is not inside the indexed token
};
// State for an incremental relex; cpp_relex_nonalloc_start() builds it from
// (file, stack, start, end, amount, tolerance) and cpp_relex_nonalloc_main()
// consumes it.
// NOTE(review): the remaining fields are filled in by code not visible in this
// header; their notes are inferred from the names - confirm.
struct Cpp_Relex_State{
    Cpp_File file;          // text being relexed
    Cpp_Token_Stack *stack; // token array being updated
    int start;              // start argument given to cpp_relex_nonalloc_start
    int end;                // end argument given to cpp_relex_nonalloc_start
    int amount;             // amount argument given to cpp_relex_nonalloc_start
    int start_token_i;      // presumably the first token index affected
    int end_token_i;        // presumably the token index after the last one affected
    int relex_start;        // presumably the text position relexing begins at
    int tolerance;          // tolerance argument given to cpp_relex_nonalloc_start
    int space_request;      // presumably the token capacity needed when the stack is too small
};
#endif
// BOTTOM

View File

@ -32,7 +32,7 @@
#include "4tech_table.cpp"
#define FCPP_LEXER_IMPLEMENTATION
#include "4cpp_lexer.h"
#include "test/4cpp_new_lexer.h"
#include "4ed_template.cpp"

View File

@ -660,13 +660,13 @@ Job_Callback_Sig(job_full_lex){
tokens.count = 0;
Cpp_Lex_Data status;
status = cpp_lex_file_nonalloc(cpp_file, &tokens);
status = cpp_lex_nonalloc(cpp_file, &tokens);
while (!status.complete){
system->grow_thread_memory(memory);
tokens.tokens = (Cpp_Token*)memory->data;
tokens.max_count = memory->size / sizeof(Cpp_Token);
status = cpp_lex_file_nonalloc(cpp_file, &tokens, status);
status = cpp_lex_nonalloc(cpp_file, &tokens, status);
}
i32 new_max = LargeRoundUp(tokens.count+1, Kbytes(1));
@ -810,11 +810,17 @@ file_relex_parallel(System_Functions *system,
}
if (!inline_lex){
i32 end_token_i = cpp_get_end_token(&file->state.token_stack, end_i);
cpp_shift_token_starts(&file->state.token_stack, end_token_i, amount);
Cpp_Token_Stack *stack = &file->state.token_stack;
Cpp_Get_Token_Result get_token_result = cpp_get_token(stack, end_i);
i32 end_token_i = get_token_result.token_index;
if (end_token_i < 0) end_token_i = 0;
else if (end_i > stack->tokens[end_token_i].start) ++end_token_i;
cpp_shift_token_starts(stack, end_token_i, amount);
--end_token_i;
if (end_token_i >= 0){
Cpp_Token *token = file->state.token_stack.tokens + end_token_i;
Cpp_Token *token = stack->tokens + end_token_i;
if (token->start < end_i && token->start + token->size > end_i){
token->size += amount;
}

View File

@ -66,8 +66,8 @@
; [] tab character wrong width
; [] bouncing when scrolling down
; [] miblo's off screen cursor thing
; [] fyoucon's segfaults with malloc on win10
; [] open empty file bug
;
; [] open empty file bug ~ possibly a win10 issue?
;
;
@ -166,6 +166,7 @@
;
; HARD BUGS
; [] fyoucon's segfaults with malloc on win10
; [] repainting too slow for resize looks really dumb
; [] handling cursor in non-client part of window so it doesn't spaz
; [] fill screen right away

250
test/4cpp_new_lexer.h Normal file
View File

@ -0,0 +1,250 @@
// TOP
#ifndef FCPP_NEW_LEXER_INC
#define FCPP_NEW_LEXER_INC
#include "../4cpp_lexer_types.h"
#define lexer_link static
// Binary-searches token_stack for the token associated with text position pos.
// A token "owns" the range from its start up to the start of the next token
// (the last token owns only its own bytes). Assumes the tokens are ordered by
// ascending start.
//
// Result:
//  - empty stack: token_index = -1, in_whitespace = 1
//  - pos at/after the end of the last token: token_index = last index,
//    in_whitespace = 1
//  - pos before the first token: token_index = 0 (in_whitespace is left 0)
//  - otherwise: index of the owning token; in_whitespace = 1 when pos lies in
//    the gap after that token's bytes
lexer_link Cpp_Get_Token_Result
cpp_get_token(Cpp_Token_Stack *token_stack, int pos){
    Cpp_Get_Token_Result result = {};
    int count = token_stack->count;
    
    if (count <= 0){
        result.token_index = -1;
        result.in_whitespace = 1;
        return(result);
    }
    
    Cpp_Token *tokens = token_stack->tokens;
    Cpp_Token *probe = 0;
    int lo = 0;
    int hi = count;
    
    for (;;){
        int mid = (lo + hi)/2;
        int range_begin, range_end;
        
        probe = tokens + mid;
        range_begin = probe->start;
        if (mid + 1 < count){
            range_end = probe[1].start;
        }
        else{
            range_end = range_begin + probe->size;
        }
        
        if (range_begin <= pos && pos < range_end){
            // pos falls in the range owned by this token
            result.token_index = mid;
            break;
        }
        
        if (pos < range_begin){
            hi = mid;
        }
        else{
            lo = mid + 1;
        }
        
        if (lo == hi){
            // search interval is empty: report where it collapsed
            result.token_index = lo;
            break;
        }
    }
    
    if (result.token_index == count){
        // ran off the end of the array: clamp to the last token
        result.token_index = count - 1;
        result.in_whitespace = 1;
    }
    else if (probe->start + probe->size <= pos){
        result.in_whitespace = 1;
    }
    
    return(result);
}
// Adds shift_amount to the start position of every token from index
// from_token_i through the end of the stack. Token sizes are not modified.
lexer_link void
cpp_shift_token_starts(Cpp_Token_Stack *stack, int from_token_i, int shift_amount){
    Cpp_Token *tokens = stack->tokens;
    int token_count = stack->count;
    
    for (int index = from_token_i; index < token_count; ++index){
        tokens[index].start += shift_amount;
    }
}
// States of the character-level state machine in cpp_lex_nonalloc.
enum Lex_State{
    LS_default = 0,          // at the first character of a token
    LS_comment_pre,          // saw '/', deciding between "//" and "/*"
    LS_comment,              // inside a "//" comment
    LS_comment_block,        // inside a "/* */" comment
    LS_comment_block_ending, // inside a block comment, just saw '*'
    LS_dot,                  // entered from '.'; transition is currently compiled out (#if 0)
    LS_less,                 // entered from '<'; transition is currently compiled out (#if 0)
    LS_more,                 // entered from '>'; transition is currently compiled out (#if 0)
};
// Status returned by cpp_lex_nonalloc.
struct Lex_Data{
    int token_start; // position where the most recent token attempt began
    int token_end;   // position of the character that ended the most recent emitted token
    int completed;   // set to 1 by the lexer when it reached the end of its input
};
// Lexes `size` bytes of text and appends the tokens found to token_stack_out.
//
//   chunk              pointer to the first byte to lex
//   file_absolute_pos  file offset of chunk[0]; token starts are reported as
//                      absolute file positions
//   size               number of bytes available at chunk
//   token_stack_out    destination; tokens are appended after the existing
//                      `count` entries and lexing stops when `max_count` is hit
//
// Returns a Lex_Data whose `completed` is 1 when the whole range was consumed
// (0 when lexing stopped early because the token stack was full).
//
// Work in progress: only "//" and "/* */" comments and the single-character
// operators { } [ ] ( ) ~ , ? are recognized.
// NOTE(review): any other character neither starts nor ends a token, so it is
// absorbed into the range of the next emitted token; a '/' that is not
// followed by '/' or '*' stays in LS_comment_pre; and a "//" comment that runs
// to the end of the input without a newline is never emitted.
lexer_link Lex_Data
cpp_lex_nonalloc(char *chunk, int file_absolute_pos, int size, Cpp_Token_Stack *token_stack_out){
    Cpp_Token *out_tokens = token_stack_out->tokens;
    int token_i = token_stack_out->count;
    int max_token_i = token_stack_out->max_count;
    
    Cpp_Token token = {};
    
    int pos = file_absolute_pos;
    int end_pos = size + file_absolute_pos;
    
    unsigned short state = LS_default;
    unsigned short pp_state = 0;
    
    Lex_Data lex_data = {};
    
    int emit_token = 0;
    
    char c;
    
    // Rebase the pointer so that chunk[] can be indexed by absolute position.
    chunk -= file_absolute_pos;
    
    for (; pos < end_pos && token_i < max_token_i; ++pos){
        // Skip leading whitespace; afterwards pos is on the first
        // non-whitespace character (or on the last character of the input).
        for (; pos < end_pos;){
            c = chunk[pos++];
            if (!(c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\f' || c == '\v')) break;
        }
        --pos;
        
        lex_data.token_start = pos;
        
        // Run the state machine until a token is complete or input runs out.
        state = LS_default;
        emit_token = 0;
        for (; emit_token == 0 && pos < end_pos;){
            c = chunk[pos++];
            
            switch (state){
                case LS_default:
                switch (c){
                    case '/': state = LS_comment_pre; break;
                    
#define OperCase(op,type) case op: emit_token = 1; break;
                    OperCase('{', CPP_TOKEN_BRACE_OPEN);
                    OperCase('}', CPP_TOKEN_BRACE_CLOSE);
                    
                    OperCase('[', CPP_TOKEN_BRACKET_OPEN);
                    OperCase(']', CPP_TOKEN_BRACKET_CLOSE);
                    
                    OperCase('(', CPP_TOKEN_PARENTHESE_OPEN);
                    OperCase(')', CPP_TOKEN_PARENTHESE_CLOSE);
                    
                    OperCase('~', CPP_TOKEN_TILDE);
                    OperCase(',', CPP_TOKEN_COMMA);
                    OperCase('?', CPP_TOKEN_TERNARY_QMARK);
#undef OperCase
                    
#if 0
                    case '.': state = LS_dot; break;
                    case '<': state = LS_less; break;
                    case '>': state = LS_more; break;
#endif
                }
                break;
                
                case LS_dot:
                break;
                
                case LS_less:
                break;
                
                case LS_more:
                break;
                
                case LS_comment_pre:
                switch (c){
                    case '/': state = LS_comment; break;
                    case '*': state = LS_comment_block; break;
                }
                break;
                
                case LS_comment:
                switch (c){
                    case '\n': emit_token = 1; break;
                }
                break;
                
                case LS_comment_block:
                switch (c){
                    case '*': state = LS_comment_block_ending; break;
                }
                break;
                
                case LS_comment_block_ending:
                switch (c){
                    case '*': state = LS_comment_block_ending; break;
                    case '/': emit_token = 1; break;
                    default: state = LS_comment_block; break;
                }
                break;
            }
        }
        
        if (emit_token){
            // Step back onto the character that completed the token.
            --pos;
            lex_data.token_end = pos;
            
            // Each case below must leave pos one past the token's last byte so
            // that the size computation after the switch is correct.
            switch (state){
                case LS_default:
                switch (chunk[pos]){
#define OperCase(op,t) case op: token.type = t; break;
                    OperCase('{', CPP_TOKEN_BRACE_OPEN);
                    OperCase('}', CPP_TOKEN_BRACE_CLOSE);
                    
                    OperCase('[', CPP_TOKEN_BRACKET_OPEN);
                    OperCase(']', CPP_TOKEN_BRACKET_CLOSE);
                    
                    OperCase('(', CPP_TOKEN_PARENTHESE_OPEN);
                    OperCase(')', CPP_TOKEN_PARENTHESE_CLOSE);
                    
                    OperCase('~', CPP_TOKEN_TILDE);
                    OperCase(',', CPP_TOKEN_COMMA);
                    OperCase('?', CPP_TOKEN_TERNARY_QMARK);
#undef OperCase
                }
                token.flags = CPP_TFLAG_IS_OPERATOR;
                // The operator character itself belongs to the token; without
                // this step single-character operators were emitted with size 0.
                ++pos;
                break;
                
                case LS_comment: case LS_comment_block_ending:
                token.type = CPP_TOKEN_COMMENT;
                token.flags = 0;
                
                // Trim trailing whitespace (e.g. the newline that terminated a
                // "//" comment) from the comment's range.
                c = chunk[pos];
                while (c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\v' || c == '\f'){
                    --pos;
                    c = chunk[pos];
                }
                ++pos;
                break;
            }
            
            token.start = lex_data.token_start;
            token.size = pos - lex_data.token_start;
            token.state_flags = pp_state;
            out_tokens[token_i++] = token;
            
            // Resume from the terminating character; the outer ++pos steps past it.
            pos = lex_data.token_end;
        }
    }
    
    token_stack_out->count = token_i;
    
    // When the input ends without emitting a token (trailing whitespace, or an
    // unterminated token) the outer ++pos leaves pos at end_pos + 1, so the
    // end-of-input test must be >= rather than ==.
    if (pos >= end_pos) lex_data.completed = 1;
    return(lex_data);
}
#endif
// BOTTOM

View File

@ -15,57 +15,282 @@
#include "../4coder_string.h"
#include "../4cpp_types.h"
#include "../4cpp_lexer_types.h"
#define FCPP_LEXER_IMPLEMENTATION
#include "../4cpp_lexer.h"
#include "../4cpp_preprocessor.cpp"
namespace new_lex{
#include "4cpp_new_lexer.h"
}
#include <windows.h>
#include <stdio.h>
#include <stdlib.h>
Data
file_dump(char *filename){
Data result;
FILE *file;
result = {};
file = fopen(filename, "rb");
if (file){
fseek(file, 0, SEEK_END);
result.size = ftell(file);
fseek(file, 0, SEEK_SET);
result.data = (byte*)malloc(result.size);
fread(result.data, 1, result.size, file);
fclose(file);
// Reads the entire file `filename` into a malloc'd buffer using the Win32 file
// API and returns it as a Data {data, size}. If the file cannot be opened the
// returned Data is zero-initialized. The caller owns the buffer and must
// free() it.
static Data
dump_file(char *filename){
Data data = {};
HANDLE file;
DWORD hi, lo;
// Open an existing file for reading only.
file = CreateFile(filename, GENERIC_READ, 0, 0,
OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
if (file != INVALID_HANDLE_VALUE){
// GetFileSize returns the low 32 bits and stores the high 32 bits in hi;
// files whose size does not fit in 32 bits are rejected by the assert.
lo = GetFileSize(file, &hi);
assert(hi == 0);
data.size = (int)lo;
// One byte more than the file size is allocated; nothing in this function
// writes to that extra byte.
data.data = (byte*)malloc(data.size + 1);
// lo is reused to receive the number of bytes actually read.
ReadFile(file, data.data, lo, &lo, 0);
assert((int)lo == data.size);
CloseHandle(file);
}
// NOTE(review): `result` is never declared in this function - the next line
// looks like a leftover from the removed file_dump(); confirm and drop it.
return(result);
return(data);
}
int main(int argc, char **argv){
Data target_file;
Cpp_File file;
Cpp_Token_Stack tokens;
Cpp_Token *token;
int i;
// One directory entry produced by system_set_file_list.
struct File_Info{
    String filename; // entry name, stored inside the owning File_List's block
    int folder;      // 1 when the entry is a directory, 0 otherwise
};
if (argc != 2){
printf("usage: %s <cpp-file>\n", argv[0]);
exit(1);
// Directory listing filled in by system_set_file_list.
struct File_List{
    // Ignore this, it's for internal stuff.
    // (Single allocation that holds the File_Info array followed by the name
    // characters.)
    void *block;
    
    // The list of files and folders.
    File_Info *infos;
    int count;
    
    // Ignore this, it's for internal stuff.
    // (Size in bytes of `block`.)
    int block_size;
};
// Allocates `size` bytes with malloc and returns the pointer (null on failure).
void*
Win32GetMemory(int size){
    void *memory = malloc(size);
    return memory;
}
// Releases memory obtained from Win32GetMemory by handing it to free().
// Passing a null pointer is harmless.
void
Win32FreeMemory(void *ptr){
    free(ptr);
    return;
}
// Fills file_list with the entries of `directory` (excluding "." and "..").
//
// The directory is enumerated twice: the first pass counts the entries and the
// bytes needed for their names, the second pass writes the File_Info records
// and the name characters into file_list->block. The block is only
// reallocated when the existing one is too small, so a File_List can be reused
// across calls.
//
// When `directory` is empty the list is reset to zero entries; the block is
// additionally freed when directory.str is null.
static void
system_set_file_list(File_List *file_list, String directory){
if (directory.size > 0){
// Build the search pattern "<directory>\*" in a stack buffer.
char dir_space[MAX_PATH + 32];
String dir = make_string(dir_space, 0, MAX_PATH + 32);
append(&dir, directory);
char trail_str[] = "\\*";
append(&dir, trail_str);
char *c_str_dir = make_c_str(dir);
WIN32_FIND_DATA find_data;
HANDLE search;
search = FindFirstFileA(c_str_dir, &find_data);
if (search != INVALID_HANDLE_VALUE){
// Pass 1: count the entries and the bytes needed for their names
// (each name plus its terminating zero).
i32 count = 0;
i32 file_count = 0;
BOOL more_files = 1;
do{
if (!match(find_data.cFileName, ".") &&
!match(find_data.cFileName, "..")){
++file_count;
i32 size = 0;
for(;find_data.cFileName[size];++size);
count += size + 1;
}
more_files = FindNextFile(search, &find_data);
}while(more_files);
FindClose(search);
// Block layout: file_count File_Info records followed by the name bytes.
i32 required_size = count + file_count * sizeof(File_Info);
if (file_list->block_size < required_size){
Win32FreeMemory(file_list->block);
file_list->block = Win32GetMemory(required_size);
file_list->block_size = required_size;
}
file_list->infos = (File_Info*)file_list->block;
// Names are written immediately after the File_Info array.
char *name = (char*)(file_list->infos + file_count);
if (file_list->block){
// Pass 2: enumerate again and fill in the records.
// NOTE(review): this assumes the directory contents did not change between
// the two passes; extra entries would be written past the sizes computed in
// pass 1 - confirm whether that matters for this tool.
search = FindFirstFileA(c_str_dir, &find_data);
if (search != INVALID_HANDLE_VALUE){
File_Info *info = file_list->infos;
more_files = 1;
do{
if (!match(find_data.cFileName, ".") &&
!match(find_data.cFileName, "..")){
info->folder = (find_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0;
info->filename.str = name;
i32 i = 0;
for(;find_data.cFileName[i];++i) *name++ = find_data.cFileName[i];
info->filename.size = i;
info->filename.memory_size = info->filename.size + 1;
// Zero-terminate the stored name.
*name++ = 0;
// Normalize backslashes to forward slashes.
replace_char(info->filename, '\\', '/');
++info;
}
more_files = FindNextFile(search, &find_data);
}while(more_files);
FindClose(search);
file_list->count = file_count;
}else{
// Second enumeration failed: drop the block.
Win32FreeMemory(file_list->block);
file_list->block = 0;
file_list->block_size = 0;
}
}
}
}
// NOTE(review): the next four lines use `target_file` and `argv`, neither of
// which is declared in this function, and the `if` has no closing brace - they
// look like leftovers from the removed main(); confirm and drop them.
target_file = file_dump(argv[1]);
if (target_file.data == 0){
printf("couldn't open file %s\n", argv[1]);
exit(1);
else{
// Empty directory string: reset the list; free the block only when the string
// has no storage at all.
if (directory.str == 0){
Win32FreeMemory(file_list->block);
file_list->block = 0;
file_list->block_size = 0;
}
file_list->infos = 0;
file_list->count = 0;
}
}
tokens = cpp_make_token_stack(1 << 10);
#define TOKEN_MAX (1 << 12)
#define TOKEN_ARRAY_SIZE (TOKEN_MAX*sizeof(Cpp_Token))
file = data_as_cpp_file(target_file);
cpp_lex_file(file, &tokens);
// Gives `stack` a fresh, empty token array with room for TOKEN_MAX tokens.
// The allocation result is not checked.
static void
init_test_stack(Cpp_Token_Stack *stack){
    Cpp_Token *storage = (Cpp_Token*)malloc(TOKEN_ARRAY_SIZE);
    
    stack->max_count = TOKEN_MAX;
    stack->count = 0;
    stack->tokens = storage;
}
token = tokens.tokens;
for (i = 0; i < tokens.count; ++i, ++token){
printf("%.*s\n", token->size, file.data + token->start);
Cpp_Lex_Data lex_data = {};
// Shared state for the lexer comparison runs.
struct Experiment{
    Cpp_Token_Stack correct_stack; // tokens produced by the reference lexer
    Cpp_Token_Stack testing_stack; // tokens produced by the new lexer
    int passed_total;              // number of files whose token streams matched
    int test_total;                // number of files that were actually tested
};
// Runs both lexers over `filename` and compares the token streams.
//
// Only files with a "cpp" or "h" extension are considered, and only files
// smaller than 100 KiB are tested. For a tested file exp->test_total is
// incremented; exp->passed_total is incremented as well when the two token
// stacks agree in count and in every token's type, range and flags. Each
// mismatch is reported on stdout.
static void
run_experiment(Experiment *exp, char *filename){
    String extension = {};
    Data file_data;
    Cpp_File file_cpp;
    int pass;
    
    extension = file_extension(make_string_slowly(filename));
    
    if (match(extension, "cpp") || match(extension, "h")){
        pass = 1;
        printf("testing on file: %s\n", filename);
        file_data = dump_file(filename);
        
        if (file_data.size < (100 << 10)){
            exp->test_total++;
            
            // Start from empty, zeroed token arrays so stale tokens from the
            // previous file cannot leak into the comparison.
            // memset takes (dest, value, byte count); the original call had
            // value and count swapped, which cleared nothing.
            exp->correct_stack.count = 0;
            exp->testing_stack.count = 0;
            memset(exp->correct_stack.tokens, 0, TOKEN_ARRAY_SIZE);
            memset(exp->testing_stack.tokens, 0, TOKEN_ARRAY_SIZE);
            
            file_cpp.data = (char*)file_data.data;
            file_cpp.size = file_data.size;
            
            // Reference lexer, then the lexer under test.
            cpp_lex_file_nonalloc(file_cpp, &exp->correct_stack, lex_data);
            new_lex::cpp_lex_nonalloc((char*)file_data.data, 0, file_data.size, &exp->testing_stack);
            
            if (exp->correct_stack.count != exp->testing_stack.count){
                pass = 0;
                printf("error: stack size mismatch %d original and %d testing\n",
                       exp->correct_stack.count, exp->testing_stack.count);
            }
            
            // Compare token by token over the part both stacks have.
            int min_count = exp->correct_stack.count;
            if (min_count > exp->testing_stack.count) min_count = exp->testing_stack.count;
            
            for (int j = 0; j < min_count; ++j){
                Cpp_Token *correct, *testing;
                correct = exp->correct_stack.tokens + j;
                testing = exp->testing_stack.tokens + j;
                
                if (correct->type != testing->type){
                    pass = 0;
                    printf("type mismatch at token %d\n", j);
                }
                
                if (correct->start != testing->start || correct->size != testing->size){
                    pass = 0;
                    printf("token range mismatch at token %d\n"
                           "\t%d:%d original %d:%d testing\n"
                           "\t%.*s original %.*s testing\n",
                           j,
                           correct->start, correct->size, testing->start, testing->size,
                           correct->size, file_cpp.data + correct->start,
                           testing->size, file_cpp.data + testing->start);
                }
                
                if (correct->flags != testing->flags){
                    pass = 0;
                    printf("token flag mismatch at token %d\n", j);
                }
            }
            
            if (pass){
                exp->passed_total++;
                printf("test passed!\n\n");
            }
            else{
                printf("test failed, you failed, fix it now!\n\n");
            }
        }
        
        free(file_data.data);
    }
}
#define BASE_DIR "w:/4ed/data/test/"
int main(){
char test_directory[] = BASE_DIR;
File_List all_files = {};
Experiment exp = {};
init_test_stack(&exp.correct_stack);
init_test_stack(&exp.testing_stack);
AllowLocal(test_directory);
AllowLocal(all_files);
run_experiment(&exp, BASE_DIR "autotab.cpp");
#if 0
system_set_file_list(&all_files, make_lit_string(test_directory));
for (int i = 0; i < all_files.count; ++i){
if (all_files.infos[i].folder == 0){
run_experiment(&exp, all_files.infos[i].filename.str);
}
}
#endif
printf("you passed %d / %d tests\n", exp.passed_total, exp.test_total);
return(0);
}