chunks 20/20

master
Allen Webster 2016-03-17 20:27:34 -04:00
parent 24a5dd57b6
commit 5857726201
3 changed files with 767 additions and 667 deletions

View File

@ -78,10 +78,3 @@ NOTES ON USE:
# define FCPP_LINK static # define FCPP_LINK static
# endif # endif
#endif #endif
// Resumable-function macros built on a switch over a saved program counter.
// They expect the using function to provide a local state struct `s` with an
// integer `__pc__` field and a pointer `state` that `s` is written back through.
#ifndef DrBegin
// Opens the dispatch switch; a __pc__ of 0 enters at the top of the body.
#define DrBegin() switch (s.__pc__){ case 0:;
// Closes the dispatch switch; a __pc__ with no matching case hits the Assert.
#define DrEnd() default: Assert(!"Invalid __pc__"); }
// Records `pc` as the resume point, stores `s` into *state, and returns `n`.
// The `case pc:` label is where the switch lands when __pc__ equals `pc`.
#define DrYield(pc, n) { s.__pc__ = pc; *state = s; return(n); case pc:; }
// Sets s.__pc__ to -1 and returns `n`. Note: `s` is not copied to *state here.
#define DrReturn(n) { s.__pc__ = -1; return(n); }
#endif

View File

@ -493,6 +493,11 @@ cpp_push_token_nonalloc(Cpp_Token *out_tokens, int *token_i, Cpp_Token token){
} }
} }
// State threaded through whitespace_skip_fsm, one character at a time.
struct Whitespace_FSM{
// Preprocessor state (an LSPP_* value). whitespace_skip_fsm resets it to
// LSPP_default when it is not already LSPP_default and the character is '\n'.
unsigned char pp_state;
// Set to 1 by whitespace_skip_fsm when it is given a character that is not
// one of ' ', '\n', '\t', '\r', '\f', '\v'.
unsigned char white_done;
};
struct Lex_FSM{ struct Lex_FSM{
unsigned char state; unsigned char state;
unsigned char int_state; unsigned char int_state;
@ -502,55 +507,103 @@ struct Lex_FSM{
}; };
struct Lex_Data{ struct Lex_Data{
char *tb;
int tb_pos;
int token_start;
int pos;
int pos_overide;
Lex_FSM fsm; Lex_FSM fsm;
Whitespace_FSM wfsm;
unsigned char pp_state; unsigned char pp_state;
unsigned char completed; unsigned char completed;
int token_start;
Cpp_Token token;
int __pc__;
}; };
lexer_link Lex_Data Whitespace_FSM
cpp_lex_nonalloc(Lex_Data S, char *chunk, int file_absolute_pos, int size, Cpp_Token_Stack *token_stack_out){ whitespace_skip_fsm(Whitespace_FSM wfsm, char c){
Cpp_Token *out_tokens = token_stack_out->tokens; if (wfsm.pp_state != LSPP_default){
int token_i = token_stack_out->count; if (c == '\n') wfsm.pp_state = LSPP_default;
int max_token_i = token_stack_out->max_count;
Cpp_Token token = {(Cpp_Token_Type)0};
Lex_FSM fsm = {0};
int pos = file_absolute_pos;
int end_pos = size + file_absolute_pos;
int restore_point = 0;
char c = 0;
Pos_Update_Rule pos_update_rule;
chunk -= file_absolute_pos;
for (; pos < end_pos && token_i < max_token_i;){
c = chunk[pos];
if (c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\f' || c == '\v'){
for (; pos < end_pos;){
c = chunk[pos++];
if (S.pp_state != LSPP_default){
if (c == '\n') S.pp_state = LSPP_default;
} }
if (!(c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\f' || c == '\v')) break; if (!(c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\f' || c == '\v')){
wfsm.white_done = 1;
} }
--pos; return(wfsm);
} }
S.token_start = pos; Lex_FSM
int_fsm(Lex_FSM fsm, char c){
switch (fsm.int_state){
case LSINT_default:
switch (c){
case 'u': case 'U': fsm.int_state = LSINT_u; break;
case 'l': fsm.int_state = LSINT_l; break;
case 'L': fsm.int_state = LSINT_L; break;
default: fsm.emit_token = 1; break;
}
break;
S.fsm = {0}; case LSINT_u:
for (; S.fsm.emit_token == 0 && pos < end_pos;){ switch (c){
c = chunk[pos++]; case 'l': fsm.int_state = LSINT_ul; break;
case 'L': fsm.int_state = LSINT_uL; break;
default: fsm.emit_token = 1; break;
}
break;
{ case LSINT_l:
fsm = S.fsm; switch (c){
case 'l': fsm.int_state = LSINT_ll; break;
case 'U': case 'u': fsm.int_state = LSINT_extra; break;
default: fsm.emit_token = 1; break;
}
break;
switch (S.pp_state){ case LSINT_L:
switch (c){
case 'L': fsm.int_state = LSINT_ll; break;
case 'U': case 'u': fsm.int_state = LSINT_extra; break;
default: fsm.emit_token = 1; break;
}
break;
case LSINT_ul:
switch (c){
case 'l': fsm.int_state = LSINT_extra; break;
default: fsm.emit_token = 1; break;
}
break;
case LSINT_uL:
switch (c){
case 'L': fsm.int_state = LSINT_extra; break;
default: fsm.emit_token = 1; break;
}
break;
case LSINT_ll:
switch (c){
case 'u': case 'U': fsm.int_state = LSINT_extra; break;
default: fsm.emit_token = 1; break;
}
break;
case LSINT_extra:
fsm.emit_token = 1;
break;
}
return(fsm);
}
Lex_FSM
main_fsm(Lex_FSM fsm, unsigned char pp_state, char c){
if (c == 0) fsm.emit_token = 1;
else
switch (pp_state){
case LSPP_error: case LSPP_error:
fsm.state = LS_error_message; fsm.state = LS_error_message;
if (c == '\n') fsm.emit_token = 1; if (c == '\n') fsm.emit_token = 1;
@ -651,7 +704,7 @@ cpp_lex_nonalloc(Lex_Data S, char *chunk, int file_absolute_pos, int size, Cpp_T
break; break;
case LS_pound: case LS_pound:
if (S.pp_state == LSPP_default){ if (pp_state == LSPP_default){
if (c == ' ' || c == '\t' || c == '\r' || c == '\f' || c == '\v'){ if (c == ' ' || c == '\t' || c == '\r' || c == '\f' || c == '\v'){
fsm.state = LS_pound; fsm.state = LS_pound;
} }
@ -932,33 +985,101 @@ cpp_lex_nonalloc(Lex_Data S, char *chunk, int file_absolute_pos, int size, Cpp_T
} }
break; break;
} }
return(fsm);
}
// Resume-point macros used by cpp_lex_nonalloc. They rely on names local to
// that function: S (working copy of the lexer state), S_ptr (the caller's
// state), token_i and token_stack_out.
// DrCase: switch entry that jumps to the label DrYield(PC, ...) defines.
#define DrCase(PC) case PC: goto resumespot_##PC
// DrYield: writes token_i to token_stack_out->count, copies S to *S_ptr,
// records PC in S_ptr->__pc__, and returns n. The trailing resumespot_PC label
// is the target of the matching DrCase(PC).
#define DrYield(PC, n) {\
token_stack_out->count = token_i;\
*S_ptr = S; S_ptr->__pc__ = PC; return(n); resumespot_##PC:; }
// DrReturn: same write-back as DrYield, but stores -1 in __pc__ and defines
// no resume label.
#define DrReturn(n) {\
token_stack_out->count = token_i;\
*S_ptr = S; S_ptr->__pc__ = -1; return(n); }
lexer_link int
cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_stack_out){
Lex_Data S = *S_ptr;
Cpp_Token *out_tokens = token_stack_out->tokens;
int token_i = token_stack_out->count;
int max_token_i = token_stack_out->max_count;
Lex_FSM fsm = {0};
Pos_Update_Rule pos_update_rule = PUR_none;
char c = 0;
int end_pos = size + S.pos;
chunk -= S.pos;
switch (S.__pc__){
DrCase(1);
DrCase(2);
DrCase(3);
DrCase(4);
DrCase(5);
}
for (;;){
S.wfsm.white_done = 0;
S.wfsm.pp_state = S.pp_state;
for(;;){
for (; S.wfsm.white_done == 0 && S.pos < end_pos;){
c = chunk[S.pos++];
S.wfsm = whitespace_skip_fsm(S.wfsm, c);
}
if (S.wfsm.white_done == 0){
DrYield(4, 1);
}
else break;
}
--S.pos;
S.pp_state = S.wfsm.pp_state;
S.token_start = S.pos;
S.tb_pos = 0;
S.fsm = {0};
for(;;){
for (; S.fsm.emit_token == 0 && S.pos < end_pos;){
c = chunk[S.pos++];
S.tb[S.tb_pos++] = c;
fsm = S.fsm;
fsm = main_fsm(fsm, S.pp_state, c);
S.fsm = fsm; S.fsm = fsm;
} }
if (S.fsm.emit_token == 0){
DrYield(3, 1);
}
else break;
} }
Assert(S.fsm.emit_token == 1);
if (c != 0){
pos_update_rule = PUR_none; pos_update_rule = PUR_none;
if (S.fsm.emit_token){
if (S.pp_state == LSPP_include){ if (S.pp_state == LSPP_include){
switch (S.fsm.state){ switch (S.fsm.state){
case LSINC_default:break; case LSINC_default:break;
case LSINC_quotes: case LSINC_quotes:
case LSINC_pointy: case LSINC_pointy:
token.type = CPP_TOKEN_INCLUDE_FILE; S.token.type = CPP_TOKEN_INCLUDE_FILE;
token.flags = 0; S.token.flags = 0;
break; break;
case LSINC_junk: case LSINC_junk:
token.type = CPP_TOKEN_JUNK; S.token.type = CPP_TOKEN_JUNK;
token.flags = 0; S.token.flags = 0;
break; break;
} }
} }
else switch (S.fsm.state){ else switch (S.fsm.state){
case LS_default: case LS_default:
switch (c){ switch (c){
#define OperCase(op,t) case op: token.type = t; break; #define OperCase(op,t) case op: S.token.type = t; break;
OperCase('{', CPP_TOKEN_BRACE_OPEN); OperCase('{', CPP_TOKEN_BRACE_OPEN);
OperCase('}', CPP_TOKEN_BRACE_CLOSE); OperCase('}', CPP_TOKEN_BRACE_CLOSE);
@ -979,102 +1100,109 @@ cpp_lex_nonalloc(Lex_Data S, char *chunk, int file_absolute_pos, int size, Cpp_T
case '\\': case '\\':
if (S.pp_state == LSPP_default){ if (S.pp_state == LSPP_default){
token.type = CPP_TOKEN_JUNK; S.token.type = CPP_TOKEN_JUNK;
} }
else{ else{
restore_point = pos; S.pos_overide = S.pos;
c = chunk[pos]; S.wfsm.white_done = 0;
while (c == ' ' || c == '\t' || c == '\r' || c == '\v' || c == '\f'){ for (;;){
c = chunk[pos++]; for (; S.wfsm.white_done == 0 && S.pos < end_pos;){
c = chunk[S.pos++];
if (!(c == ' ' || c == '\t' || c == '\r' || c == '\v' || c == '\f')) S.wfsm.white_done = 1;
} }
if (S.wfsm.white_done == 0){
DrYield(1, 1);
}
else break;
}
if (c == '\n'){ if (c == '\n'){
S.fsm.emit_token = 0; S.fsm.emit_token = 0;
S.pos_overide = 0;
} }
else{ else{
pos = restore_point; S.token.type = CPP_TOKEN_JUNK;
token.type = CPP_TOKEN_JUNK;
} }
} }
break; break;
} }
if (c != '@' && c != '$' && c != '\\'){ if (c != '@' && c != '$' && c != '\\'){
token.flags = CPP_TFLAG_IS_OPERATOR; S.token.flags = CPP_TFLAG_IS_OPERATOR;
} }
break; break;
case LS_identifier: case LS_identifier:
{ {
--pos; --S.pos;
int start = S.token_start;
int word_size = pos - S.token_start;
int word_size = S.pos - S.token_start;
if (S.pp_state == LSPP_body_if){ if (S.pp_state == LSPP_body_if){
if (match(make_string(chunk + start, word_size), make_lit_string("defined"))){ if (match(make_string(S.tb, word_size), make_lit_string("defined"))){
token.type = CPP_TOKEN_DEFINED; S.token.type = CPP_TOKEN_DEFINED;
token.flags = CPP_TFLAG_IS_OPERATOR | CPP_TFLAG_IS_KEYWORD; S.token.flags = CPP_TFLAG_IS_OPERATOR | CPP_TFLAG_IS_KEYWORD;
break; break;
} }
} }
Sub_Match_List_Result sub_match; Sub_Match_List_Result sub_match;
sub_match = sub_match_list(chunk, size, start, bool_lits, word_size); sub_match = sub_match_list(S.tb, S.tb_pos, 0, bool_lits, word_size);
if (sub_match.index != -1){ if (sub_match.index != -1){
token.type = CPP_TOKEN_BOOLEAN_CONSTANT; S.token.type = CPP_TOKEN_BOOLEAN_CONSTANT;
token.flags = CPP_TFLAG_IS_KEYWORD; S.token.flags = CPP_TFLAG_IS_KEYWORD;
} }
else{ else{
sub_match = sub_match_list(chunk, size, start, keywords, word_size); sub_match = sub_match_list(S.tb, S.tb_pos, 0, keywords, word_size);
if (sub_match.index != -1){ if (sub_match.index != -1){
String_And_Flag data = keywords.data[sub_match.index]; String_And_Flag data = keywords.data[sub_match.index];
token.type = (Cpp_Token_Type)data.flags; S.token.type = (Cpp_Token_Type)data.flags;
token.flags = CPP_TFLAG_IS_KEYWORD; S.token.flags = CPP_TFLAG_IS_KEYWORD;
} }
else{ else{
token.type = CPP_TOKEN_IDENTIFIER; S.token.type = CPP_TOKEN_IDENTIFIER;
token.flags = 0; S.token.flags = 0;
} }
} }
}break; }break;
case LS_pound: case LS_pound:
token.flags = 0; S.token.flags = 0;
switch (c){ switch (c){
case '#': token.type = CPP_PP_CONCAT; break; case '#': S.token.type = CPP_PP_CONCAT; break;
default: default:
token.type = CPP_PP_STRINGIFY; S.token.type = CPP_PP_STRINGIFY;
--pos; --S.pos;
break; break;
} }
break; break;
case LS_pp: case LS_pp:
{ {
--pos; --S.pos;
int start = S.token_start + 1; int start = 1;
c = chunk[start]; c = S.tb[start];
while (start < pos && (c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\v' || c == '\f')){ while (start < S.tb_pos && (c == ' ' || c == '\t' || c == '\r' || c == '\v' || c == '\f')){
++start; ++start;
c = chunk[start]; c = S.tb[start];
} }
int word_size = pos - start; int word_size = S.tb_pos - start - 1;
Sub_Match_List_Result match; Sub_Match_List_Result match;
match = sub_match_list(chunk, size, start, preprops, word_size); match = sub_match_list(S.tb, S.tb_pos, start, preprops, word_size);
if (match.index != -1){ if (match.index != -1){
String_And_Flag data = preprops.data[match.index]; String_And_Flag data = preprops.data[match.index];
token.type = (Cpp_Token_Type)data.flags; S.token.type = (Cpp_Token_Type)data.flags;
token.flags = CPP_TFLAG_PP_DIRECTIVE; S.token.flags = CPP_TFLAG_PP_DIRECTIVE;
S.pp_state = (unsigned char)cpp_pp_directive_to_state(token.type); S.pp_state = (unsigned char)cpp_pp_directive_to_state(S.token.type);
} }
else{ else{
token.type = CPP_TOKEN_JUNK; S.token.type = CPP_TOKEN_JUNK;
token.flags = 0; S.token.flags = 0;
} }
}break; }break;
@ -1084,136 +1212,77 @@ cpp_lex_nonalloc(Lex_Data S, char *chunk, int file_absolute_pos, int size, Cpp_T
S.fsm.int_state = LSINT_default; S.fsm.int_state = LSINT_default;
{ {
int done = 0; S.fsm.emit_token = 0;
--pos; --S.pos;
for (; done == 0 && pos <= end_pos;){ for (;;){
if (pos < end_pos){ for (; S.fsm.emit_token == 0 && S.pos < end_pos;){
c = chunk[pos++]; c = chunk[S.pos++];
S.fsm = int_fsm(S.fsm, c);
} }
else{ if (S.fsm.emit_token == 0){
c = 0; DrYield(5, 1);
++pos; }
else break;
}
--S.pos;
} }
switch (S.fsm.int_state){ S.token.type = CPP_TOKEN_INTEGER_CONSTANT;
case LSINT_default: S.token.flags = 0;
switch (c){
case 'u': case 'U': S.fsm.int_state = LSINT_u; break;
case 'l': S.fsm.int_state = LSINT_l; break;
case 'L': S.fsm.int_state = LSINT_L; break;
default: done = 1; break;
}
break;
case LSINT_u:
switch (c){
case 'l': S.fsm.int_state = LSINT_ul; break;
case 'L': S.fsm.int_state = LSINT_uL; break;
default: done = 1; break;
}
break;
case LSINT_l:
switch (c){
case 'l': S.fsm.int_state = LSINT_ll; break;
case 'U': case 'u': S.fsm.int_state = LSINT_extra; break;
default: done = 1; break;
}
break;
case LSINT_L:
switch (c){
case 'L': S.fsm.int_state = LSINT_ll; break;
case 'U': case 'u': S.fsm.int_state = LSINT_extra; break;
default: done = 1; break;
}
break;
case LSINT_ul:
switch (c){
case 'l': S.fsm.int_state = LSINT_extra; break;
default: done = 1; break;
}
break;
case LSINT_uL:
switch (c){
case 'L': S.fsm.int_state = LSINT_extra; break;
default: done = 1; break;
}
break;
case LSINT_ll:
switch (c){
case 'u': case 'U': S.fsm.int_state = LSINT_extra; break;
default: done = 1; break;
}
break;
case LSINT_extra:
done = 1;
break;
}
}
--pos;
}
token.type = CPP_TOKEN_INTEGER_CONSTANT;
token.flags = 0;
break; break;
case LS_float: case LS_float:
case LS_crazy_float0: case LS_crazy_float0:
case LS_crazy_float1: case LS_crazy_float1:
token.type = CPP_TOKEN_FLOATING_CONSTANT; S.token.type = CPP_TOKEN_FLOATING_CONSTANT;
token.flags = 0; S.token.flags = 0;
switch (c){ switch (c){
case 'f': case 'F': case 'f': case 'F':
case 'l': case 'L':break; case 'l': case 'L':break;
default: --pos; break; default: --S.pos; break;
} }
break; break;
case LS_char: case LS_char:
token.type = CPP_TOKEN_CHARACTER_CONSTANT; S.token.type = CPP_TOKEN_CHARACTER_CONSTANT;
token.flags = 0; S.token.flags = 0;
break; break;
case LS_string: case LS_string:
token.type = CPP_TOKEN_STRING_CONSTANT; S.token.type = CPP_TOKEN_STRING_CONSTANT;
token.flags = 0; S.token.flags = 0;
break; break;
case LS_comment_pre: case LS_comment_pre:
token.flags = CPP_TFLAG_IS_OPERATOR; S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){ switch (c){
case '=': token.type = CPP_TOKEN_DIVEQ; break; case '=': S.token.type = CPP_TOKEN_DIVEQ; break;
default: default:
token.type = CPP_TOKEN_DIV; S.token.type = CPP_TOKEN_DIV;
--pos; --S.pos;
break; break;
} }
break; break;
case LS_comment: case LS_comment_block_ending: case LS_comment: case LS_comment_block_ending:
token.type = CPP_TOKEN_COMMENT; S.token.type = CPP_TOKEN_COMMENT;
token.flags = 0; S.token.flags = 0;
pos_update_rule = PUR_unget_whitespace; pos_update_rule = PUR_unget_whitespace;
break; break;
case LS_error_message: case LS_error_message:
token.type = CPP_TOKEN_ERROR_MESSAGE; S.token.type = CPP_TOKEN_ERROR_MESSAGE;
token.flags = 0; S.token.flags = 0;
pos_update_rule = PUR_unget_whitespace; pos_update_rule = PUR_unget_whitespace;
break; break;
case LS_dot: case LS_dot:
token.flags = CPP_TFLAG_IS_OPERATOR; S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){ switch (c){
case '*': token.type = CPP_TOKEN_PTRDOT; break; case '*': S.token.type = CPP_TOKEN_PTRDOT; break;
default: default:
token.type = CPP_TOKEN_DOT; S.token.type = CPP_TOKEN_DOT;
--pos; --S.pos;
break; break;
} }
break; break;
@ -1221,182 +1290,182 @@ cpp_lex_nonalloc(Lex_Data S, char *chunk, int file_absolute_pos, int size, Cpp_T
case LS_ellipsis: case LS_ellipsis:
switch (c){ switch (c){
case '.': case '.':
token.flags = CPP_TFLAG_IS_OPERATOR; S.token.flags = CPP_TFLAG_IS_OPERATOR;
token.type = CPP_TOKEN_ELLIPSIS; S.token.type = CPP_TOKEN_ELLIPSIS;
break; break;
default: default:
token.type = CPP_TOKEN_JUNK; S.token.type = CPP_TOKEN_JUNK;
--pos; --S.pos;
break; break;
} }
break; break;
case LS_less: case LS_less:
token.flags = CPP_TFLAG_IS_OPERATOR; S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){ switch (c){
case '=': token.type = CPP_TOKEN_LESSEQ; break; case '=': S.token.type = CPP_TOKEN_LESSEQ; break;
default: default:
token.type = CPP_TOKEN_LESS; S.token.type = CPP_TOKEN_LESS;
--pos; --S.pos;
break; break;
} }
break; break;
case LS_less_less: case LS_less_less:
token.flags = CPP_TFLAG_IS_OPERATOR; S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){ switch (c){
case '=': token.type = CPP_TOKEN_LSHIFTEQ; break; case '=': S.token.type = CPP_TOKEN_LSHIFTEQ; break;
default: default:
token.type = CPP_TOKEN_LSHIFT; S.token.type = CPP_TOKEN_LSHIFT;
--pos; --S.pos;
break; break;
} }
break; break;
case LS_more: case LS_more:
token.flags = CPP_TFLAG_IS_OPERATOR; S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){ switch (c){
case '=': token.type = CPP_TOKEN_GRTREQ; break; case '=': S.token.type = CPP_TOKEN_GRTREQ; break;
default: default:
token.type = CPP_TOKEN_GRTR; S.token.type = CPP_TOKEN_GRTR;
--pos; --S.pos;
break; break;
} }
break; break;
case LS_more_more: case LS_more_more:
token.flags = CPP_TFLAG_IS_OPERATOR; S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){ switch (c){
case '=': token.type = CPP_TOKEN_RSHIFTEQ; break; case '=': S.token.type = CPP_TOKEN_RSHIFTEQ; break;
default: default:
token.type = CPP_TOKEN_RSHIFT; S.token.type = CPP_TOKEN_RSHIFT;
--pos; --S.pos;
break; break;
} }
break; break;
case LS_minus: case LS_minus:
token.flags = CPP_TFLAG_IS_OPERATOR; S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){ switch (c){
case '-': token.type = CPP_TOKEN_DECREMENT; break; case '-': S.token.type = CPP_TOKEN_DECREMENT; break;
case '=': token.type = CPP_TOKEN_SUBEQ; break; case '=': S.token.type = CPP_TOKEN_SUBEQ; break;
default: default:
token.type = CPP_TOKEN_MINUS; S.token.type = CPP_TOKEN_MINUS;
--pos; --S.pos;
break; break;
} }
break; break;
case LS_arrow: case LS_arrow:
token.flags = CPP_TFLAG_IS_OPERATOR; S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){ switch (c){
case '*': token.type = CPP_TOKEN_PTRARROW; break; case '*': S.token.type = CPP_TOKEN_PTRARROW; break;
default: default:
token.type = CPP_TOKEN_ARROW; S.token.type = CPP_TOKEN_ARROW;
--pos; --S.pos;
break; break;
} }
break; break;
case LS_and: case LS_and:
token.flags = CPP_TFLAG_IS_OPERATOR; S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){ switch (c){
case '&': token.type = CPP_TOKEN_AND; break; case '&': S.token.type = CPP_TOKEN_AND; break;
case '=': token.type = CPP_TOKEN_ANDEQ; break; case '=': S.token.type = CPP_TOKEN_ANDEQ; break;
default: default:
token.type = CPP_TOKEN_AMPERSAND; S.token.type = CPP_TOKEN_AMPERSAND;
--pos; --S.pos;
break; break;
} }
break; break;
case LS_or: case LS_or:
token.flags = CPP_TFLAG_IS_OPERATOR; S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){ switch (c){
case '|': token.type = CPP_TOKEN_OR; break; case '|': S.token.type = CPP_TOKEN_OR; break;
case '=': token.type = CPP_TOKEN_OREQ; break; case '=': S.token.type = CPP_TOKEN_OREQ; break;
default: default:
token.type = CPP_TOKEN_BIT_OR; S.token.type = CPP_TOKEN_BIT_OR;
--pos; --S.pos;
break; break;
} }
break; break;
case LS_plus: case LS_plus:
token.flags = CPP_TFLAG_IS_OPERATOR; S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){ switch (c){
case '+': token.type = CPP_TOKEN_INCREMENT; break; case '+': S.token.type = CPP_TOKEN_INCREMENT; break;
case '=': token.type = CPP_TOKEN_ADDEQ; break; case '=': S.token.type = CPP_TOKEN_ADDEQ; break;
default: default:
token.type = CPP_TOKEN_PLUS; S.token.type = CPP_TOKEN_PLUS;
--pos; --S.pos;
break; break;
} }
break; break;
case LS_colon: case LS_colon:
token.flags = CPP_TFLAG_IS_OPERATOR; S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){ switch (c){
case ':': token.type = CPP_TOKEN_SCOPE; break; case ':': S.token.type = CPP_TOKEN_SCOPE; break;
default: default:
token.type = CPP_TOKEN_COLON; S.token.type = CPP_TOKEN_COLON;
--pos; --S.pos;
break; break;
} }
break; break;
case LS_star: case LS_star:
token.flags = CPP_TFLAG_IS_OPERATOR; S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){ switch (c){
case '=': token.type = CPP_TOKEN_MULEQ; break; case '=': S.token.type = CPP_TOKEN_MULEQ; break;
default: default:
token.type = CPP_TOKEN_STAR; S.token.type = CPP_TOKEN_STAR;
--pos; --S.pos;
break; break;
} }
break; break;
case LS_modulo: case LS_modulo:
token.flags = CPP_TFLAG_IS_OPERATOR; S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){ switch (c){
case '=': token.type = CPP_TOKEN_MODEQ; break; case '=': S.token.type = CPP_TOKEN_MODEQ; break;
default: default:
token.type = CPP_TOKEN_MOD; S.token.type = CPP_TOKEN_MOD;
--pos; --S.pos;
break; break;
} }
break; break;
case LS_caret: case LS_caret:
token.flags = CPP_TFLAG_IS_OPERATOR; S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){ switch (c){
case '=': token.type = CPP_TOKEN_XOREQ; break; case '=': S.token.type = CPP_TOKEN_XOREQ; break;
default: default:
token.type = CPP_TOKEN_BIT_XOR; S.token.type = CPP_TOKEN_BIT_XOR;
--pos; --S.pos;
break; break;
} }
break; break;
case LS_eq: case LS_eq:
token.flags = CPP_TFLAG_IS_OPERATOR; S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){ switch (c){
case '=': token.type = CPP_TOKEN_EQEQ; break; case '=': S.token.type = CPP_TOKEN_EQEQ; break;
default: default:
token.type = CPP_TOKEN_EQ; S.token.type = CPP_TOKEN_EQ;
--pos; --S.pos;
break; break;
} }
break; break;
case LS_bang: case LS_bang:
token.flags = CPP_TFLAG_IS_OPERATOR; S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){ switch (c){
case '=': token.type = CPP_TOKEN_NOTEQ; break; case '=': S.token.type = CPP_TOKEN_NOTEQ; break;
default: default:
token.type = CPP_TOKEN_NOT; S.token.type = CPP_TOKEN_NOT;
--pos; --S.pos;
break; break;
} }
break; break;
@ -1404,27 +1473,26 @@ cpp_lex_nonalloc(Lex_Data S, char *chunk, int file_absolute_pos, int size, Cpp_T
switch (pos_update_rule){ switch (pos_update_rule){
case PUR_unget_whitespace: case PUR_unget_whitespace:
c = chunk[--pos]; c = chunk[--S.pos];
while (c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\v' || c == '\f'){ while (c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\v' || c == '\f'){
--pos; c = chunk[--S.pos];
c = chunk[pos];
} }
++pos; ++S.pos;
break; break;
} }
if ((token.flags & CPP_TFLAG_PP_DIRECTIVE) == 0){ if ((S.token.flags & CPP_TFLAG_PP_DIRECTIVE) == 0){
switch (S.pp_state){ switch (S.pp_state){
case LSPP_include: case LSPP_include:
if (token.type != CPP_TOKEN_INCLUDE_FILE){ if (S.token.type != CPP_TOKEN_INCLUDE_FILE){
token.type = CPP_TOKEN_JUNK; S.token.type = CPP_TOKEN_JUNK;
} }
S.pp_state = LSPP_junk; S.pp_state = LSPP_junk;
break; break;
case LSPP_macro_identifier: case LSPP_macro_identifier:
if (S.fsm.state != LS_identifier){ if (S.fsm.state != LS_identifier){
token.type = CPP_TOKEN_JUNK; S.token.type = CPP_TOKEN_JUNK;
S.pp_state = LSPP_junk; S.pp_state = LSPP_junk;
} }
else{ else{
@ -1434,14 +1502,14 @@ cpp_lex_nonalloc(Lex_Data S, char *chunk, int file_absolute_pos, int size, Cpp_T
case LSPP_identifier: case LSPP_identifier:
if (S.fsm.state != LS_identifier){ if (S.fsm.state != LS_identifier){
token.type = CPP_TOKEN_JUNK; S.token.type = CPP_TOKEN_JUNK;
} }
S.pp_state = LSPP_junk; S.pp_state = LSPP_junk;
break; break;
case LSPP_number: case LSPP_number:
if (token.type != CPP_TOKEN_INTEGER_CONSTANT){ if (S.token.type != CPP_TOKEN_INTEGER_CONSTANT){
token.type = CPP_TOKEN_JUNK; S.token.type = CPP_TOKEN_JUNK;
S.pp_state = LSPP_junk; S.pp_state = LSPP_junk;
} }
else{ else{
@ -1450,31 +1518,46 @@ cpp_lex_nonalloc(Lex_Data S, char *chunk, int file_absolute_pos, int size, Cpp_T
break; break;
case LSPP_junk: case LSPP_junk:
token.type = CPP_TOKEN_JUNK; S.token.type = CPP_TOKEN_JUNK;
break; break;
} }
} }
if (S.fsm.emit_token){ if (S.fsm.emit_token){
token.start = S.token_start; S.token.start = S.token_start;
token.size = pos - S.token_start; if (S.pos_overide){
token.flags |= (S.fsm.multi_line)?(CPP_TFLAG_MULTILINE):(0); S.token.size = S.pos_overide - S.token_start;
if ((token.flags & CPP_TFLAG_PP_DIRECTIVE) == 0){ S.pos_overide = 0;
token.flags |= (S.pp_state != LSPP_default)?(CPP_TFLAG_PP_BODY):(0);
} }
token.state_flags = S.pp_state; else{
S.token.size = S.pos - S.token_start;
}
S.token.flags |= (S.fsm.multi_line)?(CPP_TFLAG_MULTILINE):(0);
if ((S.token.flags & CPP_TFLAG_PP_DIRECTIVE) == 0){
S.token.flags |= (S.pp_state != LSPP_default)?(CPP_TFLAG_PP_BODY):(0);
}
S.token.state_flags = S.pp_state;
cpp_push_token_nonalloc(out_tokens, &token_i, token); cpp_push_token_nonalloc(out_tokens, &token_i, S.token);
if (token_i == max_token_i){
DrYield(2, 2);
} }
} }
} }
// NOTE(allen): else case for "if (c != 0) {...}"
else{
S.completed = 1;
break;
}
}
token_stack_out->count = token_i; DrReturn(0);
if (pos == end_pos) S.completed = 1;
return(S);
} }
#undef DrYield
#undef DrReturn
#undef DrCase
} }
#endif #endif

View File

@ -227,6 +227,8 @@ run_experiment(Experiment *exp, char *filename, int verbose, int chunks){
file_cpp.data = (char*)file_data.data; file_cpp.data = (char*)file_data.data;
file_cpp.size = file_data.size; file_cpp.size = file_data.size;
ld.tb = (char*)malloc(file_data.size + 1);
{ {
i64 start; i64 start;
@ -245,15 +247,18 @@ run_experiment(Experiment *exp, char *filename, int verbose, int chunks){
is_last = 1; is_last = 1;
} }
ld = new_lex::cpp_lex_nonalloc(ld, (char*)file_data.data + k, k, chunk_size, &exp->testing_stack); int result = new_lex::cpp_lex_nonalloc(&ld, (char*)file_data.data + k, chunk_size, &exp->testing_stack);
if (result == 0 || result == 2) break;
} }
} }
else{ else{
new_lex::cpp_lex_nonalloc(ld, (char*)file_data.data, 0, file_data.size, &exp->testing_stack); new_lex::cpp_lex_nonalloc(&ld, (char*)file_data.data, file_data.size, &exp->testing_stack);
} }
time.fsm += (__rdtsc() - start); time.fsm += (__rdtsc() - start);
} }
free(ld.tb);
if (exp->correct_stack.count != exp->testing_stack.count){ if (exp->correct_stack.count != exp->testing_stack.count){
pass = 0; pass = 0;
if (verbose >= 0){ if (verbose >= 0){
@ -315,13 +320,13 @@ show_time(Times t, int repeats, char *type){
f32 speed_up = ((f32)t.handcoded) / t.fsm; f32 speed_up = ((f32)t.handcoded) / t.fsm;
printf( printf(
"\n%s time for %d repeates\n" "\n%s time for %d repeates\n"
OUTLINE("%d") OUTLINE("%lld")
OUTLINE("%d") OUTLINE("%lld")
OUTLINE("%f"), OUTLINE("%f"),
type, type,
repeats, repeats,
OUTLINE_VAR(i32, t.handcoded), OUTLINE_VAR(i64, t.handcoded),
OUTLINE_VAR(i32, t.fsm), OUTLINE_VAR(i64, t.fsm),
OUTLINE_VAR(f32, speed_up) OUTLINE_VAR(f32, speed_up)
); );
} }
@ -329,9 +334,17 @@ show_time(Times t, int repeats, char *type){
#define BASE_DIR "w:/4ed/data/test/" #define BASE_DIR "w:/4ed/data/test/"
int main(){ int main(){
int repeats = 1;
int verbose_level = 0; int repeats = 100;
int chunks = 64; int verbose_level = -1;
int chunk_start = 0;
int chunk_end = 1024;
#define TEST_FILE "lexer_test.cpp"
#define SINGLE_ITEM 0
int chunks = (chunk_start > 0 && chunk_start <= chunk_end);
int c = 0;
char test_directory[] = BASE_DIR; char test_directory[] = BASE_DIR;
File_List all_files = {}; File_List all_files = {};
Experiment exp = {}; Experiment exp = {};
@ -348,16 +361,16 @@ int main(){
AllowLocal(test_directory); AllowLocal(test_directory);
AllowLocal(all_files); AllowLocal(all_files);
#if 1 #if SINGLE_ITEM
(void)(repeats); (void)(repeats);
(void)(verbose_level); (void)(verbose_level);
#define TEST_FILE "crazywords.cpp"
if (chunks){ if (chunks){
begin_t(&chunk_exp_t); begin_t(&chunk_exp_t);
printf("With chunks of %d\n", chunks); printf("With chunks of %d\n", chunks);
run_experiment(&chunk_exp, BASE_DIR TEST_FILE, 1, chunks); for (c = chunk_start; c <= chunk_end; ++c){
run_experiment(&chunk_exp, BASE_DIR TEST_FILE, 1, c);
}
end_t(&chunk_exp_t); end_t(&chunk_exp_t);
} }
@ -375,11 +388,21 @@ int main(){
if (all_files.infos[i].folder == 0){ if (all_files.infos[i].folder == 0){
if (chunks){ if (chunks){
begin_t(&chunk_exp_t); begin_t(&chunk_exp_t);
run_experiment(&chunk_exp, all_files.infos[i].filename.str, verbose_level, chunks); for (c = chunk_start; c <= chunk_end; ++c){
run_experiment(&chunk_exp, all_files.infos[i].filename.str, verbose_level, c);
}
end_t(&chunk_exp_t); end_t(&chunk_exp_t);
} }
begin_t(&exp_t); begin_t(&exp_t);
if (verbose_level == -1 && chunks){
for (c = chunk_start; c <= chunk_end; ++c){
run_experiment(&exp, all_files.infos[i].filename.str, verbose_level, 0); run_experiment(&exp, all_files.infos[i].filename.str, verbose_level, 0);
}
}
else{
run_experiment(&exp, all_files.infos[i].filename.str, verbose_level, 0);
}
end_t(&exp_t); end_t(&exp_t);
} }
} }
@ -387,6 +410,7 @@ int main(){
#endif #endif
if (chunks){ if (chunks){
printf("chunks of sizes %d through %d tested\n", chunk_start, chunk_end);
printf("chunked passed %d / %d tests\n", chunk_exp.passed_total, chunk_exp.test_total); printf("chunked passed %d / %d tests\n", chunk_exp.passed_total, chunk_exp.test_total);
} }