chunks 20/20

master
Allen Webster 2016-03-17 20:27:34 -04:00
parent 24a5dd57b6
commit 5857726201
3 changed files with 767 additions and 667 deletions

View File

@ -78,10 +78,3 @@ NOTES ON USE:
# define FCPP_LINK static
# endif
#endif
// Switch-based resumable-function helpers. Every macro here expands to code
// that names a local `s` (which must have a `__pc__` member); DrYield also
// names a pointer `state`. Both must be in scope at the expansion site.
#ifndef DrBegin
// Opens a switch on the saved program counter; `case 0` is the fresh-start entry.
#define DrBegin() switch (s.__pc__){ case 0:;
// Closes the switch opened by DrBegin; any __pc__ with no matching case asserts.
#define DrEnd() default: Assert(!"Invalid __pc__"); }
// Records `pc` in s, copies s out through `state`, and returns n. The trailing
// `case pc:` label sits right after the return, so a later switch on that same
// pc value continues from this point.
#define DrYield(pc, n) { s.__pc__ = pc; *state = s; return(n); case pc:; }
// Sets __pc__ to -1 and returns n. Unlike DrYield, s is not copied to *state here.
#define DrReturn(n) { s.__pc__ = -1; return(n); }
#endif

View File

@ -493,6 +493,11 @@ cpp_push_token_nonalloc(Cpp_Token *out_tokens, int *token_i, Cpp_Token token){
}
}
// State carried through whitespace_skip_fsm, one input character at a time.
struct Whitespace_FSM{
// Preprocessor state; whitespace_skip_fsm resets it to LSPP_default on '\n'.
unsigned char pp_state;
// Set to 1 by whitespace_skip_fsm once a non-whitespace character is seen.
unsigned char white_done;
};
struct Lex_FSM{
unsigned char state;
unsigned char int_state;
@ -502,55 +507,103 @@ struct Lex_FSM{
};
// Resumable lexer state. cpp_lex_nonalloc copies this in on entry and writes
// it back through its Lex_Data pointer whenever it yields or returns, so a
// token may span several chunks.
struct Lex_Data{
    // Caller-provided buffer; the lexer appends each character of the token
    // currently being scanned (S.tb[S.tb_pos++] = c).
    char *tb;
    // Next write index into tb; reset to 0 when a new token begins.
    int tb_pos;
    // Value of pos at the first character of the current token.
    int token_start;
    // Current read position; the lexer indexes the chunk with it after
    // rebasing the chunk pointer by the entry value of pos.
    int pos;
    // When nonzero, used in place of pos to compute the emitted token's size,
    // then cleared. (Spelling kept: the name is referenced by the lexer body.)
    int pos_overide;

    // Main token state machine and whitespace-skipping state machine.
    Lex_FSM fsm;
    Whitespace_FSM wfsm;
    // Current preprocessor state (LSPP_* values).
    unsigned char pp_state;
    // Set to 1 by the lexer when it reads a 0 character.
    unsigned char completed;

    // Token being assembled before it is pushed to the output stack.
    Cpp_Token token;

    // Resume point written by DrYield / DrReturn (-1 after DrReturn).
    int __pc__;
};
lexer_link Lex_Data
cpp_lex_nonalloc(Lex_Data S, char *chunk, int file_absolute_pos, int size, Cpp_Token_Stack *token_stack_out){
Cpp_Token *out_tokens = token_stack_out->tokens;
int token_i = token_stack_out->count;
int max_token_i = token_stack_out->max_count;
Cpp_Token token = {(Cpp_Token_Type)0};
Lex_FSM fsm = {0};
int pos = file_absolute_pos;
int end_pos = size + file_absolute_pos;
int restore_point = 0;
char c = 0;
Pos_Update_Rule pos_update_rule;
chunk -= file_absolute_pos;
for (; pos < end_pos && token_i < max_token_i;){
c = chunk[pos];
if (c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\f' || c == '\v'){
for (; pos < end_pos;){
c = chunk[pos++];
if (S.pp_state != LSPP_default){
if (c == '\n') S.pp_state = LSPP_default;
// Advances the whitespace-skipping state machine by one character.
//
// wfsm: state so far, passed and returned by value.
// c:    the next input character.
//
// While wfsm.pp_state is not LSPP_default, a '\n' resets it to LSPP_default.
// The first character that is not one of ' ', '\n', '\t', '\r', '\f', '\v'
// sets wfsm.white_done to 1; consuming or un-reading that character is left
// to the caller.
Whitespace_FSM
whitespace_skip_fsm(Whitespace_FSM wfsm, char c){
    if (wfsm.pp_state != LSPP_default){
        if (c == '\n') wfsm.pp_state = LSPP_default;
    }
    if (!(c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\f' || c == '\v')){
        wfsm.white_done = 1;
    }
    return(wfsm);
}
// Advances the integer-suffix state machine by one character.
//
// fsm: state so far, passed and returned by value; only int_state and
//      emit_token are read or written here.
// c:   the next input character.
//
// Each LSINT_* state accepts the suffix letters ('u'/'U', 'l', 'L') that may
// still follow; any other character sets fsm.emit_token to 1. LSINT_extra
// accepts nothing further and always sets emit_token. The caller
// (cpp_lex_nonalloc) loops on this until emit_token is set and then steps its
// position back by one, so the terminating character is not consumed.
Lex_FSM
int_fsm(Lex_FSM fsm, char c){
    switch (fsm.int_state){
        case LSINT_default:
        switch (c){
            case 'u': case 'U': fsm.int_state = LSINT_u; break;
            case 'l': fsm.int_state = LSINT_l; break;
            case 'L': fsm.int_state = LSINT_L; break;
            default: fsm.emit_token = 1; break;
        }
        break;

        case LSINT_u:
        switch (c){
            case 'l': fsm.int_state = LSINT_ul; break;
            case 'L': fsm.int_state = LSINT_uL; break;
            default: fsm.emit_token = 1; break;
        }
        break;

        case LSINT_l:
        switch (c){
            case 'l': fsm.int_state = LSINT_ll; break;
            case 'U': case 'u': fsm.int_state = LSINT_extra; break;
            default: fsm.emit_token = 1; break;
        }
        break;

        case LSINT_L:
        switch (c){
            case 'L': fsm.int_state = LSINT_ll; break;
            case 'U': case 'u': fsm.int_state = LSINT_extra; break;
            default: fsm.emit_token = 1; break;
        }
        break;

        case LSINT_ul:
        switch (c){
            case 'l': fsm.int_state = LSINT_extra; break;
            default: fsm.emit_token = 1; break;
        }
        break;

        case LSINT_uL:
        switch (c){
            case 'L': fsm.int_state = LSINT_extra; break;
            default: fsm.emit_token = 1; break;
        }
        break;

        case LSINT_ll:
        switch (c){
            case 'u': case 'U': fsm.int_state = LSINT_extra; break;
            default: fsm.emit_token = 1; break;
        }
        break;

        case LSINT_extra:
        fsm.emit_token = 1;
        break;
    }
    return(fsm);
}
Lex_FSM
main_fsm(Lex_FSM fsm, unsigned char pp_state, char c){
if (c == 0) fsm.emit_token = 1;
else
switch (pp_state){
case LSPP_error:
fsm.state = LS_error_message;
if (c == '\n') fsm.emit_token = 1;
@ -651,7 +704,7 @@ cpp_lex_nonalloc(Lex_Data S, char *chunk, int file_absolute_pos, int size, Cpp_T
break;
case LS_pound:
if (S.pp_state == LSPP_default){
if (pp_state == LSPP_default){
if (c == ' ' || c == '\t' || c == '\r' || c == '\f' || c == '\v'){
fsm.state = LS_pound;
}
@ -932,33 +985,101 @@ cpp_lex_nonalloc(Lex_Data S, char *chunk, int file_absolute_pos, int size, Cpp_T
}
break;
}
return(fsm);
}
// Resume/yield helpers for cpp_lex_nonalloc. The expansions name the locals
// `S`, `S_ptr`, `token_i` and `token_stack_out`, which must be in scope.

// One arm of the resume switch: jumps to the label that DrYield(PC, ...) declares.
#define DrCase(PC) case PC: goto resumespot_##PC
// Publishes the token count, copies S out through S_ptr, records PC as the
// resume point and returns n. The label after the return is where
// DrCase(PC) lands on the next call. __pc__ is written to *S_ptr after the
// copy, so the local S.__pc__ is left unchanged.
#define DrYield(PC, n) {\
token_stack_out->count = token_i;\
*S_ptr = S; S_ptr->__pc__ = PC; return(n); resumespot_##PC:; }
// Same write-back as DrYield, but stores -1 as the resume point and declares
// no resume label.
#define DrReturn(n) {\
token_stack_out->count = token_i;\
*S_ptr = S; S_ptr->__pc__ = -1; return(n); }
lexer_link int
cpp_lex_nonalloc(Lex_Data *S_ptr, char *chunk, int size, Cpp_Token_Stack *token_stack_out){
Lex_Data S = *S_ptr;
Cpp_Token *out_tokens = token_stack_out->tokens;
int token_i = token_stack_out->count;
int max_token_i = token_stack_out->max_count;
Lex_FSM fsm = {0};
Pos_Update_Rule pos_update_rule = PUR_none;
char c = 0;
int end_pos = size + S.pos;
chunk -= S.pos;
switch (S.__pc__){
DrCase(1);
DrCase(2);
DrCase(3);
DrCase(4);
DrCase(5);
}
for (;;){
S.wfsm.white_done = 0;
S.wfsm.pp_state = S.pp_state;
for(;;){
for (; S.wfsm.white_done == 0 && S.pos < end_pos;){
c = chunk[S.pos++];
S.wfsm = whitespace_skip_fsm(S.wfsm, c);
}
if (S.wfsm.white_done == 0){
DrYield(4, 1);
}
else break;
}
--S.pos;
S.pp_state = S.wfsm.pp_state;
S.token_start = S.pos;
S.tb_pos = 0;
S.fsm = {0};
for(;;){
for (; S.fsm.emit_token == 0 && S.pos < end_pos;){
c = chunk[S.pos++];
S.tb[S.tb_pos++] = c;
fsm = S.fsm;
fsm = main_fsm(fsm, S.pp_state, c);
S.fsm = fsm;
}
if (S.fsm.emit_token == 0){
DrYield(3, 1);
}
else break;
}
Assert(S.fsm.emit_token == 1);
if (c != 0){
pos_update_rule = PUR_none;
if (S.fsm.emit_token){
if (S.pp_state == LSPP_include){
switch (S.fsm.state){
case LSINC_default:break;
case LSINC_quotes:
case LSINC_pointy:
token.type = CPP_TOKEN_INCLUDE_FILE;
token.flags = 0;
S.token.type = CPP_TOKEN_INCLUDE_FILE;
S.token.flags = 0;
break;
case LSINC_junk:
token.type = CPP_TOKEN_JUNK;
token.flags = 0;
S.token.type = CPP_TOKEN_JUNK;
S.token.flags = 0;
break;
}
}
else switch (S.fsm.state){
case LS_default:
switch (c){
#define OperCase(op,t) case op: token.type = t; break;
#define OperCase(op,t) case op: S.token.type = t; break;
OperCase('{', CPP_TOKEN_BRACE_OPEN);
OperCase('}', CPP_TOKEN_BRACE_CLOSE);
@ -979,102 +1100,109 @@ cpp_lex_nonalloc(Lex_Data S, char *chunk, int file_absolute_pos, int size, Cpp_T
case '\\':
if (S.pp_state == LSPP_default){
token.type = CPP_TOKEN_JUNK;
S.token.type = CPP_TOKEN_JUNK;
}
else{
restore_point = pos;
c = chunk[pos];
while (c == ' ' || c == '\t' || c == '\r' || c == '\v' || c == '\f'){
c = chunk[pos++];
S.pos_overide = S.pos;
S.wfsm.white_done = 0;
for (;;){
for (; S.wfsm.white_done == 0 && S.pos < end_pos;){
c = chunk[S.pos++];
if (!(c == ' ' || c == '\t' || c == '\r' || c == '\v' || c == '\f')) S.wfsm.white_done = 1;
}
if (S.wfsm.white_done == 0){
DrYield(1, 1);
}
else break;
}
if (c == '\n'){
S.fsm.emit_token = 0;
S.pos_overide = 0;
}
else{
pos = restore_point;
token.type = CPP_TOKEN_JUNK;
S.token.type = CPP_TOKEN_JUNK;
}
}
break;
}
if (c != '@' && c != '$' && c != '\\'){
token.flags = CPP_TFLAG_IS_OPERATOR;
S.token.flags = CPP_TFLAG_IS_OPERATOR;
}
break;
case LS_identifier:
{
--pos;
int start = S.token_start;
int word_size = pos - S.token_start;
--S.pos;
int word_size = S.pos - S.token_start;
if (S.pp_state == LSPP_body_if){
if (match(make_string(chunk + start, word_size), make_lit_string("defined"))){
token.type = CPP_TOKEN_DEFINED;
token.flags = CPP_TFLAG_IS_OPERATOR | CPP_TFLAG_IS_KEYWORD;
if (match(make_string(S.tb, word_size), make_lit_string("defined"))){
S.token.type = CPP_TOKEN_DEFINED;
S.token.flags = CPP_TFLAG_IS_OPERATOR | CPP_TFLAG_IS_KEYWORD;
break;
}
}
Sub_Match_List_Result sub_match;
sub_match = sub_match_list(chunk, size, start, bool_lits, word_size);
sub_match = sub_match_list(S.tb, S.tb_pos, 0, bool_lits, word_size);
if (sub_match.index != -1){
token.type = CPP_TOKEN_BOOLEAN_CONSTANT;
token.flags = CPP_TFLAG_IS_KEYWORD;
S.token.type = CPP_TOKEN_BOOLEAN_CONSTANT;
S.token.flags = CPP_TFLAG_IS_KEYWORD;
}
else{
sub_match = sub_match_list(chunk, size, start, keywords, word_size);
sub_match = sub_match_list(S.tb, S.tb_pos, 0, keywords, word_size);
if (sub_match.index != -1){
String_And_Flag data = keywords.data[sub_match.index];
token.type = (Cpp_Token_Type)data.flags;
token.flags = CPP_TFLAG_IS_KEYWORD;
S.token.type = (Cpp_Token_Type)data.flags;
S.token.flags = CPP_TFLAG_IS_KEYWORD;
}
else{
token.type = CPP_TOKEN_IDENTIFIER;
token.flags = 0;
S.token.type = CPP_TOKEN_IDENTIFIER;
S.token.flags = 0;
}
}
}break;
case LS_pound:
token.flags = 0;
S.token.flags = 0;
switch (c){
case '#': token.type = CPP_PP_CONCAT; break;
case '#': S.token.type = CPP_PP_CONCAT; break;
default:
token.type = CPP_PP_STRINGIFY;
--pos;
S.token.type = CPP_PP_STRINGIFY;
--S.pos;
break;
}
break;
case LS_pp:
{
--pos;
int start = S.token_start + 1;
--S.pos;
int start = 1;
c = chunk[start];
while (start < pos && (c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\v' || c == '\f')){
c = S.tb[start];
while (start < S.tb_pos && (c == ' ' || c == '\t' || c == '\r' || c == '\v' || c == '\f')){
++start;
c = chunk[start];
c = S.tb[start];
}
int word_size = pos - start;
int word_size = S.tb_pos - start - 1;
Sub_Match_List_Result match;
match = sub_match_list(chunk, size, start, preprops, word_size);
match = sub_match_list(S.tb, S.tb_pos, start, preprops, word_size);
if (match.index != -1){
String_And_Flag data = preprops.data[match.index];
token.type = (Cpp_Token_Type)data.flags;
token.flags = CPP_TFLAG_PP_DIRECTIVE;
S.pp_state = (unsigned char)cpp_pp_directive_to_state(token.type);
S.token.type = (Cpp_Token_Type)data.flags;
S.token.flags = CPP_TFLAG_PP_DIRECTIVE;
S.pp_state = (unsigned char)cpp_pp_directive_to_state(S.token.type);
}
else{
token.type = CPP_TOKEN_JUNK;
token.flags = 0;
S.token.type = CPP_TOKEN_JUNK;
S.token.flags = 0;
}
}break;
@ -1084,136 +1212,77 @@ cpp_lex_nonalloc(Lex_Data S, char *chunk, int file_absolute_pos, int size, Cpp_T
S.fsm.int_state = LSINT_default;
{
int done = 0;
--pos;
for (; done == 0 && pos <= end_pos;){
if (pos < end_pos){
c = chunk[pos++];
S.fsm.emit_token = 0;
--S.pos;
for (;;){
for (; S.fsm.emit_token == 0 && S.pos < end_pos;){
c = chunk[S.pos++];
S.fsm = int_fsm(S.fsm, c);
}
else{
c = 0;
++pos;
if (S.fsm.emit_token == 0){
DrYield(5, 1);
}
else break;
}
--S.pos;
}
switch (S.fsm.int_state){
case LSINT_default:
switch (c){
case 'u': case 'U': S.fsm.int_state = LSINT_u; break;
case 'l': S.fsm.int_state = LSINT_l; break;
case 'L': S.fsm.int_state = LSINT_L; break;
default: done = 1; break;
}
break;
case LSINT_u:
switch (c){
case 'l': S.fsm.int_state = LSINT_ul; break;
case 'L': S.fsm.int_state = LSINT_uL; break;
default: done = 1; break;
}
break;
case LSINT_l:
switch (c){
case 'l': S.fsm.int_state = LSINT_ll; break;
case 'U': case 'u': S.fsm.int_state = LSINT_extra; break;
default: done = 1; break;
}
break;
case LSINT_L:
switch (c){
case 'L': S.fsm.int_state = LSINT_ll; break;
case 'U': case 'u': S.fsm.int_state = LSINT_extra; break;
default: done = 1; break;
}
break;
case LSINT_ul:
switch (c){
case 'l': S.fsm.int_state = LSINT_extra; break;
default: done = 1; break;
}
break;
case LSINT_uL:
switch (c){
case 'L': S.fsm.int_state = LSINT_extra; break;
default: done = 1; break;
}
break;
case LSINT_ll:
switch (c){
case 'u': case 'U': S.fsm.int_state = LSINT_extra; break;
default: done = 1; break;
}
break;
case LSINT_extra:
done = 1;
break;
}
}
--pos;
}
token.type = CPP_TOKEN_INTEGER_CONSTANT;
token.flags = 0;
S.token.type = CPP_TOKEN_INTEGER_CONSTANT;
S.token.flags = 0;
break;
case LS_float:
case LS_crazy_float0:
case LS_crazy_float1:
token.type = CPP_TOKEN_FLOATING_CONSTANT;
token.flags = 0;
S.token.type = CPP_TOKEN_FLOATING_CONSTANT;
S.token.flags = 0;
switch (c){
case 'f': case 'F':
case 'l': case 'L':break;
default: --pos; break;
default: --S.pos; break;
}
break;
case LS_char:
token.type = CPP_TOKEN_CHARACTER_CONSTANT;
token.flags = 0;
S.token.type = CPP_TOKEN_CHARACTER_CONSTANT;
S.token.flags = 0;
break;
case LS_string:
token.type = CPP_TOKEN_STRING_CONSTANT;
token.flags = 0;
S.token.type = CPP_TOKEN_STRING_CONSTANT;
S.token.flags = 0;
break;
case LS_comment_pre:
token.flags = CPP_TFLAG_IS_OPERATOR;
S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){
case '=': token.type = CPP_TOKEN_DIVEQ; break;
case '=': S.token.type = CPP_TOKEN_DIVEQ; break;
default:
token.type = CPP_TOKEN_DIV;
--pos;
S.token.type = CPP_TOKEN_DIV;
--S.pos;
break;
}
break;
case LS_comment: case LS_comment_block_ending:
token.type = CPP_TOKEN_COMMENT;
token.flags = 0;
S.token.type = CPP_TOKEN_COMMENT;
S.token.flags = 0;
pos_update_rule = PUR_unget_whitespace;
break;
case LS_error_message:
token.type = CPP_TOKEN_ERROR_MESSAGE;
token.flags = 0;
S.token.type = CPP_TOKEN_ERROR_MESSAGE;
S.token.flags = 0;
pos_update_rule = PUR_unget_whitespace;
break;
case LS_dot:
token.flags = CPP_TFLAG_IS_OPERATOR;
S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){
case '*': token.type = CPP_TOKEN_PTRDOT; break;
case '*': S.token.type = CPP_TOKEN_PTRDOT; break;
default:
token.type = CPP_TOKEN_DOT;
--pos;
S.token.type = CPP_TOKEN_DOT;
--S.pos;
break;
}
break;
@ -1221,182 +1290,182 @@ cpp_lex_nonalloc(Lex_Data S, char *chunk, int file_absolute_pos, int size, Cpp_T
case LS_ellipsis:
switch (c){
case '.':
token.flags = CPP_TFLAG_IS_OPERATOR;
token.type = CPP_TOKEN_ELLIPSIS;
S.token.flags = CPP_TFLAG_IS_OPERATOR;
S.token.type = CPP_TOKEN_ELLIPSIS;
break;
default:
token.type = CPP_TOKEN_JUNK;
--pos;
S.token.type = CPP_TOKEN_JUNK;
--S.pos;
break;
}
break;
case LS_less:
token.flags = CPP_TFLAG_IS_OPERATOR;
S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){
case '=': token.type = CPP_TOKEN_LESSEQ; break;
case '=': S.token.type = CPP_TOKEN_LESSEQ; break;
default:
token.type = CPP_TOKEN_LESS;
--pos;
S.token.type = CPP_TOKEN_LESS;
--S.pos;
break;
}
break;
case LS_less_less:
token.flags = CPP_TFLAG_IS_OPERATOR;
S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){
case '=': token.type = CPP_TOKEN_LSHIFTEQ; break;
case '=': S.token.type = CPP_TOKEN_LSHIFTEQ; break;
default:
token.type = CPP_TOKEN_LSHIFT;
--pos;
S.token.type = CPP_TOKEN_LSHIFT;
--S.pos;
break;
}
break;
case LS_more:
token.flags = CPP_TFLAG_IS_OPERATOR;
S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){
case '=': token.type = CPP_TOKEN_GRTREQ; break;
case '=': S.token.type = CPP_TOKEN_GRTREQ; break;
default:
token.type = CPP_TOKEN_GRTR;
--pos;
S.token.type = CPP_TOKEN_GRTR;
--S.pos;
break;
}
break;
case LS_more_more:
token.flags = CPP_TFLAG_IS_OPERATOR;
S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){
case '=': token.type = CPP_TOKEN_RSHIFTEQ; break;
case '=': S.token.type = CPP_TOKEN_RSHIFTEQ; break;
default:
token.type = CPP_TOKEN_RSHIFT;
--pos;
S.token.type = CPP_TOKEN_RSHIFT;
--S.pos;
break;
}
break;
case LS_minus:
token.flags = CPP_TFLAG_IS_OPERATOR;
S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){
case '-': token.type = CPP_TOKEN_DECREMENT; break;
case '=': token.type = CPP_TOKEN_SUBEQ; break;
case '-': S.token.type = CPP_TOKEN_DECREMENT; break;
case '=': S.token.type = CPP_TOKEN_SUBEQ; break;
default:
token.type = CPP_TOKEN_MINUS;
--pos;
S.token.type = CPP_TOKEN_MINUS;
--S.pos;
break;
}
break;
case LS_arrow:
token.flags = CPP_TFLAG_IS_OPERATOR;
S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){
case '*': token.type = CPP_TOKEN_PTRARROW; break;
case '*': S.token.type = CPP_TOKEN_PTRARROW; break;
default:
token.type = CPP_TOKEN_ARROW;
--pos;
S.token.type = CPP_TOKEN_ARROW;
--S.pos;
break;
}
break;
case LS_and:
token.flags = CPP_TFLAG_IS_OPERATOR;
S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){
case '&': token.type = CPP_TOKEN_AND; break;
case '=': token.type = CPP_TOKEN_ANDEQ; break;
case '&': S.token.type = CPP_TOKEN_AND; break;
case '=': S.token.type = CPP_TOKEN_ANDEQ; break;
default:
token.type = CPP_TOKEN_AMPERSAND;
--pos;
S.token.type = CPP_TOKEN_AMPERSAND;
--S.pos;
break;
}
break;
case LS_or:
token.flags = CPP_TFLAG_IS_OPERATOR;
S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){
case '|': token.type = CPP_TOKEN_OR; break;
case '=': token.type = CPP_TOKEN_OREQ; break;
case '|': S.token.type = CPP_TOKEN_OR; break;
case '=': S.token.type = CPP_TOKEN_OREQ; break;
default:
token.type = CPP_TOKEN_BIT_OR;
--pos;
S.token.type = CPP_TOKEN_BIT_OR;
--S.pos;
break;
}
break;
case LS_plus:
token.flags = CPP_TFLAG_IS_OPERATOR;
S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){
case '+': token.type = CPP_TOKEN_INCREMENT; break;
case '=': token.type = CPP_TOKEN_ADDEQ; break;
case '+': S.token.type = CPP_TOKEN_INCREMENT; break;
case '=': S.token.type = CPP_TOKEN_ADDEQ; break;
default:
token.type = CPP_TOKEN_PLUS;
--pos;
S.token.type = CPP_TOKEN_PLUS;
--S.pos;
break;
}
break;
case LS_colon:
token.flags = CPP_TFLAG_IS_OPERATOR;
S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){
case ':': token.type = CPP_TOKEN_SCOPE; break;
case ':': S.token.type = CPP_TOKEN_SCOPE; break;
default:
token.type = CPP_TOKEN_COLON;
--pos;
S.token.type = CPP_TOKEN_COLON;
--S.pos;
break;
}
break;
case LS_star:
token.flags = CPP_TFLAG_IS_OPERATOR;
S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){
case '=': token.type = CPP_TOKEN_MULEQ; break;
case '=': S.token.type = CPP_TOKEN_MULEQ; break;
default:
token.type = CPP_TOKEN_STAR;
--pos;
S.token.type = CPP_TOKEN_STAR;
--S.pos;
break;
}
break;
case LS_modulo:
token.flags = CPP_TFLAG_IS_OPERATOR;
S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){
case '=': token.type = CPP_TOKEN_MODEQ; break;
case '=': S.token.type = CPP_TOKEN_MODEQ; break;
default:
token.type = CPP_TOKEN_MOD;
--pos;
S.token.type = CPP_TOKEN_MOD;
--S.pos;
break;
}
break;
case LS_caret:
token.flags = CPP_TFLAG_IS_OPERATOR;
S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){
case '=': token.type = CPP_TOKEN_XOREQ; break;
case '=': S.token.type = CPP_TOKEN_XOREQ; break;
default:
token.type = CPP_TOKEN_BIT_XOR;
--pos;
S.token.type = CPP_TOKEN_BIT_XOR;
--S.pos;
break;
}
break;
case LS_eq:
token.flags = CPP_TFLAG_IS_OPERATOR;
S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){
case '=': token.type = CPP_TOKEN_EQEQ; break;
case '=': S.token.type = CPP_TOKEN_EQEQ; break;
default:
token.type = CPP_TOKEN_EQ;
--pos;
S.token.type = CPP_TOKEN_EQ;
--S.pos;
break;
}
break;
case LS_bang:
token.flags = CPP_TFLAG_IS_OPERATOR;
S.token.flags = CPP_TFLAG_IS_OPERATOR;
switch (c){
case '=': token.type = CPP_TOKEN_NOTEQ; break;
case '=': S.token.type = CPP_TOKEN_NOTEQ; break;
default:
token.type = CPP_TOKEN_NOT;
--pos;
S.token.type = CPP_TOKEN_NOT;
--S.pos;
break;
}
break;
@ -1404,27 +1473,26 @@ cpp_lex_nonalloc(Lex_Data S, char *chunk, int file_absolute_pos, int size, Cpp_T
switch (pos_update_rule){
case PUR_unget_whitespace:
c = chunk[--pos];
c = chunk[--S.pos];
while (c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\v' || c == '\f'){
--pos;
c = chunk[pos];
c = chunk[--S.pos];
}
++pos;
++S.pos;
break;
}
if ((token.flags & CPP_TFLAG_PP_DIRECTIVE) == 0){
if ((S.token.flags & CPP_TFLAG_PP_DIRECTIVE) == 0){
switch (S.pp_state){
case LSPP_include:
if (token.type != CPP_TOKEN_INCLUDE_FILE){
token.type = CPP_TOKEN_JUNK;
if (S.token.type != CPP_TOKEN_INCLUDE_FILE){
S.token.type = CPP_TOKEN_JUNK;
}
S.pp_state = LSPP_junk;
break;
case LSPP_macro_identifier:
if (S.fsm.state != LS_identifier){
token.type = CPP_TOKEN_JUNK;
S.token.type = CPP_TOKEN_JUNK;
S.pp_state = LSPP_junk;
}
else{
@ -1434,14 +1502,14 @@ cpp_lex_nonalloc(Lex_Data S, char *chunk, int file_absolute_pos, int size, Cpp_T
case LSPP_identifier:
if (S.fsm.state != LS_identifier){
token.type = CPP_TOKEN_JUNK;
S.token.type = CPP_TOKEN_JUNK;
}
S.pp_state = LSPP_junk;
break;
case LSPP_number:
if (token.type != CPP_TOKEN_INTEGER_CONSTANT){
token.type = CPP_TOKEN_JUNK;
if (S.token.type != CPP_TOKEN_INTEGER_CONSTANT){
S.token.type = CPP_TOKEN_JUNK;
S.pp_state = LSPP_junk;
}
else{
@ -1450,31 +1518,46 @@ cpp_lex_nonalloc(Lex_Data S, char *chunk, int file_absolute_pos, int size, Cpp_T
break;
case LSPP_junk:
token.type = CPP_TOKEN_JUNK;
S.token.type = CPP_TOKEN_JUNK;
break;
}
}
if (S.fsm.emit_token){
token.start = S.token_start;
token.size = pos - S.token_start;
token.flags |= (S.fsm.multi_line)?(CPP_TFLAG_MULTILINE):(0);
if ((token.flags & CPP_TFLAG_PP_DIRECTIVE) == 0){
token.flags |= (S.pp_state != LSPP_default)?(CPP_TFLAG_PP_BODY):(0);
S.token.start = S.token_start;
if (S.pos_overide){
S.token.size = S.pos_overide - S.token_start;
S.pos_overide = 0;
}
token.state_flags = S.pp_state;
else{
S.token.size = S.pos - S.token_start;
}
S.token.flags |= (S.fsm.multi_line)?(CPP_TFLAG_MULTILINE):(0);
if ((S.token.flags & CPP_TFLAG_PP_DIRECTIVE) == 0){
S.token.flags |= (S.pp_state != LSPP_default)?(CPP_TFLAG_PP_BODY):(0);
}
S.token.state_flags = S.pp_state;
cpp_push_token_nonalloc(out_tokens, &token_i, token);
cpp_push_token_nonalloc(out_tokens, &token_i, S.token);
if (token_i == max_token_i){
DrYield(2, 2);
}
}
}
// NOTE(allen): else case for "if (c != 0) {...}"
else{
S.completed = 1;
break;
}
}
token_stack_out->count = token_i;
if (pos == end_pos) S.completed = 1;
return(S);
DrReturn(0);
}
#undef DrYield
#undef DrReturn
#undef DrCase
}
#endif

View File

@ -227,6 +227,8 @@ run_experiment(Experiment *exp, char *filename, int verbose, int chunks){
file_cpp.data = (char*)file_data.data;
file_cpp.size = file_data.size;
ld.tb = (char*)malloc(file_data.size + 1);
{
i64 start;
@ -245,15 +247,18 @@ run_experiment(Experiment *exp, char *filename, int verbose, int chunks){
is_last = 1;
}
ld = new_lex::cpp_lex_nonalloc(ld, (char*)file_data.data + k, k, chunk_size, &exp->testing_stack);
int result = new_lex::cpp_lex_nonalloc(&ld, (char*)file_data.data + k, chunk_size, &exp->testing_stack);
if (result == 0 || result == 2) break;
}
}
else{
new_lex::cpp_lex_nonalloc(ld, (char*)file_data.data, 0, file_data.size, &exp->testing_stack);
new_lex::cpp_lex_nonalloc(&ld, (char*)file_data.data, file_data.size, &exp->testing_stack);
}
time.fsm += (__rdtsc() - start);
}
free(ld.tb);
if (exp->correct_stack.count != exp->testing_stack.count){
pass = 0;
if (verbose >= 0){
@ -315,13 +320,13 @@ show_time(Times t, int repeats, char *type){
f32 speed_up = ((f32)t.handcoded) / t.fsm;
printf(
"\n%s time for %d repeates\n"
OUTLINE("%d")
OUTLINE("%d")
OUTLINE("%lld")
OUTLINE("%lld")
OUTLINE("%f"),
type,
repeats,
OUTLINE_VAR(i32, t.handcoded),
OUTLINE_VAR(i32, t.fsm),
OUTLINE_VAR(i64, t.handcoded),
OUTLINE_VAR(i64, t.fsm),
OUTLINE_VAR(f32, speed_up)
);
}
@ -329,9 +334,17 @@ show_time(Times t, int repeats, char *type){
#define BASE_DIR "w:/4ed/data/test/"
int main(){
int repeats = 1;
int verbose_level = 0;
int chunks = 64;
int repeats = 100;
int verbose_level = -1;
int chunk_start = 0;
int chunk_end = 1024;
#define TEST_FILE "lexer_test.cpp"
#define SINGLE_ITEM 0
int chunks = (chunk_start > 0 && chunk_start <= chunk_end);
int c = 0;
char test_directory[] = BASE_DIR;
File_List all_files = {};
Experiment exp = {};
@ -348,16 +361,16 @@ int main(){
AllowLocal(test_directory);
AllowLocal(all_files);
#if 1
#if SINGLE_ITEM
(void)(repeats);
(void)(verbose_level);
#define TEST_FILE "crazywords.cpp"
if (chunks){
begin_t(&chunk_exp_t);
printf("With chunks of %d\n", chunks);
run_experiment(&chunk_exp, BASE_DIR TEST_FILE, 1, chunks);
for (c = chunk_start; c <= chunk_end; ++c){
run_experiment(&chunk_exp, BASE_DIR TEST_FILE, 1, c);
}
end_t(&chunk_exp_t);
}
@ -375,11 +388,21 @@ int main(){
if (all_files.infos[i].folder == 0){
if (chunks){
begin_t(&chunk_exp_t);
run_experiment(&chunk_exp, all_files.infos[i].filename.str, verbose_level, chunks);
for (c = chunk_start; c <= chunk_end; ++c){
run_experiment(&chunk_exp, all_files.infos[i].filename.str, verbose_level, c);
}
end_t(&chunk_exp_t);
}
begin_t(&exp_t);
if (verbose_level == -1 && chunks){
for (c = chunk_start; c <= chunk_end; ++c){
run_experiment(&exp, all_files.infos[i].filename.str, verbose_level, 0);
}
}
else{
run_experiment(&exp, all_files.infos[i].filename.str, verbose_level, 0);
}
end_t(&exp_t);
}
}
@ -387,6 +410,7 @@ int main(){
#endif
if (chunks){
printf("chunks of sizes %d through %d tested\n", chunk_start, chunk_end);
printf("chunked passed %d / %d tests\n", chunk_exp.passed_total, chunk_exp.test_total);
}