got 4cpp ready to ship

master
Allen Webster 2016-09-06 23:39:19 -04:00
parent 8cf49b566e
commit 723945bb03
6 changed files with 413 additions and 496 deletions

File diff suppressed because one or more lines are too long

View File

@@ -215,20 +215,6 @@ DOC_SEE(Cpp_Get_Token_Result)
     return(result);
 }
 
-FCPP_LINK Cpp_Lex_Data
-cpp_lex_data_init(char *mem_buffer)/*
-DOC_PARAM(tb, The memory to use for initializing the lex state's temp memory buffer.)
-DOC_RETURN(A brand new lex state ready to begin lexing a file from the beginning.)
-DOC(Creates a new lex state in the form of a Cpp_Lex_Data struct and returns the struct.
-The system needs a temporary buffer that is as long as the longest token. 4096 is usually
-enough but the buffer is not checked, so to be 100% bullet proof it has to be the same length
-as the file being lexed.)
-*/{
-    Cpp_Lex_Data data = {0};
-    data.tb = mem_buffer;
-    return(data);
-}
-
 FCPP_INTERNAL Cpp_Lex_PP_State
 cpp_pp_directive_to_state(Cpp_Token_Type type){
     Cpp_Lex_PP_State result = LSPP_default;

@@ -439,7 +425,7 @@ cpp_lex_nonalloc_null_end_no_limit(Cpp_Lex_Data *S_ptr, char *chunk, int32_t siz
             if (S.pp_state == LSPP_body_if){
                 if (match_ss(make_string(S.tb, word_size), make_lit_string("defined"))){
-                    S.token.type = CPP_TOKEN_DEFINED;
+                    S.token.type = CPP_PP_DEFINED;
                     S.token.flags = CPP_TFLAG_IS_OPERATOR | CPP_TFLAG_IS_KEYWORD;
                     break;
                 }

@@ -567,7 +553,7 @@ cpp_lex_nonalloc_null_end_no_limit(Cpp_Lex_Data *S_ptr, char *chunk, int32_t siz
             S.token.type = CPP_TOKEN_JUNK;
             if (S.pp_state == LSPP_include){
                 if (c == '>' || c == '"'){
-                    S.token.type = CPP_TOKEN_INCLUDE_FILE;
+                    S.token.type = CPP_PP_INCLUDE_FILE;
                 }
             }
             else{

@@ -611,7 +597,7 @@ cpp_lex_nonalloc_null_end_no_limit(Cpp_Lex_Data *S_ptr, char *chunk, int32_t siz
             break;
             
             case LS_error_message:
-            S.token.type = CPP_TOKEN_ERROR_MESSAGE;
+            S.token.type = CPP_PP_ERROR_MESSAGE;
             S.token.flags = 0;
             --S.pos;
             break;

@@ -954,7 +940,7 @@ cpp_lex_nonalloc_no_null_out_limit(Cpp_Lex_Data *S_ptr, char *chunk, int32_t siz
 #define NO_OUT_LIMIT ((int32_t)(-1))
 
 FCPP_LINK Cpp_Lex_Result
-cpp_lex_nonalloc(Cpp_Lex_Data *S_ptr, char *chunk, int32_t size, int32_t full_size,
+cpp_lex_step(Cpp_Lex_Data *S_ptr, char *chunk, int32_t size, int32_t full_size,
              Cpp_Token_Array *token_array_out, int32_t max_tokens_out)/*
 DOC_PARAM(S_ptr, The lexer state. Go to the Cpp_Lex_Data section to see how to initialize the state.)
 DOC_PARAM(chunk, The first or next chunk of the file being lexed.)
@@ -971,9 +957,9 @@ a lot of different ways. I will explain the general rules first, and then give
 ways it might be used.
 
 First a lexing state, Cpp_Lex_Data, must be initialized. The file to lex must be read into N contiguous chunks
-of memory. An output Cpp_Token_Array must be allocated and initialized with the appropriate count and max_count values.
-Then each chunk of the file must be passed to cpp_lex_nonalloc in order using the same lexing state for each call.
-Every time a call to cpp_lex_nonalloc returns LexResult_NeedChunk, the next call to cpp_lex_nonalloc should use the
+of memory. An output Cpp_Token_Array must be allocated and initialized with the appropriate count and max_count
+values. Then each chunk of the file must be passed to cpp_lex_step in order using the same lexing state for each call.
+Every time a call to cpp_lex_step returns LexResult_NeedChunk, the next call to cpp_lex_step should use the
 next chunk. If the return is some other value, the lexer hasn't finished with the current chunk and it stopped for some
 other reason, so the same chunk should be used again in the next call.

@@ -981,51 +967,47 @@ If the file chunks contain a null terminator the lexer will return LexResult_Finished.
 At this point calling the lexer again with the same state will result in an error. If you do not have a null
 terminated chunk to end the file, you may instead pass the exact size in bytes of the entire file to the full_size
 parameter and it will automatically handle the termination of the lexing state when it has read that many bytes.
-If a full_size is specified and the system terminates for having seen that many bytes, it will return LexResult_Finished.
-If a full_size is specified and a null character is read before the total number of bytes have been read the system will
-still terminate as usual and return LexResult_Finished.
+If a full_size is specified and the system terminates for having seen that many bytes, it will return
+LexResult_Finished. If a full_size is specified and a null character is read before the total number of bytes have
+been read the system will still terminate as usual and return LexResult_Finished.
 
 If the system has filled the entire output array it will return LexResult_NeedTokenMemory. When this happens if you
 want to continue lexing the file you can grow the token array, or switch to a new output array and then call
-cpp_lex_nonalloc again with the chunk that was being lexed and the new output. You can also specify a max_tokens_out
+cpp_lex_step again with the chunk that was being lexed and the new output. You can also specify a max_tokens_out
 which limits how many new tokens will be added to the token array. Even if token_array_out still had more space
 to hold tokens, if the max_tokens_out limit is hit, the lexer will stop and return LexResult_HitTokenLimit. If this
-happens there is still space left in the token array, so you can resume simply by calling cpp_lex_nonalloc again with
-the same chunk and the same output array. Also note that, unlike the chunks which must only be replaced when the system
-says it needs a chunk. You may switch to or modify the output array in between calls as much as you like.
+happens there is still space left in the token array, so you can resume simply by calling cpp_lex_step again with
+the same chunk and the same output array. Also note that, unlike the chunks, which must only be replaced when the
+system says it needs a chunk, you may switch to or modify the output array in between calls as much as you like.
 
 The most basic use of this system is to get it all done in one big chunk and try to allocate a nearly "infinite" output
 array so that it will not run out of memory. This way you can get the entire job done in one call and then just assert
 to make sure it returns LexResult_Finished to you:
 
 CODE_EXAMPLE(
 Cpp_Token_Array lex_file(char *file_name){
     File_Data file = read_whole_file(file_name);
     
-    Cpp_Lex_Data lex_state =
-        cpp_lex_data_init((char*)malloc(4096)); // hopefully big enough
+    char *temp = (char*)malloc(4096); // hopefully big enough
+    Cpp_Lex_Data lex_state = cpp_lex_data_init(temp);
     
     Cpp_Token_Array array = {0};
     array.tokens = (Cpp_Token*)malloc(1 << 20); // hopefully big enough
     array.max_count = (1 << 20)/sizeof(Cpp_Token);
     
     Cpp_Lex_Result result =
-        cpp_lex_nonalloc(&lex_state, file.data, file.size, file.size,
+        cpp_lex_step(&lex_state, file.data, file.size, file.size,
                      &array, NO_OUT_LIMIT);
     Assert(result == LexResult_Finished);
     
-    free(lex_state.tb);
+    free(temp);
     
     return(array);
 })
 )
 DOC_SEE(Cpp_Lex_Data)
-DOC_SEE(cpp_lex_file)
-DOC_SEE(cpp_lex_nonalloc_null_end_no_limit)
-DOC_SEE(cpp_lex_nonalloc_no_null_no_limit)
-DOC_SEE(cpp_lex_nonalloc_null_end_out_limit)
-DOC_SEE(cpp_lex_nonalloc_no_null_out_limit)
 */{
     Cpp_Lex_Result result = 0;
     if (full_size == HAS_NULL_TERM){
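
The chunk protocol described above can be driven by a fairly small loop. The following is a sketch, not code from this commit: Chunk_Reader and reader_next_chunk stand in for whatever I/O layer supplies the chunks, the 4096 byte temp buffer and the doubling growth policy are arbitrary choices, and the malloc-backed helpers (cpp_make_token_array, cpp_grow_token_array) must not be compiled out with FCPP_FORBID_MALLOC.

    Cpp_Token_Array
    lex_in_chunks(Chunk_Reader *reader, int32_t full_size){
        char temp[4096]; // assumed long enough for the longest token
        Cpp_Lex_Data state = cpp_lex_data_init(temp);
        Cpp_Token_Array array = cpp_make_token_array(1024);
        
        int32_t chunk_size = 0;
        char *chunk = reader_next_chunk(reader, &chunk_size); // hypothetical helper
        
        for (;;){
            Cpp_Lex_Result result =
                cpp_lex_step(&state, chunk, chunk_size, full_size, &array, NO_OUT_LIMIT);
            if (result == LexResult_NeedChunk){
                // only swap chunks when the lexer asks for the next one
                chunk = reader_next_chunk(reader, &chunk_size);
            }
            else if (result == LexResult_NeedTokenMemory){
                // same chunk again, but with a bigger output array
                cpp_grow_token_array(&array, 2*array.max_count);
            }
            else{ // LexResult_Finished
                break;
            }
        }
        return(array);
    }
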
@@ -1047,6 +1029,69 @@ DOC_SEE(cpp_lex_nonalloc_no_null_out_limit)
     return(result);
 }
 
+FCPP_LINK Cpp_Lex_Data
+cpp_lex_data_init(char *mem_buffer)/*
+DOC_PARAM(mem_buffer, The memory to use for initializing the lex state's temp memory buffer.)
+DOC_RETURN(A brand new lex state ready to begin lexing a file from the beginning.)
+DOC(Creates a new lex state in the form of a Cpp_Lex_Data struct and returns the struct.
+The system needs a temporary buffer that is as long as the longest token. 4096 is usually
+enough but the buffer is not checked, so to be 100% bullet proof it has to be the same length
+as the file being lexed.)
+*/{
+    Cpp_Lex_Data data = {0};
+    data.tb = mem_buffer;
+    return(data);
+}
+
+FCPP_LINK int32_t
+cpp_lex_data_temp_size(Cpp_Lex_Data *lex_data)/*
+DOC_PARAM(lex_data, The lex state from which to get the temporary buffer size.)
+DOC(This call gets the current size of the temporary buffer in the lexer state so
+that you can move to a new temporary buffer by copying the data over.)
+DOC_SEE(cpp_lex_data_temp_read)
+DOC_SEE(cpp_lex_data_new_temp)
+*/{
+    int32_t result = lex_data->tb_pos;
+    Assert(lex_data->tb != 0);
+    return(result);
+}
+
+FCPP_LINK void
+cpp_lex_data_temp_read(Cpp_Lex_Data *lex_data, char *out_buffer)/*
+DOC_PARAM(lex_data, The lex state from which to read the temporary buffer.)
+DOC_PARAM(out_buffer, The buffer into which the contents of the temporary buffer will be written.
+The size of the buffer must be at least the size as returned by cpp_lex_data_temp_size.)
+DOC(This call reads the current contents of the temporary buffer.)
+DOC_SEE(cpp_lex_data_temp_size)
+DOC_SEE(cpp_lex_data_new_temp)
+*/{
+    int32_t size = lex_data->tb_pos;
+    char *src = lex_data->tb;
+    char *end = src + size;
+    for (; src < end; ++src, ++out_buffer){
+        *out_buffer = *src;
+    }
+}
+
+FCPP_LINK void
+cpp_lex_data_new_temp(Cpp_Lex_Data *lex_data, char *new_buffer)/*
+DOC_PARAM(lex_data, The lex state that will receive the new temporary buffer.)
+DOC_PARAM(new_buffer, The new temporary buffer that has the same contents as the old temporary buffer.)
+DOC(This call can be used to set a new temporary buffer for the lex state, in cases where you want to
+discontinue lexing, store the state, and resume later. In such a situation it may be necessary for you
+to free the temp buffer that was originally used to make the lex state. This call allows you to supply
+a new temp buffer when you are ready to resume lexing.
+
+However the new buffer needs to have the same contents the old buffer had. To ensure this you have to
+use cpp_lex_data_temp_size and cpp_lex_data_temp_read to get the relevant contents of the temp buffer
+before you free it.)
+DOC_SEE(cpp_lex_data_temp_size)
+DOC_SEE(cpp_lex_data_temp_read)
+*/{
+    lex_data->tb = new_buffer;
+}
+
 // TODO(allen): Get the relex system ready to work in chunks.
 FCPP_INTERNAL Cpp_Relex_State
 cpp_relex_nonalloc_start(char *data, int32_t size, Cpp_Token_Array *array,
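
The three temp buffer calls added above are designed to compose into a save and restore pattern. A sketch of moving a suspended lex state onto a fresh buffer, assuming the original buffer was 4096 bytes and old_temp points to it:

    static void
    relocate_temp_buffer(Cpp_Lex_Data *state, char *old_temp){
        int32_t live_size = cpp_lex_data_temp_size(state); // bytes currently in use
        char *new_temp = (char*)malloc(4096);              // assumed same capacity as before
        Assert(live_size <= 4096);
        cpp_lex_data_temp_read(state, new_temp);           // copy the old contents across
        cpp_lex_data_new_temp(state, new_temp);            // point the state at the new buffer
        free(old_temp);                                    // now safe to release the old one
    }
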
@@ -1188,8 +1233,7 @@ cpp_relex_nonalloc_main(Cpp_Relex_State *state,
 }
 
-#if defined(FCPP_ALLOW_MALLOC)
+#if !defined(FCPP_FORBID_MALLOC)
 
 #include <stdlib.h>
 #include <string.h>

@@ -1200,10 +1244,7 @@ DOC_PARAM(starting_max, The number of tokens to initialize the array with.)
 DOC_RETURN(An empty Cpp_Token_Array with memory malloc'd for storing tokens.)
 DOC(This call allocates a Cpp_Token_Array with malloc for use in other
 convenience functions. Stacks that are not allocated this way should not be
-used in the convenience functions.
-This call is a part of the FCPP_ALLOW_MALLOC convenience functions.
-If you want to use it defined the macro FCPP_ALLOW_MALLOC before including 4cpp_lexer.h)
+used in the convenience functions.)
 */{
     Cpp_Token_Array token_array;
     token_array.tokens = (Cpp_Token*)malloc(sizeof(Cpp_Token)*starting_max);

@@ -1215,10 +1256,7 @@ If you want to use it defined the macro FCPP_ALLOW_MALLOC before including 4cpp_
 FCPP_LINK void
 cpp_free_token_array(Cpp_Token_Array token_array)/*
 DOC_PARAM(token_array, An array previously allocated by cpp_make_token_array)
-DOC(This call frees a Cpp_Token_Array.
-This call is a part of the FCPP_ALLOW_MALLOC convenience functions.
-If you want to use it defined the macro FCPP_ALLOW_MALLOC before including 4cpp_lexer.h)
+DOC(This call frees a Cpp_Token_Array.)
 DOC_SEE(cpp_make_token_array)
 */{
     free(token_array.tokens);

@@ -1230,10 +1268,7 @@ DOC_PARAM(token_array, An array previously allocated by cpp_make_token_array.)
 DOC_PARAM(new_max, The new maximum size the array should support. If this is not greater
 than the current size of the array the operation is ignored.)
 DOC(This call allocates a new memory chunk and moves the existing tokens in the array
-over to the new chunk.
-This call is a part of the FCPP_ALLOW_MALLOC convenience functions.
-If you want to use it defined the macro FCPP_ALLOW_MALLOC before including 4cpp_lexer.h)
+over to the new chunk.)
 DOC_SEE(cpp_make_token_array)
 */{
     if (new_max > token_array->count){

@@ -1257,8 +1292,20 @@ This token array must be previously allocated with cpp_make_token_array)
 DOC(Lexes an entire file and manages the interaction with the lexer system so that
 it is quick and convenient to lex files.
 
-This call is a part of the FCPP_ALLOW_MALLOC convenience functions.
-If you want to use it defined the macro FCPP_ALLOW_MALLOC before including 4cpp_lexer.h)
+CODE_EXAMPLE(
+Cpp_Token_Array lex_file(char *file_name){
+    File_Data file = read_whole_file(file_name);
+    
+    // This array will be automatically grown if it runs
+    // out of memory.
+    Cpp_Token_Array array = cpp_make_token_array(100);
+    
+    cpp_lex_file(file.data, file.size, &array);
+    
+    return(array);
+})
+)
 DOC_SEE(cpp_make_token_array)
 */{
     Cpp_Lex_Data S = {0};

@@ -1267,7 +1314,7 @@ DOC_SEE(cpp_make_token_array)
     token_array_out->count = 0;
     for (;!quit;){
-        int32_t result = cpp_lex_nonalloc(&S, data, size, HAS_NULL_TERM, token_array_out, NO_OUT_LIMIT);
+        int32_t result = cpp_lex_step(&S, data, size, HAS_NULL_TERM, token_array_out, NO_OUT_LIMIT);
         switch (result){
             case LexResult_Finished:
             {

@@ -1282,7 +1329,7 @@ DOC_SEE(cpp_make_token_array)
             // terminator, but we didn't actually, so provide the null
             // terminator via this one byte chunk.
             char empty = 0;
-            cpp_lex_nonalloc(&S, &empty, 1, HAS_NULL_TERM, token_array_out, NO_OUT_LIMIT);
+            cpp_lex_step(&S, &empty, 1, HAS_NULL_TERM, token_array_out, NO_OUT_LIMIT);
         }break;
         
         case LexResult_NeedTokenMemory:

View File

@@ -8,8 +8,12 @@
 #define ENUM(type,name) typedef type name; enum name##_
 #endif
 
-#ifndef INTERNAL_ENUM
-#define INTERNAL_ENUM(type,name) typedef type name; enum name##_
+#ifndef ENUM_INTERNAL
+#define ENUM_INTERNAL(type,name) typedef type name; enum name##_
+#endif
+
+#ifndef struct_internal
+#define struct_internal struct
 #endif
 
 /* DOC(A Cpp_Token_Type classifies a token to make parsing easier. Some types are not

@@ -37,6 +41,10 @@ ENUM(uint32_t, Cpp_Token_Type){
     CPP_PP_CONCAT,
     CPP_PP_UNKNOWN,
+    CPP_PP_DEFINED,
+    CPP_PP_INCLUDE_FILE,
+    CPP_PP_ERROR_MESSAGE,
     
     CPP_TOKEN_KEY_TYPE,
     CPP_TOKEN_KEY_MODIFIER,
     CPP_TOKEN_KEY_QUALIFIER,

@@ -218,59 +226,84 @@ ENUM(uint32_t, Cpp_Token_Type){
     // NOTE(allen): Precedence 16, LtoR
     CPP_TOKEN_COMMA,
     
-    CPP_TOKEN_DEFINED,
-    CPP_TOKEN_INCLUDE_FILE,
-    CPP_TOKEN_ERROR_MESSAGE,
-    
     /* DOC(This type is for parser use, it is not output by the lexer.) */
     CPP_TOKEN_EOF,
     
     CPP_TOKEN_TYPE_COUNT
 };
 
+/* DOC(Cpp_Token represents a single lexed token.
+It is the primary output of the lexing system.)
+DOC_SEE(Cpp_Token_Flag) */
 struct Cpp_Token{
+    /* DOC(The type field indicates the type of the token.
+    All tokens have a type no matter the circumstances.) */
     Cpp_Token_Type type;
-    int32_t start, size;
+    
+    /* DOC(The start field indicates the index of the first character
+    of this token's lexeme.) */
+    int32_t start;
+    
+    /* DOC(The size field indicates the number of bytes in this token's lexeme.) */
+    int32_t size;
+    
+    /* DOC(The state_flags should not be used outside of the lexer's implementation.) */
     uint16_t state_flags;
+    
+    /* DOC(The flags field contains extra useful information about the token.) */
     uint16_t flags;
 };
 
+/* DOC(The Cpp_Token_Flags are used to mark up tokens with additional information.) */
 ENUM(uint16_t, Cpp_Token_Flag){
-    CPP_TFLAG_IGNORE = 0x1,
-    CPP_TFLAG_PP_DIRECTIVE = 0x2,
-    CPP_TFLAG_PP_BODY = 0x4,
-    CPP_TFLAG_BAD_ENDING = 0x8,
-    CPP_TFLAG_MULTILINE = 0x10,
-    CPP_TFLAG_PARAMETERIZED = 0x20,
-    CPP_TFLAG_IS_OPERATOR = 0x40,
-    CPP_TFLAG_IS_KEYWORD = 0x80
-};
-
-ENUM(uint16_t, Cpp_Preprocessor_State){
-    CPP_LEX_PP_DEFAULT,
-    CPP_LEX_PP_IDENTIFIER,
-    CPP_LEX_PP_MACRO_IDENTIFIER,
-    CPP_LEX_PP_INCLUDE,
-    CPP_LEX_PP_BODY,
-    CPP_LEX_PP_BODY_IF,
-    CPP_LEX_PP_NUMBER,
-    CPP_LEX_PP_ERROR,
-    CPP_LEX_PP_JUNK,
-    CPP_LEX_PP_COUNT
+    /* DOC(Indicates that the token is a preprocessor directive.) */
+    CPP_TFLAG_PP_DIRECTIVE = 0x1,
+    
+    /* DOC(Indicates that the token is on the line of a preprocessor directive.) */
+    CPP_TFLAG_PP_BODY = 0x2,
+    
+    /* DOC(Indicates that the token spans across multiple lines. This can show up
+    on line comments and string literals with back slash line continuation. ) */
+    CPP_TFLAG_MULTILINE = 0x4,
+    
+    /* DOC(Indicates that the token is some kind of operator or punctuation like braces.) */
+    CPP_TFLAG_IS_OPERATOR = 0x8,
+    
+    /* DOC(Indicates that the token is a keyword.) */
+    CPP_TFLAG_IS_KEYWORD = 0x10
 };
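
Since the new flag values are distinct bits, they are meant to be tested with bitwise AND. A quick sketch, where token is any Cpp_Token the lexer produced:

    if (token.flags & CPP_TFLAG_PP_DIRECTIVE){
        // the token is a directive itself, e.g. the `#include`
    }
    else if (token.flags & CPP_TFLAG_PP_BODY){
        // the token is on a directive's line, e.g. the file name after #include
    }
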
+/* DOC(Cpp_Token_Array is used to bundle together the common elements
+of a growing array of Cpp_Tokens. To initialize it the tokens field should
+point to a block of memory with a size equal to max_count*sizeof(Cpp_Token)
+and the count should be initialized to zero.) */
 struct Cpp_Token_Array{
+    /* DOC(The tokens field points to the memory used to store the array of tokens.) */
     Cpp_Token *tokens;
-    int32_t count, max_count;
+    
+    /* DOC(The count field counts how many tokens in the array are currently used.) */
+    int32_t count;
+    
+    /* DOC(The max_count field specifies the maximum size the count field may grow to before
+    the tokens array is out of space.) */
+    int32_t max_count;
 };
 
 static Cpp_Token_Array null_cpp_token_array = {0};
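
The initialization rule in the Cpp_Token_Array documentation amounts to three assignments. A sketch with a caller-owned block, where my_block and my_block_size are placeholders:

    Cpp_Token_Array array = {0};
    array.tokens = (Cpp_Token*)my_block;               // caller-owned memory
    array.max_count = my_block_size/sizeof(Cpp_Token); // capacity in tokens
    array.count = 0;                                   // no tokens in use yet
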
+/* DOC(Cpp_Get_Token_Result is the return result of the cpp_get_token call.)
+DOC_SEE(cpp_get_token) */
 struct Cpp_Get_Token_Result{
+    /* DOC(The token_index field indicates which token answers the query. To get the token from
+    the source array CODE_EXAMPLE(array.tokens[result.token_index])) */
     int32_t token_index;
+    
+    /* DOC(The in_whitespace field is true when the query position was actually in whitespace
+    after the result token.) */
     int32_t in_whitespace;
 };
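
Reading a query result back out looks roughly like the following. The exact cpp_get_token signature does not appear in this diff, so the two-argument form here is an assumption:

    Cpp_Get_Token_Result result = cpp_get_token(&array, pos); // assumed signature
    Cpp_Token token = array.tokens[result.token_index];       // the token answering the query
    if (result.in_whitespace){
        // pos fell in the whitespace after token, not inside its lexeme
    }
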
-struct Cpp_Relex_State{
+struct_internal Cpp_Relex_State{
     char *data;
     int32_t size;

@@ -283,7 +316,7 @@ struct Cpp_Relex_State{
     int32_t space_request;
 };
 
-struct Cpp_Lex_FSM{
+struct_internal Cpp_Lex_FSM{
     uint8_t state;
     uint8_t int_state;
     uint8_t emit_token;
@@ -291,8 +324,16 @@
 };
 static Cpp_Lex_FSM null_lex_fsm = {0};
 
+/* DOC(Cpp_Lex_Data represents the state of the lexer so that the system may be resumable
+and the user can manage the lexer state and decide when to resume lexing with it. To create
+a new lexer state that has not begun doing any lexing work call cpp_lex_data_init.
+The internals of the lex state should not be treated as a part of the public API.)
+DOC_SEE(cpp_lex_data_init)
+HIDE_MEMBERS()*/
 struct Cpp_Lex_Data{
     char *tb;
     int32_t tb_pos;
     int32_t token_start;

@@ -310,14 +351,37 @@ struct Cpp_Lex_Data{
     int32_t __pc__;
 };
 
+/* DOC(Cpp_Lex_Result is returned from the lexing engine to indicate why it stopped lexing.) */
 ENUM(int32_t, Cpp_Lex_Result){
+    /* DOC(This indicates that the system got to the end of the file and will not accept more input.) */
     LexResult_Finished,
+    
+    /* DOC(This indicates that the system got to the end of an input chunk and is ready to receive the
+    next input chunk.) */
     LexResult_NeedChunk,
+    
+    /* DOC(This indicates that the output array ran out of space to store tokens and needs to be
+    replaced or expanded before continuing.) */
     LexResult_NeedTokenMemory,
+    
+    /* DOC(This indicates that the maximum number of output tokens as specified by the user was hit.) */
     LexResult_HitTokenLimit,
 };
 
-INTERNAL_ENUM(uint8_t, Cpp_Lex_State){
+ENUM_INTERNAL(uint16_t, Cpp_Preprocessor_State){
+    CPP_LEX_PP_DEFAULT,
+    CPP_LEX_PP_IDENTIFIER,
+    CPP_LEX_PP_MACRO_IDENTIFIER,
+    CPP_LEX_PP_INCLUDE,
+    CPP_LEX_PP_BODY,
+    CPP_LEX_PP_BODY_IF,
+    CPP_LEX_PP_NUMBER,
+    CPP_LEX_PP_ERROR,
+    CPP_LEX_PP_JUNK,
+    CPP_LEX_PP_COUNT
+};
+
+ENUM_INTERNAL(uint8_t, Cpp_Lex_State){
     LS_default,
     LS_identifier,
     LS_pound,

@@ -362,7 +426,7 @@ INTERNAL_ENUM(uint8_t, Cpp_Lex_State){
     LS_count
 };
 
-INTERNAL_ENUM(uint8_t, Cpp_Lex_Int_State){
+ENUM_INTERNAL(uint8_t, Cpp_Lex_Int_State){
     LSINT_default,
     LSINT_u,
     LSINT_l,

@@ -375,7 +439,7 @@ INTERNAL_ENUM(uint8_t, Cpp_Lex_Int_State){
     LSINT_count
 };
 
-INTERNAL_ENUM(uint8_t, Cpp_Lex_PP_State){
+ENUM_INTERNAL(uint8_t, Cpp_Lex_PP_State){
     LSPP_default,
     LSPP_include,
     LSPP_macro_identifier,

View File

@@ -28,6 +28,7 @@
 #include "4ed.h"
 
+#define FCPP_FORBID_MALLOC
 #include "4cpp_lexer.h"
 
 #include "4coder_table.cpp"

View File

@@ -1147,9 +1147,7 @@ Job_Callback_Sig(job_full_lex){
     do{
         i32 result =
-            cpp_lex_nonalloc(&lex,
-                             text_data, text_size, text_size,
-                             &tokens, 2048);
+            cpp_lex_step(&lex, text_data, text_size, text_size, &tokens, 2048);
         
         switch (result){
             case LexResult_NeedChunk: Assert(!"Invalid Path"); break;

@@ -3014,7 +3012,7 @@ style_get_color(Style *style, Cpp_Token token){
             result = &style->main.float_constant_color;
             break;
             
-            case CPP_TOKEN_INCLUDE_FILE:
+            case CPP_PP_INCLUDE_FILE:
             result = &style->main.include_color;
             break;

View File

@@ -13,7 +13,6 @@
 #include "internal_4coder_string.cpp"
 
-#define FCPP_ALLOW_MALLOC
 #include "4cpp_lexer.h"
 
 #include <stdlib.h>

@@ -585,7 +584,8 @@ typedef enum Doc_Note_Type{
     DOC_RETURN,
     DOC,
     DOC_SEE,
-    DOC_HIDE
+    DOC_HIDE,
+    HIDE_MEMBERS,
 } Doc_Note_Type;

@@ -595,6 +595,7 @@ doc_note_string[] = {
     make_lit_string("DOC"),
     make_lit_string("DOC_SEE"),
     make_lit_string("DOC_HIDE"),
+    make_lit_string("HIDE_MEMBERS"),
 };
 
 static int32_t
@@ -1700,7 +1701,7 @@ try_to_use(Used_Links *used, String str){
 }
 
 static void
-print_struct_html(String *out, Item_Node *member){
+print_struct_html(String *out, Item_Node *member, int32_t hide_children){
     String name = member->name;
     String type = member->type;
     String type_postfix = member->type_postfix;

@@ -1712,16 +1713,22 @@ print_struct_html(String *out, Item_Node *member){
     if (match_ss(type, make_lit_string("struct")) ||
         match_ss(type, make_lit_string("union"))){
+        if (hide_children){
+            append_sc(out, " { /* non-public internals */ } ;");
+        }
+        else{
             append_sc(out, " {<br><div style='margin-left: 8mm;'>");
             
             for (Item_Node *member_iter = member->first_child;
                  member_iter != 0;
                  member_iter = member_iter->next_sibling){
-                print_struct_html(out, member_iter);
+                print_struct_html(out, member_iter, hide_children);
             }
             
             append_sc(out, "</div>};<br>");
         }
+    }
     else{
         append_sc(out, ";<br>");
     }
@@ -1971,7 +1978,6 @@ get_first_doc_chunk(String source, Doc_Chunk_Type *type){
     return(chunk);
 }
 
 static void
 print_doc_description(String *out, Partition *part, String src){
     Doc_Chunk_Type type;

@@ -2056,7 +2062,6 @@ print_struct_docs(String *out, Partition *part, Item_Node *member){
             append_sc(out, DOC_ITEM_HEAD_INL_CLOSE"</div>");
             
             append_sc(out, "<div style='margin-bottom: 6mm;'>"DOC_ITEM_OPEN);
-            // TODO(allen): append_ss(out, doc.main_doc);
             print_doc_description(out, part, doc.main_doc);
             append_sc(out, DOC_ITEM_CLOSE"</div>");

@@ -2172,7 +2177,6 @@ print_function_docs(String *out, Partition *part, String name, String doc_string
     String main_doc = doc.main_doc;
     if (main_doc.size != 0){
         append_sc(out, DOC_HEAD_OPEN"Description"DOC_HEAD_CLOSE DOC_ITEM_OPEN);
-        // TODO(allen): append_ss(out, main_doc);
         print_doc_description(out, part, main_doc);
         append_sc(out, DOC_ITEM_CLOSE);
     }

@@ -2281,7 +2285,6 @@ print_item(String *out, Partition *part, Used_Links *used,
                 append_sc(out, DOC_HEAD_OPEN"Description"DOC_HEAD_CLOSE);
                 append_sc(out, DOC_ITEM_OPEN);
-                // TODO(allen): append_ss(out, main_doc);
                 print_doc_description(out, part, main_doc);
                 append_sc(out, DOC_ITEM_CLOSE);
             }

@@ -2313,7 +2316,6 @@ print_item(String *out, Partition *part, Used_Links *used,
                 append_sc(out, DOC_HEAD_OPEN"Description"DOC_HEAD_CLOSE);
                 append_sc(out, DOC_ITEM_OPEN);
-                // TODO(allen): append_ss(out, main_doc);
                 print_doc_description(out, part, main_doc);
                 append_sc(out, DOC_ITEM_CLOSE);
             }

@@ -2345,7 +2347,6 @@ print_item(String *out, Partition *part, Used_Links *used,
             append_sc(out, "</span></div>");
             
             append_sc(out, "<div style='margin-bottom: 6mm;'>"DOC_ITEM_OPEN);
-            // TODO(allen): append_ss(out, doc.main_doc);
             print_doc_description(out, part, doc.main_doc);
             append_sc(out, DOC_ITEM_CLOSE"</div>");
@@ -2359,17 +2360,32 @@ print_item(String *out, Partition *part, Used_Links *used,
         case Item_Struct: case Item_Union:
         {
-            Item_Node *member = item;
+            String doc_string = item->doc_string;
+            
+            int32_t hide_members = 0;
+            if (doc_string.size == 0){
+                hide_members = 1;
+            }
+            else{
+                for (String word = get_first_word(doc_string);
+                     word.str;
+                     word = get_next_word(doc_string, word)){
+                    if (match_ss(word, make_lit_string("HIDE_MEMBERS"))){
+                        hide_members = 1;
+                        break;
+                    }
+                }
+            }
             
             // NOTE(allen): Code box
-            print_struct_html(out, member);
+            print_struct_html(out, item, hide_members);
             
             // NOTE(allen): Close the code box
             append_sc(out, "</div>");
             
             // NOTE(allen): Descriptive section
             {
-                String doc_string = member->doc_string;
                 Documentation doc = {0};
                 perform_doc_parse(part, doc_string, &doc);

@@ -2378,7 +2394,6 @@ print_item(String *out, Partition *part, Used_Links *used,
                     append_sc(out, DOC_HEAD_OPEN"Description"DOC_HEAD_CLOSE);
                     append_sc(out, DOC_ITEM_OPEN);
-                    // TODO(allen): append_ss(out, main_doc);
                     print_doc_description(out, part, main_doc);
                     append_sc(out, DOC_ITEM_CLOSE);
                 }

@@ -2386,9 +2401,11 @@ print_item(String *out, Partition *part, Used_Links *used,
                     fprintf(stderr, "warning: no documentation string for %.*s\n", name.size, name.str);
                 }
                 
-                if (member->first_child){
+                if (!hide_members){
+                    if (item->first_child){
                         append_sc(out, DOC_HEAD_OPEN"Fields"DOC_HEAD_CLOSE);
-                        print_struct_docs(out, part, member);
+                        print_struct_docs(out, part, item);
+                    }
                 }
                 
                 print_see_also(out, &doc);
@@ -2528,8 +2545,10 @@ generate_custom_headers(){
         {make_lit_string("ENUM") , Item_Enum } ,
     };
     
+#if 0
     Meta_Unit unit = compile_meta_unit(part, type_files, ArrayCount(type_files),
                                        type_keys, ArrayCount(type_keys));
+#endif
     
     // NOTE(allen): Output
     String out = str_alloc(part, 10 << 20);

@@ -2890,18 +2909,22 @@ generate_custom_headers(){
               "<body>"
               "<div style='font-family:Arial; margin: 0 auto; "
               "width: 800px; text-align: justify; line-height: 1.25;'>"
-              "<h1 style='margin-top: 5mm; margin-bottom: 5mm;'>4coder API</h1>");
+              "<h1 style='margin-top: 5mm; margin-bottom: 5mm;'>4cpp Lexing Library</h1>");
+    // "<h1 style='margin-top: 5mm; margin-bottom: 5mm;'>4coder API</h1>");
     
     struct Section{
         char *id_string;
        char *display_string;
     };
     
+    static int32_t msection = -1;
+    
     static Section sections[] = {
         {"introduction", "Introduction"},
-        {"4coder_systems", "4coder Systems"},
-        {"types_and_functions", "Types and Functions"},
-        {"string_library", "String Library"},
+        // {"4coder_systems", "4coder Systems"},
+        // {"types_and_functions", "Types and Functions"},
+        // {"string_library", "String Library"},
         {"lexer_library", "Lexer Library"}
     };
@@ -2921,13 +2944,30 @@ generate_custom_headers(){
     append_sc(&out, "</ul>");
 
 #define MAJOR_SECTION "1"
 
+    msection = 0;
     append_sc(&out, "\n<h2 id='section_");
-    append_sc(&out, sections[0].id_string);
+    append_sc(&out, sections[msection].id_string);
     append_sc(&out, "'>&sect;"MAJOR_SECTION" ");
-    append_sc(&out, sections[0].display_string);
+    append_sc(&out, sections[msection].display_string);
+    append_sc(&out, "</h2>");
+    
+    append_sc(&out,
+              "<div>"
+              "<p>This is the documentation for the 4cpp lexer version 1.0. "
+              "The documentation is the newest piece of this lexer project "
+              "so it may still have problems. What is here should be correct "
+              "and mostly complete.</p>"
+              "<p>If you have questions or discover errors please contact "
+              "<span style='"CODE_STYLE"'>editor@4coder.net</span> or "
+              "to get help from community members you can post on the "
+              "4coder forums hosted on handmade.network at "
+              "<span style='"CODE_STYLE"'>4coder.handmade.network</span></p>"
+              "</div>");
+    
+#if 0
     append_sc(&out,
-              "</h2>"
               "<div>"
               "<p>This is the documentation for " VERSION " The documentation is still "
               "under construction so some of the links are linking to sections that "
@@ -2942,11 +2982,13 @@ generate_custom_headers(){
 #undef MAJOR_SECTION
 #define MAJOR_SECTION "2"
 
+    msection = 1;
     // TODO(allen): Write the 4coder system descriptions.
     append_sc(&out, "\n<h2 id='section_");
-    append_sc(&out, sections[1].id_string);
+    append_sc(&out, sections[msection].id_string);
     append_sc(&out, "'>&sect;"MAJOR_SECTION" ");
-    append_sc(&out, sections[1].display_string);
+    append_sc(&out, sections[msection].display_string);
     append_sc(&out, "</h2>");
     
     append_sc(&out, "<div><i>Coming Soon</i><div>");

@@ -2955,9 +2997,9 @@ generate_custom_headers(){
 #define MAJOR_SECTION "3"
 
     append_sc(&out, "\n<h2 id='section_");
-    append_sc(&out, sections[2].id_string);
+    append_sc(&out, sections[msection].id_string);
     append_sc(&out, "'>&sect;"MAJOR_SECTION" ");
-    append_sc(&out, sections[2].display_string);
+    append_sc(&out, sections[msection].display_string);
     append_sc(&out, "</h2>");
 
 #undef SECTION

@@ -3016,9 +3058,9 @@ generate_custom_headers(){
 #define MAJOR_SECTION "4"
 
     append_sc(&out, "\n<h2 id='section_");
-    append_sc(&out, sections[3].id_string);
+    append_sc(&out, sections[msection].id_string);
     append_sc(&out, "'>&sect;"MAJOR_SECTION" ");
-    append_sc(&out, sections[3].display_string);
+    append_sc(&out, sections[msection].display_string);
     append_sc(&out, "</h2>");
 
 #undef SECTION

@@ -3051,10 +3093,17 @@ generate_custom_headers(){
 #undef MAJOR_SECTION
 #define MAJOR_SECTION "5"
 
+#endif
+    
+#undef MAJOR_SECTION
+#define MAJOR_SECTION "2"
+    
+    msection = 1;
     append_sc(&out, "\n<h2 id='section_");
-    append_sc(&out, sections[4].id_string);
+    append_sc(&out, sections[msection].id_string);
     append_sc(&out, "'>&sect;"MAJOR_SECTION" ");
-    append_sc(&out, sections[4].display_string);
+    append_sc(&out, sections[msection].display_string);
     append_sc(&out, "</h2>");
 
 #undef SECTION
@@ -3062,7 +3111,26 @@ generate_custom_headers(){
     append_sc(&out, "<h3>&sect;"SECTION" Lexer Intro</h3>");
     
-    append_sc(&out, "<div><i>Coming Soon</i><div>");
+    append_sc(&out,
+              "<div>"
+              "The 4cpp lexer system provides a polished, fast, flexible system that "
+              "takes in C/C++ and outputs a tokenization of the text data. There are "
+              "two API levels. One level is setup to let you easily get a tokenization "
+              "of the file. This level manages memory for you with malloc to make it "
+              "as fast as possible to start getting your tokens. The second level "
+              "enables deep integration by allowing control over allocation, data "
+              "chunking, and output rate control.<br><br>"
+              "To use the quick setup API you simply include 4cpp_lexer.h and read the "
+              "documentation at <a href='#cpp_lex_file_doc'>cpp_lex_file</a>.<br><br>"
+              "To use the fancier API include 4cpp_lexer.h and read the "
+              "documentation at <a href='#cpp_lex_step_doc'>cpp_lex_step</a>. "
+              "If you want to be absolutely sure you are not including malloc into "
+              "your program you can define FCPP_FORBID_MALLOC before the include and "
+              "the \"step\" API will continue to work.<br><br>"
+              "There are a few more features in 4cpp that are not documented yet. "
+              "You are free to try to use these, but I am not totally sure they are "
+              "ready yet, and when they are they will be documented."
+              "</div>");
 
 #undef SECTION
 #define SECTION MAJOR_SECTION".2"