From 1b03a6a2bed6f3731b6733234953b8318d74e8e1 Mon Sep 17 00:00:00 2001 From: Allen Webster Date: Tue, 20 Sep 2016 14:10:05 -0400 Subject: [PATCH] added clparams to app step on linux; got the lexer API working with a fixed with tb --- 4coder_API.html | 17 +- 4cpp_lexer.h | 122 ++++++------- 4cpp_lexer_types.h | 3 +- 4ed_file_view.cpp | 22 +-- linux_4ed.cpp | 3 +- test/dll_reader.cpp | 412 ++++++++++++++++++++++---------------------- 6 files changed, 276 insertions(+), 303 deletions(-) diff --git a/4coder_API.html b/4coder_API.html index 9115a47a..efc26962 100644 --- a/4coder_API.html +++ b/4coder_API.html @@ -292,7 +292,7 @@ It should point at the String in the first element of the array.
count
The count parameter specifies the number of elements in the str_set array.
str
The str parameter specifies the string to match against the str_set.
match_index
If this call succeeds match_index is filled with the index into str_set where the match occurred.
Description
This call tries to see if str matches any of the strings in str_set. If there is a match the call succeeds and returns non-zero. The matching rule is equivalent to the matching rule for match.

See Also
match

§4.3.116: string_set_match

fstr_bool string_set_match(
String *str_set,
int32_t count,
String str,
int32_t *match_index
)
Parameters
str_set
The str_set parameter is an array of String structs specifying matchable strings.
count
The count parameter specifies the number of String structs in the str_set array.
str
The str parameter specifies the string to match against the str_set.
match_index
If this call succeeds match_index is filled with the index into str_set where the match occurred.
Description
This call tries to see if str matches any of the strings in str_set. If there is a match the call succeeds and returns non-zero. The matching rule is equivalent to the matching rule for match.

See Also
match

-

§5 Lexer Library

§5.1 Lexer Intro

The 4cpp lexer system provides a polished, fast, flexible system that takes in C/C++ and outputs a tokenization of the text data. There are two API levels. One level is set up to let you easily get a tokenization of the file. This level manages memory for you with malloc to make it as fast as possible to start getting your tokens. The second level enables deep integration by allowing control over allocation, data chunking, and output rate control.

To use the quick setup API you simply include 4cpp_lexer.h and read the documentation at cpp_lex_file.

To use the fancier API include 4cpp_lexer.h and read the documentation at cpp_lex_step. If you want to be absolutely sure you are not including malloc into your program you can define FCPP_FORBID_MALLOC before the include and the "step" API will continue to work.

There are a few more features in 4cpp that are not documented yet. You are free to try to use these, but I am not totally sure they are ready yet, and when they are they will be documented.

§5.2 Lexer Function List

§5.3 Lexer Types List

§5.4 Lexer Function Descriptions

§5.4.1: cpp_get_token

Cpp_Get_Token_Result cpp_get_token(
Cpp_Token_Array *token_array_in,
int32_t pos
)
Parameters
token_array
The array of tokens from which to get a token.
pos
The position, measured in bytes, to get the token for.
Return
A Cpp_Get_Token_Result struct is returned containing the index +

§5 Lexer Library

§5.1 Lexer Intro

The 4cpp lexer system provides a polished, fast, flexible system that takes in C/C++ and outputs a tokenization of the text data. There are two API levels. One level is set up to let you easily get a tokenization of the file. This level manages memory for you with malloc to make it as fast as possible to start getting your tokens. The second level enables deep integration by allowing control over allocation, data chunking, and output rate control.

To use the quick setup API you simply include 4cpp_lexer.h and read the documentation at cpp_lex_file.

To use the fancier API include 4cpp_lexer.h and read the documentation at cpp_lex_step. If you want to be absolutely sure you are not including malloc into your program you can define FCPP_FORBID_MALLOC before the include and the "step" API will continue to work.

There are a few more features in 4cpp that are not documented yet. You are free to try to use these, but I am not totally sure they are ready yet, and when they are they will be documented.

§5.2 Lexer Function List

§5.3 Lexer Types List

§5.4 Lexer Function Descriptions

§5.4.1: cpp_get_token

Cpp_Get_Token_Result cpp_get_token(
Cpp_Token_Array *token_array_in,
int32_t pos
)
Parameters
token_array
The array of tokens from which to get a token.
pos
The position, measured in bytes, to get the token for.
Return
A Cpp_Get_Token_Result struct is returned containing the index of a token and a flag indicating whether the pos is contained in the token or in whitespace after the token.
Description
This call performs a binary search over all of the tokens looking for the token that contains the specified position. If the position @@ -328,29 +328,22 @@ system says it needs a chunk. You may switch to or modify the output array in b The most basic use of this system is to get it all done in one big chunk and try to allocate a nearly "infinite" output array so that it will not run out of memory. This way you can get the entire job done in one call and then just assert to make sure it returns LexResult_Finished to you:

-

Cpp_Token_Array lex_file(char *file_name){
    File_Data file = read_whole_file(file_name);
    
    char *temp = (char*)malloc(4096); // hopefully big enough
    Cpp_Lex_Data lex_state = cpp_lex_data_init(temp);
    
    Cpp_Token_Array array = {0};
    array.tokens = (Cpp_Token*)malloc(1 << 20); // hopefully big enough
    array.max_count = (1 << 20)/sizeof(Cpp_Token);
    
    Cpp_Lex_Result result =
        cpp_lex_step(&lex_state, file.data, file.size, file.size,
                     &array, NO_OUT_LIMIT);
    Assert(result == LexResult_Finished);
    
    free(temp);
    
    return(array);
}
See Also
Cpp_Lex_Data
Cpp_Lex_Result

§5.4.3: cpp_lex_data_init

Cpp_Lex_Data cpp_lex_data_init(
char *mem_buffer
)
Parameters
mem_buffer
The memory to use for initializing the lex state's temp memory buffer.
Return
A brand new lex state ready to begin lexing a file from the beginning.
Description
Creates a new lex state in the form of a Cpp_Lex_Data struct and returns the struct. +

Cpp_Token_Array lex_file(char *file_name){
    File_Data file = read_whole_file(file_name);
    
    char *temp = (char*)malloc(4096); // hopefully big enough
    Cpp_Lex_Data lex_state = cpp_lex_data_init(temp);
    
    Cpp_Token_Array array = {0};
    array.tokens = (Cpp_Token*)malloc(1 << 20); // hopefully big enough
    array.max_count = (1 << 20)/sizeof(Cpp_Token);
    
    Cpp_Lex_Result result =
        cpp_lex_step(&lex_state, file.data, file.size, file.size,
                     &array, NO_OUT_LIMIT);
    Assert(result == LexResult_Finished);
    
    free(temp);
    
    return(array);
}
See Also
Cpp_Lex_Data
Cpp_Lex_Result

§5.4.3: cpp_lex_data_init

Cpp_Lex_Data cpp_lex_data_init(

)
Return
A brand new lex state ready to begin lexing a file from the beginning.
Description
Creates a new lex state in the form of a Cpp_Lex_Data struct and returns the struct. The system needs a temporary buffer that is as long as the longest token. 4096 is usually enough but the buffer is not checked, so to be 100% bullet proof it has to be the same length as the file being lexed.


§5.4.4: cpp_lex_data_temp_size

int32_t cpp_lex_data_temp_size(
Cpp_Lex_Data *lex_data
)
Parameters
lex_data
The lex state from which to get the temporary buffer size.
Description
This call gets the current size of the temporary buffer in the lexer state so that you can move to a new temporary buffer by copying the data over.

See Also
cpp_lex_data_temp_read
cpp_lex_data_new_temp

§5.4.5: cpp_lex_data_temp_read

void cpp_lex_data_temp_read(
Cpp_Lex_Data *lex_data,
char *out_buffer
)
Parameters
lex_data
The lex state from which to read the temporary buffer.
out_buffer
The buffer into which the contents of the temporary buffer will be written. -The size of the buffer must be at least the size as returned by cpp_lex_data_temp_size.
Description
This call reads the current contents of the temporary buffer.

See Also
cpp_lex_data_temp_size
cpp_lex_data_new_temp

§5.4.6: cpp_lex_data_new_temp

void cpp_lex_data_new_temp(
Cpp_Lex_Data *lex_data,
char *new_buffer
)
Parameters
lex_data
The lex state that will receive the new temporary buffer.
new_buffer
The new temporary buffer that has the same contents as the old temporary buffer.
Description
This call can be used to set a new temporary buffer for the lex state. In cases where you want to -discontinue lexing, store the state, and resume later. In such a situation it may be necessary for you -to free the temp buffer that was originally used to make the lex state. This call allows you to supply -a new temp buffer when you are ready to resume lexing.

-However the new buffer needs to have the same contents the old buffer had. To ensure this you have to -use cpp_lex_data_temp_size and cpp_lex_data_temp_read to get the relevant contents of the temp buffer -before you free it.

See Also
cpp_lex_data_temp_size
cpp_lex_data_temp_read

§5.4.7: cpp_get_relex_range

Cpp_Relex_Range cpp_get_relex_range(
Cpp_Token_Array *array,
int32_t start_pos,
int32_t end_pos
)
Parameters
array
A pointer to the token array that will be modified by the relex, +The size of the buffer must be at least the size as returned by cpp_lex_data_temp_size.
Description
This call reads the current contents of the temporary buffer.

See Also
cpp_lex_data_temp_size
cpp_lex_data_new_temp

§5.4.6: cpp_lex_data_new_temp_DEP

void cpp_lex_data_new_temp_DEP(
Cpp_Lex_Data *lex_data,
char *new_buffer
)

§5.4.7: cpp_get_relex_range

Cpp_Relex_Range cpp_get_relex_range(
Cpp_Token_Array *array,
int32_t start_pos,
int32_t end_pos
)
Parameters
array
A pointer to the token array that will be modified by the relex, this array should already contain the tokens for the previous state of the file.
start_pos
The start position of the edited region of the file. The start and end points are based on the edited region of the file before the edit.
end_pos
The end position of the edited region of the file. In particular, end_pos is the first character after the edited region not affected by the edit. Thus if the edited region contained one character end_pos - start_pos should equal 1. -The start and end points are based on the edited region of the file before the edit.

§5.4.8: cpp_relex_init

Cpp_Relex_Data cpp_relex_init(
Cpp_Token_Array *array,
int32_t start_pos,
int32_t end_pos,
int32_t character_shift_amount,
char *spare
)
Parameters
array
A pointer to the token array that will be modified by the relex, +The start and end points are based on the edited region of the file before the edit.

§5.4.8: cpp_relex_init

Cpp_Relex_Data cpp_relex_init(
Cpp_Token_Array *array,
int32_t start_pos,
int32_t end_pos,
int32_t character_shift_amount
)
Parameters
array
A pointer to the token array that will be modified by the relex, this array should already contain the tokens for the previous state of the file.
start_pos
The start position of the edited region of the file. The start and end points are based on the edited region of the file before the edit.
end_pos
The end position of the edited region of the file. In particular, end_pos is the first character after the edited region not affected by the edit. Thus if the edited region contained one character end_pos - start_pos should equal 1. -The start and end points are based on the edited region of the file before the edit.
character_shift_amount
The shift in the characters after the edited region.
spare
The spare space for the lexing state. -Should be big enough to store the largest token in the file.
Return
Returns a partially initialized relex state.
Description
This call does the first setup step of initializing a relex state. To finish initializing the relex state +The start and end points are based on the edited region of the file before the edit.
character_shift_amount
The shift in the characters after the edited region.
Return
Returns a partially initialized relex state.
Description
This call does the first setup step of initializing a relex state. To finish initializing the relex state you must tell the state about the positioning of the first chunk it will be fed. There are two methods of doing this, the direct method is with cpp_relex_declare_first_chunk_position, the method that is often more convenient is with cpp_relex_is_start_chunk. If the file is not chunked the second step of initialization can be skipped.

See Also
cpp_relex_declare_first_chunk_position
cpp_relex_is_start_chunk

§5.4.9: cpp_relex_start_position

int32_t cpp_relex_start_position(
Cpp_Relex_Data *S_ptr
)
Parameters
S_ptr
Return
Returns the first position in the file the relexer wants to read. This is usually a position slightly diff --git a/4cpp_lexer.h b/4cpp_lexer.h index 7cec85cd..f5c1d879 100644 --- a/4cpp_lexer.h +++ b/4cpp_lexer.h @@ -314,7 +314,9 @@ cpp_lex_nonalloc_null_end_no_limit(Cpp_Lex_Data *S_ptr, char *chunk, int32_t siz S.chunk_pos += size; DrYield(4, LexResult_NeedChunk); } - else break; + else{ + break; + } } --S.pos; if (S.pp_state >= LSPP_count){ @@ -333,7 +335,7 @@ cpp_lex_nonalloc_null_end_no_limit(Cpp_Lex_Data *S_ptr, char *chunk, int32_t siz for (; S.fsm.state < LS_count && S.pos < end_pos;){ c = chunk[S.pos++]; - S.tb[S.tb_pos++] = c; + S.tb[(S.tb_pos++) & (sizeof(S.tb)-1)] = c; int32_t i = S.fsm.state + eq_classes[c]; S.fsm.state = fsm_table[i]; @@ -346,7 +348,9 @@ cpp_lex_nonalloc_null_end_no_limit(Cpp_Lex_Data *S_ptr, char *chunk, int32_t siz S.chunk_pos += size; DrYield(3, LexResult_NeedChunk); } - else break; + else{ + break; + } } Assert(S.fsm.emit_token == 1); @@ -426,27 +430,29 @@ cpp_lex_nonalloc_null_end_no_limit(Cpp_Lex_Data *S_ptr, char *chunk, int32_t siz int32_t word_size = S.pos - S.token_start; - if (S.pp_state == LSPP_body_if){ - if (match_ss(make_string(S.tb, word_size), make_lit_string("defined"))){ - S.token.type = CPP_PP_DEFINED; - S.token.flags = CPP_TFLAG_IS_OPERATOR | CPP_TFLAG_IS_KEYWORD; + if (word_size < sizeof(S.tb)){ + if (S.pp_state == LSPP_body_if){ + if (match_ss(make_string(S.tb, word_size), make_lit_string("defined"))){ + S.token.type = CPP_PP_DEFINED; + S.token.flags = CPP_TFLAG_IS_OPERATOR | CPP_TFLAG_IS_KEYWORD; + break; + } + } + + int32_t sub_match = -1; + string_set_match_table(keywords, sizeof(*keywords), ArrayCount(keywords), + make_string(S.tb, S.tb_pos-1), &sub_match); + + if (sub_match != -1){ + String_And_Flag data = keywords[sub_match]; + S.token.type = (Cpp_Token_Type)data.flags; + S.token.flags = CPP_TFLAG_IS_KEYWORD; break; } } - int32_t sub_match = -1; - string_set_match_table(keywords, sizeof(*keywords), 
ArrayCount(keywords), - make_string(S.tb, S.tb_pos-1), &sub_match); - - if (sub_match != -1){ - String_And_Flag data = keywords[sub_match]; - S.token.type = (Cpp_Token_Type)data.flags; - S.token.flags = CPP_TFLAG_IS_KEYWORD; - } - else{ - S.token.type = CPP_TOKEN_IDENTIFIER; - S.token.flags = 0; - } + S.token.type = CPP_TOKEN_IDENTIFIER; + S.token.flags = 0; }break; case LS_pound: @@ -471,28 +477,30 @@ cpp_lex_nonalloc_null_end_no_limit(Cpp_Lex_Data *S_ptr, char *chunk, int32_t siz { --S.pos; - int32_t pos = S.tb_pos-1; - int32_t i = 1; - for (;i < pos; ++i){ - if (S.tb[i] != ' '){ + if (S.tb_pos < sizeof(S.tb)){ + int32_t pos = S.tb_pos-1; + int32_t i = 1; + for (;i < pos; ++i){ + if (S.tb[i] != ' '){ + break; + } + } + + int32_t sub_match = -1; + string_set_match_table(preprops, sizeof(*preprops), ArrayCount(preprops), + make_string(S.tb+i, pos-i), &sub_match); + + if (sub_match != -1){ + String_And_Flag data = preprops[sub_match]; + S.token.type = (Cpp_Token_Type)data.flags; + S.token.flags = CPP_TFLAG_PP_DIRECTIVE; + S.pp_state = (uint8_t)cpp_pp_directive_to_state(S.token.type); break; } } - int32_t sub_match = -1; - string_set_match_table(preprops, sizeof(*preprops), ArrayCount(preprops), - make_string(S.tb+i, pos-i), &sub_match); - - if (sub_match != -1){ - String_And_Flag data = preprops[sub_match]; - S.token.type = (Cpp_Token_Type)data.flags; - S.token.flags = CPP_TFLAG_PP_DIRECTIVE; - S.pp_state = (uint8_t)cpp_pp_directive_to_state(S.token.type); - } - else{ - S.token.type = CPP_TOKEN_JUNK; - S.token.flags = 0; - } + S.token.type = CPP_TOKEN_JUNK; + S.token.flags = 0; }break; case LS_number: @@ -1034,8 +1042,7 @@ DOC_SEE(Cpp_Lex_Result) } FCPP_LINK Cpp_Lex_Data -cpp_lex_data_init(char *mem_buffer)/* -DOC_PARAM(mem_buffer, The memory to use for initializing the lex state's temp memory buffer.) +cpp_lex_data_init()/* DOC_RETURN(A brand new lex state ready to begin lexing a file from the beginning.) 
DOC(Creates a new lex state in the form of a Cpp_Lex_Data struct and returns the struct. @@ -1044,7 +1051,6 @@ enough but the buffer is not checked, so to be 100% bullet proof it has to be th as the file being lexed.) */{ Cpp_Lex_Data data = {0}; - data.tb = mem_buffer; return(data); } @@ -1079,24 +1085,8 @@ DOC_SEE(cpp_lex_data_new_temp) } FCPP_LINK void -cpp_lex_data_new_temp(Cpp_Lex_Data *lex_data, char *new_buffer)/* -DOC_PARAM(lex_data, The lex state that will receive the new temporary buffer.) -DOC_PARAM(new_buffer, The new temporary buffer that has the same contents as the old temporary buffer.) - -DOC(This call can be used to set a new temporary buffer for the lex state. In cases where you want to -discontinue lexing, store the state, and resume later. In such a situation it may be necessary for you -to free the temp buffer that was originally used to make the lex state. This call allows you to supply -a new temp buffer when you are ready to resume lexing. - -However the new buffer needs to have the same contents the old buffer had. To ensure this you have to -use cpp_lex_data_temp_size and cpp_lex_data_temp_read to get the relevant contents of the temp buffer -before you free it.) 
- -DOC_SEE(cpp_lex_data_temp_size) -DOC_SEE(cpp_lex_data_temp_read) -*/{ - lex_data->tb = new_buffer; -} +cpp_lex_data_new_temp_DEP(Cpp_Lex_Data *lex_data, char *new_buffer) +/*DOC(Deprecated in 4cpp Lexer 1.0.1*/{} FCPP_INTERNAL char cpp_token_get_pp_state(uint16_t bitfield){ @@ -1162,7 +1152,7 @@ The start and end points are based on the edited region of the file before the e } FCPP_LINK Cpp_Relex_Data -cpp_relex_init(Cpp_Token_Array *array, int32_t start_pos, int32_t end_pos, int32_t character_shift_amount, char *spare) +cpp_relex_init(Cpp_Token_Array *array, int32_t start_pos, int32_t end_pos, int32_t character_shift_amount) /* DOC_PARAM(array, A pointer to the token array that will be modified by the relex, this array should already contain the tokens for the previous state of the file.) @@ -1173,8 +1163,6 @@ In particular, end_pos is the first character after the edited region not effect Thus if the edited region contained one character end_pos - start_pos should equal 1. The start and end points are based on the edited region of the file before the edit.) DOC_PARAM(character_shift_amount, The shift in the characters after the edited region.) -DOC_PARAM(spare, The spare space for the lexing state. -Should be big enough to store the largest token in the file.) DOC_RETURN(Returns a partially initialized relex state.) DOC(This call does the first setup step of initializing a relex state. 
To finish initializing the relex state @@ -1200,7 +1188,7 @@ DOC_SEE(cpp_relex_is_start_chunk) state.character_shift_amount = character_shift_amount; - state.lex = cpp_lex_data_init(spare); + state.lex = cpp_lex_data_init(); state.lex.pp_state = cpp_token_get_pp_state(array->tokens[state.start_token_index].state_flags); state.lex.pos = state.relex_start_position; @@ -1348,6 +1336,7 @@ DOC_SEE(cpp_relex_abort) */{ Cpp_Relex_Data S = *S_ptr; + Cpp_Lex_Result step_result = LexResult_Finished; switch (S.__pc__){ DrCase(1); @@ -1359,7 +1348,7 @@ DOC_SEE(cpp_relex_abort) // TODO(allen): This can be better I suspect. for (;;){ - Cpp_Lex_Result step_result = + step_result = cpp_lex_nonalloc_no_null_out_limit(&S.lex, chunk, chunk_size, full_size, relex_array, 1); @@ -1564,8 +1553,7 @@ Cpp_Token_Array lex_file(char *file_name){ ) DOC_SEE(cpp_make_token_array) */{ - Cpp_Lex_Data S = {0}; - S.tb = (char*)malloc(size); + Cpp_Lex_Data S = cpp_lex_data_init(); int32_t quit = 0; char empty = 0; @@ -1600,8 +1588,6 @@ DOC_SEE(cpp_make_token_array) }break; } } - - free(S.tb); } #endif diff --git a/4cpp_lexer_types.h b/4cpp_lexer_types.h index 5f337b37..e5632174 100644 --- a/4cpp_lexer_types.h +++ b/4cpp_lexer_types.h @@ -332,8 +332,7 @@ The internals of the lex state should not be treated as a part of the public API DOC_SEE(cpp_lex_data_init) HIDE_MEMBERS()*/ struct Cpp_Lex_Data{ - char *tb; - + char tb[32]; int32_t tb_pos; int32_t token_start; diff --git a/4ed_file_view.cpp b/4ed_file_view.cpp index 1f69f1d6..36aac0bb 100644 --- a/4ed_file_view.cpp +++ b/4ed_file_view.cpp @@ -1127,20 +1127,18 @@ Job_Callback_Sig(job_full_lex){ i32 buffer_size = (text_size + 3)&(~3); - while (memory->size < buffer_size*2){ + while (memory->size < buffer_size){ system->grow_thread_memory(memory); } - char *tb = (char*)memory->data; - Cpp_Token_Array tokens; - tokens.tokens = (Cpp_Token*)((char*)memory->data + buffer_size); - tokens.max_count = (memory->size - buffer_size) / sizeof(Cpp_Token); + 
tokens.tokens = (Cpp_Token*)(memory->data); + tokens.max_count = memory->size / sizeof(Cpp_Token); tokens.count = 0; b32 still_lexing = 1; - Cpp_Lex_Data lex = cpp_lex_data_init(tb); + Cpp_Lex_Data lex = cpp_lex_data_init(); // TODO(allen): deduplicate this against relex char *chunks[3]; @@ -1165,18 +1163,15 @@ Job_Callback_Sig(job_full_lex){ cpp_lex_step(&lex, chunk, chunk_size, text_size, &tokens, 2048); switch (result){ - case LexResult_NeedChunk: - ++chunk_index; - break; + case LexResult_NeedChunk: ++chunk_index; break; case LexResult_NeedTokenMemory: if (system->check_cancel(thread)){ return; } system->grow_thread_memory(memory); - lex.tb = (char*)memory->data; - tokens.tokens = (Cpp_Token*)((char*)memory->data + buffer_size); - tokens.max_count = (memory->size - buffer_size) / sizeof(Cpp_Token); + tokens.tokens = (Cpp_Token*)(memory->data); + tokens.max_count = memory->size / sizeof(Cpp_Token); break; case LexResult_HitTokenLimit: @@ -1294,9 +1289,8 @@ file_relex_parallel(System_Functions *system, relex_array.tokens = push_array(part, Cpp_Token, relex_array.max_count); i32 size = buffer_size(buffer); - char *spare = push_array(part, char, size+1); - Cpp_Relex_Data state = cpp_relex_init(array, start_i, end_i, shift_amount, spare); + Cpp_Relex_Data state = cpp_relex_init(array, start_i, end_i, shift_amount); char *chunks[3]; i32 chunk_sizes[3]; diff --git a/linux_4ed.cpp b/linux_4ed.cpp index 7d4cb425..ca308685 100644 --- a/linux_4ed.cpp +++ b/linux_4ed.cpp @@ -3470,7 +3470,8 @@ main(int argc, char **argv) &linuxvars.target, &memory_vars, &linuxvars.input, - &result + &result, + clparams ); if(result.perform_kill){ diff --git a/test/dll_reader.cpp b/test/dll_reader.cpp index 3871d591..11f2ee6f 100644 --- a/test/dll_reader.cpp +++ b/test/dll_reader.cpp @@ -1,206 +1,206 @@ -/* - * Mr. 
4th Dimention - Allen Webster - * - * 12.12.2014 - * - * Application layer for project codename "4ed" - * - */ - -// TOP - -#include "4ed_meta.h" -#include "4ed_dll_reader.h" -#include "4ed_dll_reader.cpp" - -i32 -compare(char *a, char *b, i32 len){ - i32 result; - char *e; - - result = 0; - e = a + len; - for (;a < e && *a == *b; ++a, ++b); - if (a < e){ - if (*a < *b) result = -1; - else result = 1; - } - - return(result); -} - -#include -#include -#include - -Data -load_file(char *filename){ - Data result; - FILE * file; - - result = {}; - file = fopen(filename, "rb"); - if (!file){ - printf("file %s not found\n", filename); - } - else{ - fseek(file, 0, SEEK_END); - result.size = ftell(file); - fseek(file, 0, SEEK_SET); - result.data = (byte*)malloc(result.size); - fread(result.data, 1, result.size, file); - fclose(file); - } - - return(result); -} - -void -show_reloc_block(Data file, DLL_Data *dll, PE_Section_Definition *reloc_section){ - byte *base; - Relocation_Block_Header *header; - Relocation_Block_Entry *entry; - u32 cursor; - u32 bytes_in_table; - u32 block_end; - - base = file.data + reloc_section->disk_location; - if (dll->is_64bit) bytes_in_table = dll->opt_header_64->data_directory[image_dir_base_reloc_table].size; - else bytes_in_table = dll->opt_header_32->data_directory[image_dir_base_reloc_table].size; - - for (cursor = 0; cursor < bytes_in_table;){ - header = (Relocation_Block_Header*)(base + cursor); - block_end = cursor + header->block_size; - cursor += sizeof(Relocation_Block_Header); - - printf("block-size: %d\n", header->block_size); - printf("offset-base: %d\n", header->page_base_offset); - - for (;cursor < block_end;){ - entry = (Relocation_Block_Entry*)(base + cursor); - cursor += sizeof(Relocation_Block_Entry); - printf("reloc: type %d offset %d\n", - (i32)(entry->entry & reloc_entry_type_mask) >> reloc_entry_type_shift, - (i32)(entry->entry & reloc_entry_offset_mask)); - } - } -} - -typedef int32_t (Function)(int a, int b); - -#include 
- -#define UseWinDll 0 - -int -main(int argc, char **argv){ - Function *func; - i32 x; - -#if UseWinDll - HMODULE module; - - if (argc < 2){ - printf("usage: dll_reader \n"); - exit(1); - } - - module = LoadLibraryA(argv[1]); - - if (!module){ - printf("failed to load file %s\n", argv[1]); - exit(1); - } - - func = (Function*)GetProcAddress(module, "test_func"); - -#else - Data file, img; - DLL_Data dll; - DLL_Loaded dll_loaded; - PE_Section_Definition *section_def; - i32 error; - i32 i; - - if (argc < 2){ - printf("usage: dll_reader \n"); - exit(1); - } - - file = load_file(argv[1]); - - if (!file.data){ - printf("failed to load file %s\n", argv[1]); - exit(1); - } - - if (!dll_parse_headers(file, &dll, &error)){ - printf("header error %d\n", error); - exit(1); - } - - printf("this appears to be a dll\n"); - - printf("symbol-count: %d symbol-addr: %d\n", - dll.coff_header->number_of_symbols, - dll.coff_header->pointer_to_symbol_table); - - if (dll.is_64bit) printf("64bit\n"); - else printf("32bit\n"); - - printf("built for machine: %s\n", dll_machine_type_str(dll.coff_header->machine, 0)); - - if (dll.is_64bit){ - printf("number of directories: %d\n", dll.opt_header_64->number_of_rva_and_sizes); - } - else{ - printf("number of directories: %d\n", dll.opt_header_32->number_of_rva_and_sizes); - } - - printf("\nbeginning section decode now\n"); - - section_def = dll.section_defs; - for (i = 0; i < dll.coff_header->number_of_sections; ++i, ++section_def){ - if (section_def->name[7] == 0){ - printf("name: %s\n", section_def->name); - } - else{ - printf("name: %.*s\n", 8, section_def->name); - } - printf("img-size: %d img-loc: %d\ndisk-size: %d disk-loc: %d\n", - section_def->loaded_size, section_def->loaded_location, - section_def->disk_size, section_def->disk_location); - - if (compare(section_def->name, ".reloc", 6) == 0){ - show_reloc_block(file, &dll, section_def); - } - } - - img.size = dll_total_loaded_size(&dll); - printf("image size: %d\n", img.size); - - 
img.data = (byte*) - VirtualAlloc((LPVOID)Tbytes(3), img.size, - MEM_COMMIT | MEM_RESERVE, - PAGE_READWRITE); - dll_load(img, &dll_loaded, file, &dll); - - DWORD _extra; - VirtualProtect(img.data + dll_loaded.text_start, - dll_loaded.text_size, - PAGE_EXECUTE_READ, - &_extra); - - func = (Function*)dll_load_function(&dll_loaded, "test_func", 9); -#endif - - x = func(10, 20); - printf("%d\n", x); - - x = func(1, 2); - printf("%d\n", x); - - return(0); -} - -// BOTTOM +/* + * Mr. 4th Dimention - Allen Webster + * + * 12.12.2014 + * + * Application layer for project codename "4ed" + * + */ + +// TOP + +#include "4ed_meta.h" +#include "4ed_dll_reader.h" +#include "4ed_dll_reader.cpp" + +i32 +compare(char *a, char *b, i32 len){ + i32 result; + char *e; + + result = 0; + e = a + len; + for (;a < e && *a == *b; ++a, ++b); + if (a < e){ + if (*a < *b) result = -1; + else result = 1; + } + + return(result); +} + +#include +#include +#include + +Data +load_file(char *filename){ + Data result; + FILE * file; + + result = {}; + file = fopen(filename, "rb"); + if (!file){ + printf("file %s not found\n", filename); + } + else{ + fseek(file, 0, SEEK_END); + result.size = ftell(file); + fseek(file, 0, SEEK_SET); + result.data = (byte*)malloc(result.size); + fread(result.data, 1, result.size, file); + fclose(file); + } + + return(result); +} + +void +show_reloc_block(Data file, DLL_Data *dll, PE_Section_Definition *reloc_section){ + byte *base; + Relocation_Block_Header *header; + Relocation_Block_Entry *entry; + u32 cursor; + u32 bytes_in_table; + u32 block_end; + + base = file.data + reloc_section->disk_location; + if (dll->is_64bit) bytes_in_table = dll->opt_header_64->data_directory[image_dir_base_reloc_table].size; + else bytes_in_table = dll->opt_header_32->data_directory[image_dir_base_reloc_table].size; + + for (cursor = 0; cursor < bytes_in_table;){ + header = (Relocation_Block_Header*)(base + cursor); + block_end = cursor + header->block_size; + cursor += 
sizeof(Relocation_Block_Header); + + printf("block-size: %d\n", header->block_size); + printf("offset-base: %d\n", header->page_base_offset); + + for (;cursor < block_end;){ + entry = (Relocation_Block_Entry*)(base + cursor); + cursor += sizeof(Relocation_Block_Entry); + printf("reloc: type %d offset %d\n", + (i32)(entry->entry & reloc_entry_type_mask) >> reloc_entry_type_shift, + (i32)(entry->entry & reloc_entry_offset_mask)); + } + } +} + +typedef int32_t (Function)(int a, int b); + +#include + +#define UseWinDll 0 + +int +main(int argc, char **argv){ + Function *func; + i32 x; + +#if UseWinDll + HMODULE module; + + if (argc < 2){ + printf("usage: dll_reader \n"); + exit(1); + } + + module = LoadLibraryA(argv[1]); + + if (!module){ + printf("failed to load file %s\n", argv[1]); + exit(1); + } + + func = (Function*)GetProcAddress(module, "test_func"); + +#else + Data file, img; + DLL_Data dll; + DLL_Loaded dll_loaded; + PE_Section_Definition *section_def; + i32 error; + i32 i; + + if (argc < 2){ + printf("usage: dll_reader \n"); + exit(1); + } + + file = load_file(argv[1]); + + if (!file.data){ + printf("failed to load file %s\n", argv[1]); + exit(1); + } + + if (!dll_parse_headers(file, &dll, &error)){ + printf("header error %d\n", error); + exit(1); + } + + printf("this appears to be a dll\n"); + + printf("symbol-count: %d symbol-addr: %d\n", + dll.coff_header->number_of_symbols, + dll.coff_header->pointer_to_symbol_table); + + if (dll.is_64bit) printf("64bit\n"); + else printf("32bit\n"); + + printf("built for machine: %s\n", dll_machine_type_str(dll.coff_header->machine, 0)); + + if (dll.is_64bit){ + printf("number of directories: %d\n", dll.opt_header_64->number_of_rva_and_sizes); + } + else{ + printf("number of directories: %d\n", dll.opt_header_32->number_of_rva_and_sizes); + } + + printf("\nbeginning section decode now\n"); + + section_def = dll.section_defs; + for (i = 0; i < dll.coff_header->number_of_sections; ++i, ++section_def){ + if 
(section_def->name[7] == 0){ + printf("name: %s\n", section_def->name); + } + else{ + printf("name: %.*s\n", 8, section_def->name); + } + printf("img-size: %d img-loc: %d\ndisk-size: %d disk-loc: %d\n", + section_def->loaded_size, section_def->loaded_location, + section_def->disk_size, section_def->disk_location); + + if (compare(section_def->name, ".reloc", 6) == 0){ + show_reloc_block(file, &dll, section_def); + } + } + + img.size = dll_total_loaded_size(&dll); + printf("image size: %d\n", img.size); + + img.data = (byte*) + VirtualAlloc((LPVOID)Tbytes(3), img.size, + MEM_COMMIT | MEM_RESERVE, + PAGE_READWRITE); + dll_load(img, &dll_loaded, file, &dll); + + DWORD _extra; + VirtualProtect(img.data + dll_loaded.text_start, + dll_loaded.text_size, + PAGE_EXECUTE_READ, + &_extra); + + func = (Function*)dll_load_function(&dll_loaded, "test_func", 9); +#endif + + x = func(10, 20); + printf("%d\n", x); + + x = func(1, 2); + printf("%d\n", x); + + return(0); +} + +// BOTTOM