4coder/4coder_search.cpp

483 lines
15 KiB
C++

#ifndef FCODER_SEARCH
#define FCODER_SEARCH
// Order in which a Search_Range is scanned by search_next_match.
enum Search_Range_Type{
    // Scan forward through the range.
    SearchRange_FrontToBack = 0,
    // Scan backward, starting from the last position of the range.
    SearchRange_BackToFront = 1,
    // Scan outward in both directions from the range's "mid" region,
    // reporting whichever match lies closer to that region first.
    SearchRange_Wave = 2,
};
// Bit layout of Search_Range::flags.
// The low byte (SearchFlag_MatchMask) selects exactly one match mode; the
// bits above it are independent option flags.
enum Search_Range_Flag{
    // Match only when the hit is not bordered by alphanumeric characters.
    SearchFlag_MatchWholeWord = 0,
    // Match when the hit starts a word; the reported match extends to the
    // end of the alphanumeric run that starts at the hit.
    SearchFlag_MatchWordPrefix = 1,
    // Match any occurrence of the word.
    SearchFlag_MatchSubstring = 2,
    // Mask that extracts the match mode from the flags.
    SearchFlag_MatchMask = 0xFF,
    // Option bit: forward searches use the case-insensitive seek.
    SearchFlag_CaseInsensitive = (1 << 8),
};
// One region of one buffer to be searched, plus how to search it.
struct Search_Range{
    // One of the Search_Range_Type values; selects the scan order.
    int32_t type;
    // Combination of Search_Range_Flag values (match mode + options).
    uint32_t flags;
    // Identifier of the buffer to search; handed to get_buffer.
    int32_t buffer;
    // The searched region is [start, start + size).
    int32_t start;
    int32_t size;
    // Only read for SearchRange_Wave: the backward scan begins just before
    // mid_start and the forward scan begins at mid_start + mid_size.
    int32_t mid_start;
    int32_t mid_size;
};
// A list of ranges that search_next_match visits in order.
struct Search_Set{
    // Range storage; allocated or regrown by search_set_init.
    Search_Range *ranges;
    // Number of ranges in use.
    int32_t count;
    // Number of ranges the storage has room for.
    int32_t max;
};
// Cursor state for walking a Search_Set one match at a time.
struct Search_Iter{
    // Text being searched for; its storage is managed by search_iter_init.
    String word;
    // Cursor used by forward scans.
    int32_t pos;
    // Cursor used by backward scans.
    int32_t back_pos;
    // Index of the range currently being searched.
    int32_t i;
    // Nonzero once the cursors have been seeded for range `i`.
    int32_t range_initialized;
};
// Result of one search step.
struct Search_Match{
    // Summary of the buffer that was searched.
    Buffer_Summary buffer;
    // Position where the word was found.
    int32_t start;
    // Position one past the end of the match (filled in by match_check).
    int32_t end;
    // Set to true by match_check when the hit satisfies the match mode.
    int32_t found_match;
};
// Prepares an iterator for a new search for a word of `size` bytes.
//
// Makes sure iter->word has at least `size` bytes of storage (allocating
// 2*size when it has to allocate or grow) and rewinds all iteration state.
// word.size and the word's characters are NOT set here; the caller fills
// them in afterwards.  The regrow path uses the "nocopy" reallocation, so
// presumably the previous word contents are not preserved.
//
// general: allocator used for the word storage.
// iter:    iterator to (re)initialize; may be zero-initialized or reused.
// size:    number of bytes the word will need.
static void
search_iter_init(General_Memory *general, Search_Iter *iter, int32_t size){
    int32_t str_max = size*2;
    if (iter->word.str == 0){
        iter->word.str = (char*)general_memory_allocate(general, str_max);
        iter->word.memory_size = str_max;
    }
    else if (iter->word.memory_size < size){
        iter->word.str = (char*)general_memory_reallocate_nocopy(general, iter->word.str, str_max);
        iter->word.memory_size = str_max;
    }
    
    iter->i = 0;
    // A FrontToBack range does not seed `pos` when it is first visited, so a
    // reused iterator must not carry the cursors of its previous search.
    // This mirrors the reset done by search_iter_next_range.
    iter->pos = 0;
    iter->back_pos = 0;
    iter->range_initialized = 0;
}
// Sizes a Search_Set to hold `range_count` ranges.
//
// Storage is allocated on first use and regrown (without preserving the old
// entries, going by the "nocopy" reallocation) when the current capacity is
// below range_count; in both cases room for 2*range_count ranges is
// requested.  set->count is always set to range_count; the ranges themselves
// are left for the caller to fill in.
static void
search_set_init(General_Memory *general, Search_Set *set, int32_t range_count){
    int32_t capacity = range_count*2;
    
    if (set->ranges == 0 || set->max < range_count){
        if (set->ranges == 0){
            set->ranges = (Search_Range*)general_memory_allocate(general, sizeof(Search_Range)*capacity);
        }
        else{
            set->ranges = (Search_Range*)general_memory_reallocate_nocopy(general, set->ranges, sizeof(Search_Range)*capacity);
        }
        set->max = capacity;
    }
    
    set->count = range_count;
}
// Gives `hits` backing memory for `table_size` Offset_String entries.
//
// A table with no hash_array gets a fresh allocation; otherwise the existing
// block is reallocated with the "nocopy" variant.  Either way the table is
// then re-initialized over the block with table_init_memory.
static void
search_hits_table_alloc(General_Memory *general, Table *hits, int32_t table_size){
    int32_t bytes = table_required_mem_size(table_size, sizeof(Offset_String));
    
    void *block = (hits->hash_array != 0)
        ? general_memory_reallocate_nocopy(general, hits->hash_array, bytes)
        : general_memory_allocate(general, bytes);
    
    table_init_memory(hits, block, table_size, sizeof(Offset_String));
}
// Resets the hit table and its string space for a new search.
//
// hits:       table of Offset_String entries; (re)allocated for table_size.
// str:        string space the entries point into; allocated on first use
//             and regrown (nocopy) when smaller than str_size.
// table_size: entry capacity requested for the table.
// str_size:   byte capacity requested for the string space.
static void
search_hits_init(General_Memory *general, Table *hits, String_Space *str, int32_t table_size, int32_t str_size){
    // The helper covers both the first-time allocation and the resize of an
    // existing table, so no branching is needed here.
    search_hits_table_alloc(general, hits, table_size);
    
    if (str->space == 0 || str->max < str_size){
        if (str->space == 0){
            str->space = (char*)general_memory_allocate(general, str_size);
        }
        else{
            str->space = (char*)general_memory_reallocate_nocopy(general, str->space, str_size);
        }
        str->max = str_size;
    }
    
    // Start with an empty string space and an empty table.
    str->new_pos = 0;
    str->pos = 0;
    table_clear(hits);
}
// Records the string str[0, len) in the hit table.
//
// The characters are appended to `space` first; if the append is rejected
// (returned size 0) the string space is grown and the append retried.  The
// hit table is doubled and rehashed when it is at capacity.
//
// general: allocator used for any growth.
// hits:    table of Offset_String entries keyed by string contents.
// space:   string space that backs the table entries.
// str,len: the string to record; len must be nonzero.
//
// Returns true when table_add returned 0, in which case the appended string
// is kept; otherwise returns false and the appended string is discarded.
// (Presumably table_add returns nonzero for an entry that already exists,
// making this "true if the hit is new" - confirm against table_add.)
static int32_t
search_hit_add(General_Memory *general, Table *hits, String_Space *space, char *str, int32_t len){
    int32_t result = false;
    
    assert(len != 0);
    
    Offset_String ostring = strspace_append(space, str, len);
    if (ostring.size == 0){
        // Grow to at least double, and always by enough to fit this string.
        int32_t new_size = space->max*2;
        if (new_size < space->max + len){
            new_size = space->max + len;
        }
        space->space = (char*)general_memory_reallocate(general, space->space, space->new_pos, new_size);
        // Record the new capacity; without this the string space still
        // reports its old size and the retry below can be rejected again.
        space->max = new_size;
        ostring = strspace_append(space, str, len);
    }
    
    assert(ostring.size != 0);
    
    if (table_at_capacity(hits)){
        // Build a table twice as large, move every entry over, then swap.
        Table new_hits = {0};
        search_hits_table_alloc(general, &new_hits, hits->max*2);
        table_clear(&new_hits);
        table_rehash(hits, &new_hits, space->space, tbl_offset_string_hash, tbl_offset_string_compare);
        general_memory_free(general, hits->hash_array);
        *hits = new_hits;
    }
    
    if (!table_add(hits, &ostring, space->space, tbl_offset_string_hash, tbl_offset_string_compare)){
        result = true;
        strspace_keep_prev(space);
    }
    else{
        strspace_discard_prev(space);
    }
    
    return(result);
}
// Starting at `pos`, advances past consecutive alphanumeric characters and
// returns the position of the first character that is not alphanumeric.
// If the stream runs out before such a character is seen, the position
// reached at that point is returned.  If the stream cannot be opened at
// `pos`, `pos` is returned unchanged.
static int32_t
buffer_seek_alpha_numeric_end(Application_Links *app, Buffer_Summary *buffer, int32_t pos){
    char space[1024];
    Stream_Chunk chunk = {0};
    
    if (init_stream_chunk(&chunk, app, buffer, pos, space, sizeof(space))){
        for (;;){
            // Consume the alphanumeric run inside the current chunk.
            while (pos < chunk.end && char_is_alpha_numeric(chunk.data[pos])){
                ++pos;
            }
            
            // Stopped before the chunk's end: a non-alphanumeric was hit.
            if (pos < chunk.end){
                break;
            }
            
            // Chunk exhausted: fetch the next one, or stop if there is none.
            if (!forward_stream_chunk(&chunk)){
                break;
            }
        }
    }
    
    return(pos);
}
// Moves the iterator on to the next range of its Search_Set: both cursors
// are zeroed and the range is marked as not yet initialized, so that
// search_next_match seeds the cursors again for the new range.
static void
search_iter_next_range(Search_Iter *it){
    it->range_initialized = 0;
    it->back_pos = 0;
    it->pos = 0;
    it->i += 1;
}
// Outcome of a single search step.
enum{
    // The candidate was rejected; the search should keep stepping.
    FindResult_None = 0,
    // A match was found and written to the Search_Match.
    FindResult_FoundMatch = 1,
    // The search ran off the range; no further matches in this direction.
    FindResult_PastEnd = 2
};
// Decides whether the occurrence of `word` at result_ptr->start counts as a
// match under the range's match mode (range->flags & SearchFlag_MatchMask).
//
// app:        application links, used to read characters from the buffer.
// range:      range being searched; supplies the mode and the end bound.
// pos:        not used by this function.
// result_ptr: in: buffer and start of the candidate.
//             out: end (when computed) and found_match (on success).
// word:       the text that was searched for.
//
// Returns FindResult_FoundMatch when the candidate is accepted,
// FindResult_PastEnd when the candidate would extend beyond the range, and
// FindResult_None when the candidate is rejected (or the mode is unknown).
static int32_t
match_check(Application_Links *app, Search_Range *range, int32_t *pos, Search_Match *result_ptr, String word){
    Search_Match match = *result_ptr;
    int32_t status = FindResult_None;
    
    int32_t range_end = range->start + range->size;
    int32_t mode = (range->flags & SearchFlag_MatchMask);
    
    if (mode == SearchFlag_MatchWholeWord){
        // The character before the hit only matters when the word itself
        // begins with an alphanumeric character.
        char before = ' ';
        if (char_is_alpha_numeric(word.str[0])){
            before = buffer_get_char(app, &match.buffer, match.start - 1);
        }
        
        if (!char_is_alpha_numeric(before)){
            match.end = match.start + word.size;
            if (match.end > range_end){
                status = FindResult_PastEnd;
            }
            else{
                // Likewise for the character after the hit.
                char after = ' ';
                if (char_is_alpha_numeric(word.str[word.size-1])){
                    after = buffer_get_char(app, &match.buffer, match.end);
                }
                
                if (!char_is_alpha_numeric(after)){
                    match.found_match = true;
                    status = FindResult_FoundMatch;
                }
            }
        }
    }
    else if (mode == SearchFlag_MatchWordPrefix){
        char before = buffer_get_char(app, &match.buffer, match.start - 1);
        if (!char_is_alpha_numeric(before)){
            // The match covers the whole alphanumeric run starting at the hit.
            match.end = buffer_seek_alpha_numeric_end(app, &match.buffer, match.start);
            if (match.end > range_end){
                status = FindResult_PastEnd;
            }
            else{
                match.found_match = true;
                status = FindResult_FoundMatch;
            }
        }
    }
    else if (mode == SearchFlag_MatchSubstring){
        match.end = match.start + word.size;
        if (match.end > range_end){
            status = FindResult_PastEnd;
        }
        else{
            match.found_match = true;
            status = FindResult_FoundMatch;
        }
    }
    
    *result_ptr = match;
    return(status);
}
// Performs one forward search step in `range`.
//
// Seeks the next occurrence of `word` at or after *pos (clamped up to
// range->start) and runs it through match_check.
//
// app:        application links.
// range:      range to search; its flags choose case sensitivity and mode.
// word:       text to search for.
// pos:        in/out forward cursor.  Moved just past the start of a
//             rejected candidate, to the end of an accepted match, or past
//             the end of the range when the range is exhausted.
// result_ptr: in/out match record; buffer/start/end/found_match are filled
//             in as the step progresses.
//
// Returns FindResult_FoundMatch, FindResult_None (candidate rejected, call
// again) or FindResult_PastEnd (nothing more to find in this range).
static int32_t
search_front_to_back_step(Application_Links *app, Search_Range *range, String word, int32_t *pos, Search_Match *result_ptr){
    int32_t found_match = FindResult_None;
    Search_Match result = *result_ptr;
    
    int32_t end_pos = range->start + range->size;
    // `<=`, not `<`: a word that ends exactly at end_pos still lies inside
    // the range (match_check accepts result.end <= end_pos), so a candidate
    // starting at *pos must not be skipped in that case.
    if (*pos + word.size <= end_pos){
        int32_t start_pos = *pos;
        if (start_pos < range->start){
            start_pos = range->start;
        }
        
        int32_t case_insensitive = (range->flags & SearchFlag_CaseInsensitive);
        
        result.buffer = get_buffer(app, range->buffer, AccessAll);
        if (case_insensitive){
            buffer_seek_string_insensitive_forward(app, &result.buffer, start_pos, end_pos, word.str, word.size, &result.start);
        }
        else{
            buffer_seek_string_forward(app, &result.buffer, start_pos, end_pos, word.str, word.size, &result.start);
        }
        
        if (result.start < end_pos){
            // Step past this candidate first so a rejection still advances.
            *pos = result.start + 1;
            found_match = match_check(app, range, pos, &result, word);
            if (found_match == FindResult_FoundMatch){
                // Resume after the match so matches do not overlap.
                *pos = result.end;
            }
        }
        else{
            found_match = FindResult_PastEnd;
            *pos = end_pos + 1;
        }
    }
    else{
        found_match = FindResult_PastEnd;
        *pos = end_pos + 1;
    }
    
    *result_ptr = result;
    return(found_match);
}
// Repeats forward search steps until one of them either finds a match or
// reports that the range is exhausted, and returns that final status
// (FindResult_FoundMatch or FindResult_PastEnd).
static int32_t
search_front_to_back(Application_Links *app, Search_Range *range, String word, int32_t *pos, Search_Match *result_ptr){
    int32_t status;
    do{
        status = search_front_to_back_step(app, range, word, pos, result_ptr);
    }while (status == FindResult_None);
    return(status);
}
// Performs one backward search step in `range`.
//
// Seeks the previous occurrence of `word` from *pos down to range->start
// and runs it through match_check.
//
// app:        application links.
// range:      range to search; its flags choose the match mode.
// word:       text to search for.
// pos:        in/out backward cursor.  Moved just before the start of a
//             rejected candidate, or a full word length before the start of
//             an accepted match.
// result_ptr: in/out match record.
//
// Returns FindResult_FoundMatch, FindResult_None (candidate rejected, call
// again) or FindResult_PastEnd (nothing more to find in this range).
//
// NOTE(review): unlike the forward step, this does not consult
// SearchFlag_CaseInsensitive; the backward seek is always the same call.
// Confirm whether that is intended.
static int32_t
search_back_to_front_step(Application_Links *app, Search_Range *range, String word, int32_t *pos, Search_Match *result_ptr){
    int32_t found_match = FindResult_None;
    Search_Match result = *result_ptr;
    
    // `>=`, not `>`: after a match the cursor becomes
    // result.start - word.size, which can land exactly on range->start, and
    // a match starting there is still inside the range (see the
    // result.start >= range->start test below).
    if (*pos >= range->start){
        int32_t start_pos = *pos;
        
        result.buffer = get_buffer(app, range->buffer, AccessAll);
        buffer_seek_string_backward(app, &result.buffer,
                                    start_pos, range->start,
                                    word.str, word.size,
                                    &result.start);
        
        // TODO(allen): deduplicate the match checking code.
        if (result.start >= range->start){
            // Step before this candidate first so a rejection still advances.
            *pos = result.start - 1;
            found_match = match_check(app, range, pos, &result, word);
            if (found_match == FindResult_FoundMatch){
                *pos = result.start - word.size;
            }
        }
        else{
            found_match = FindResult_PastEnd;
        }
    }
    else{
        found_match = FindResult_PastEnd;
    }
    
    *result_ptr = result;
    return(found_match);
}
// Repeats backward search steps until one of them either finds a match or
// reports that the range is exhausted, and returns that final status
// (FindResult_FoundMatch or FindResult_PastEnd).
static int32_t
search_back_to_front(Application_Links *app, Search_Range *range, String word, int32_t *pos, Search_Match *result_ptr){
    int32_t status;
    do{
        status = search_back_to_front_step(app, range, word, pos, result_ptr);
    }while (status == FindResult_None);
    return(status);
}
// Returns the next match of it_ptr->word across the ranges of `set`.
//
// Ranges are visited in order starting at it_ptr->i.  Each range is searched
// according to its type; when a range reports FindResult_PastEnd the
// iterator moves on to the next range.  The function returns as soon as a
// match is found, or once every range has been exhausted.
//
// app:    application links.
// set:    the ranges to search.
// it_ptr: in/out iterator; worked on as a local copy and written back
//         before returning.
//
// Returns the match.  found_match is set (by match_check) only when a match
// was actually found; callers should test it, since other fields of the
// result may have been written by unsuccessful search steps.
//
// NOTE(review): a range whose type is none of the three SearchRange_* values
// leaves find_result at FindResult_None, so iter.i never advances and the
// loop does not terminate.  Confirm callers only produce valid types.
static Search_Match
search_next_match(Application_Links *app, Search_Set *set, Search_Iter *it_ptr){
Search_Match result = {0};
Search_Iter iter = *it_ptr;
int32_t count = set->count;
for (; iter.i < count;){
Search_Range *range = set->ranges + iter.i;
int32_t find_result = FindResult_None;
// First visit to this range: seed the cursors it needs.  A FrontToBack
// range seeds nothing and uses iter.pos as it currently stands.
if (!iter.range_initialized){
iter.range_initialized = true;
switch (range->type){
case SearchRange_BackToFront:
{
// Start at the last position inside the range.
iter.back_pos = range->start+range->size-1;
}break;
case SearchRange_Wave:
{
// Backward scan starts just before the mid region, forward scan just
// after it.
iter.back_pos = range->mid_start-1;
iter.pos = range->mid_start + range->mid_size;
}break;
}
}
switch (range->type){
case SearchRange_FrontToBack:
{
find_result =
search_front_to_back(app, range,
iter.word,
&iter.pos,
&result);
}break;
case SearchRange_BackToFront:
{
find_result =
search_back_to_front(app, range,
iter.word,
&iter.back_pos,
&result);
}break;
case SearchRange_Wave:
{
// Search both directions (each only while its cursor is still inside the
// range), then report whichever match is closer to the mid region.
Search_Match forward_match = {0};
Search_Match backward_match = {0};
int32_t forward_result = FindResult_PastEnd;
int32_t backward_result = FindResult_PastEnd;
if (iter.pos < range->start + range->size){
forward_result = search_front_to_back(app, range,
iter.word,
&iter.pos,
&forward_match);
}
if (iter.back_pos > range->start){
backward_result = search_back_to_front(app, range,
iter.word,
&iter.back_pos,
&backward_match);
}
if (forward_result == FindResult_FoundMatch){
if (backward_result == FindResult_FoundMatch){
find_result = FindResult_FoundMatch;
// Distance of each match from the edge of the mid region on its side.
int32_t forward_start = range->mid_start + range->mid_size;
int32_t forward_distance = (forward_match.start - forward_start);
int32_t backward_distance = (range->mid_start - backward_match.end);
// Only one match can be returned per call.  The cursor of the direction
// that was not chosen is put back on the start of its match, presumably
// so that match is found again by a later call.
if (backward_distance < forward_distance){
iter.pos = forward_match.start;
result = backward_match;
}
else{
iter.back_pos = backward_match.start;
result = forward_match;
}
}
else{
find_result = FindResult_FoundMatch;
result = forward_match;
}
}
else{
if (backward_result == FindResult_FoundMatch){
find_result = FindResult_FoundMatch;
result = backward_match;
// NOTE(review): the forward cursor is stepped back by one whenever only
// the backward search matched; the reason is not evident from this
// function - confirm intent.
--iter.pos;
}
else{
// Neither direction has anything left: this range is exhausted.
find_result = FindResult_PastEnd;
}
}
}break;
}
if (find_result == FindResult_FoundMatch){
goto double_break;
}
else if (find_result == FindResult_PastEnd){
search_iter_next_range(&iter);
}
}
double_break:;
// Persist cursor progress so the next call resumes where this one stopped.
*it_ptr = iter;
return(result);
}
#endif