#include "platform.h"
#include "gnunet_util_lib.h"
#include "gnunet_regex_service.h"
#include "regex_internal_lib.h"
#include "regex_internal.h"
Go to the source code of this file.
Data Structures | |
struct | REGEX_INTERNAL_StateSet_MDLL |
Set of states using MDLL API. More... | |
struct | StringBuffer |
String container for faster string operations. More... | |
struct | REGEX_INTERNAL_Strided_Context |
Context for adding strided transitions to a DFA. More... | |
struct | temporal_state_store |
Struct to hold all the relevant state information in the HashMap. More... | |
struct | client_iterator |
Store regex iterator and cls in one place to pass to the hashmap iterator. More... | |
Macros | |
#define | REGEX_DEBUG_DFA GNUNET_NO |
Set this to GNUNET_YES to enable state naming. More... | |
#define | PRIS(a) |
Functions | |
static void | state_set_append (struct REGEX_INTERNAL_StateSet *set, struct REGEX_INTERNAL_State *state) |
Append state to the given StateSet. More... | |
static int | nullstrcmp (const char *str1, const char *str2) |
Compare two strings for equality. More... | |
static void | state_add_transition (struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_State *from_state, const char *label, struct REGEX_INTERNAL_State *to_state) |
Adds a transition from one state to another on label. More... | |
static void | state_remove_transition (struct REGEX_INTERNAL_State *state, struct REGEX_INTERNAL_Transition *transition) |
Remove a 'transition' from 'state'. More... | |
static int | state_compare (const void *a, const void *b) |
Compare two states. More... | |
static unsigned int | state_get_edges (struct REGEX_INTERNAL_State *s, struct REGEX_BLOCK_Edge *edges) |
Get all edges leaving state s. More... | |
static int | state_set_compare (struct REGEX_INTERNAL_StateSet *sset1, struct REGEX_INTERNAL_StateSet *sset2) |
Compare to state sets by comparing the id's of the states that are contained in each set. More... | |
static void | state_set_clear (struct REGEX_INTERNAL_StateSet *set) |
Clears the given StateSet 'set'. More... | |
static void | automaton_fragment_clear (struct REGEX_INTERNAL_Automaton *a) |
Clears an automaton fragment. More... | |
static void | automaton_destroy_state (struct REGEX_INTERNAL_State *s) |
Frees the memory used by State s. More... | |
static void | automaton_remove_state (struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s) |
Remove a state from the given automaton 'a'. More... | |
static void | automaton_merge_states (struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s1, struct REGEX_INTERNAL_State *s2) |
Merge two states into one. More... | |
static void | automaton_add_state (struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s) |
Add a state to the automaton 'a', always use this function to alter the states DLL of the automaton. More... | |
static void | automaton_state_traverse (struct REGEX_INTERNAL_State *s, int *marks, unsigned int *count, REGEX_INTERNAL_traverse_check check, void *check_cls, REGEX_INTERNAL_traverse_action action, void *action_cls) |
Depth-first traversal (DFS) of all states that are reachable from state 's'. More... | |
void | REGEX_INTERNAL_automaton_traverse (const struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *start, REGEX_INTERNAL_traverse_check check, void *check_cls, REGEX_INTERNAL_traverse_action action, void *action_cls) |
Traverses the given automaton using depth-first-search (DFS) from it's start state, visiting all reachable states and calling 'action' on each one of them. More... | |
static int | sb_nullstrcmp (const struct StringBuffer *s1, const struct StringBuffer *s2) |
Compare two strings for equality. More... | |
static int | sb_strcmp (const struct StringBuffer *s1, const struct StringBuffer *s2) |
Compare two strings for equality. More... | |
static void | sb_realloc (struct StringBuffer *ret, size_t nlen) |
Reallocate the buffer of 'ret' to fit 'nlen' characters; move the existing string to the beginning of the new buffer. More... | |
static void | sb_append (struct StringBuffer *ret, const struct StringBuffer *sarg) |
Append a string. More... | |
static void | sb_append_cstr (struct StringBuffer *ret, const char *cstr) |
Append a C string. More... | |
static void | sb_wrap (struct StringBuffer *ret, const char *format, size_t extra_chars) |
Wrap a string buffer, that is, set ret to the format string which contains an "%s" which is to be replaced with the original content of 'ret'. More... | |
static void | sb_printf1 (struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg) |
Format a string buffer. More... | |
static void | sb_printf2 (struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg1, const struct StringBuffer *sarg2) |
Format a string buffer. More... | |
static void | sb_printf3 (struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg1, const struct StringBuffer *sarg2, const struct StringBuffer *sarg3) |
Format a string buffer. More... | |
static void | sb_free (struct StringBuffer *sb) |
Free resources of the given string buffer. More... | |
static void | sb_strdup (struct StringBuffer *out, const struct StringBuffer *in) |
Copy the given string buffer from 'in' to 'out'. More... | |
static void | sb_strdup_cstr (struct StringBuffer *out, const char *cstr) |
Copy the given string buffer from 'in' to 'out'. More... | |
static int | needs_parentheses (const struct StringBuffer *str) |
Check if the given string str needs parentheses around it when using it to generate a regex. More... | |
static void | remove_parentheses (struct StringBuffer *str) |
Remove parentheses surrounding string str. More... | |
static int | has_epsilon (const struct StringBuffer *str) |
Check if the string 'str' starts with an epsilon (empty string). More... | |
static void | remove_epsilon (const struct StringBuffer *str, struct StringBuffer *ret) |
Remove an epsilon from the string str. More... | |
static int | sb_strncmp (const struct StringBuffer *str1, const struct StringBuffer *str2, size_t n) |
Compare n bytes of 'str1' and 'str2'. More... | |
static int | sb_strncmp_cstr (const struct StringBuffer *str1, const char *str2, size_t n) |
Compare n bytes of 'str1' and 'str2'. More... | |
static void | sb_init (struct StringBuffer *sb, size_t n) |
Initialize string buffer for storing strings of up to n characters. More... | |
static int | sb_strkcmp (const struct StringBuffer *str1, const struct StringBuffer *str2, size_t k) |
Compare 'str1', starting from position 'k', with whole 'str2'. More... | |
static void | number_states (void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s) |
Helper function used as 'action' in 'REGEX_INTERNAL_automaton_traverse' function to create the depth-first numbering of the states. More... | |
static void | automaton_create_proofs_simplify (const struct StringBuffer *R_last_ij, const struct StringBuffer *R_last_ik, const struct StringBuffer *R_last_kk, const struct StringBuffer *R_last_kj, struct StringBuffer *R_cur_ij, struct StringBuffer *R_cur_l, struct StringBuffer *R_cur_r) |
Construct the regular expression given the inductive step, $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}, and simplify the resulting expression saved in R_cur_ij. More... | |
static int | automaton_create_proofs (struct REGEX_INTERNAL_Automaton *a) |
Create proofs for all states in the given automaton. More... | |
static struct REGEX_INTERNAL_State * | dfa_state_create (struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_StateSet *nfa_states) |
Creates a new DFA state based on a set of NFA states. More... | |
static unsigned int | dfa_move (struct REGEX_INTERNAL_State **s, const char *str) |
Move from the given state 's' to the next state on transition 'str'. More... | |
static void | mark_states (void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s) |
Set the given state 'marked' to GNUNET_YES. More... | |
static void | dfa_remove_unreachable_states (struct REGEX_INTERNAL_Automaton *a) |
Remove all unreachable states from DFA 'a'. More... | |
static void | dfa_remove_dead_states (struct REGEX_INTERNAL_Automaton *a) |
Remove all dead states from the DFA 'a'. More... | |
static int | dfa_merge_nondistinguishable_states (struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a) |
Merge all non distinguishable states in the DFA 'a'. More... | |
static int | dfa_minimize (struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a) |
Minimize the given DFA 'a' by removing all unreachable states, removing all dead states and merging all non distinguishable states. More... | |
static void | dfa_add_multi_strides_helper (void *cls, const unsigned int depth, char *label, struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *s) |
Recursive helper function to add strides to a DFA. More... | |
static void | dfa_add_multi_strides (void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s) |
Function called for each state in the DFA. More... | |
void | REGEX_INTERNAL_dfa_add_multi_strides (struct REGEX_INTERNAL_Context *regex_ctx, struct REGEX_INTERNAL_Automaton *dfa, const unsigned int stride_len) |
Adds multi-strided transitions to the given 'dfa'. More... | |
static void | dfa_compress_paths_helper (struct REGEX_INTERNAL_Automaton *dfa, struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *cur, char *label, unsigned int max_len, struct REGEX_INTERNAL_Transition **transitions_head, struct REGEX_INTERNAL_Transition **transitions_tail) |
Recursive Helper function for DFA path compression. More... | |
static void | dfa_compress_paths (struct REGEX_INTERNAL_Context *regex_ctx, struct REGEX_INTERNAL_Automaton *dfa, unsigned int max_len) |
Compress paths in the given 'dfa'. More... | |
static struct REGEX_INTERNAL_Automaton * | nfa_fragment_create (struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *end) |
Creates a new NFA fragment. More... | |
static void | nfa_add_states (struct REGEX_INTERNAL_Automaton *n, struct REGEX_INTERNAL_State *states_head, struct REGEX_INTERNAL_State *states_tail) |
Adds a list of states to the given automaton 'n'. More... | |
static struct REGEX_INTERNAL_State * | nfa_state_create (struct REGEX_INTERNAL_Context *ctx, int accepting) |
Creates a new NFA state. More... | |
static void | nfa_closure_set_create (struct REGEX_INTERNAL_StateSet *ret, struct REGEX_INTERNAL_Automaton *nfa, struct REGEX_INTERNAL_StateSet *states, const char *label) |
Calculates the closure set for the given set of states. More... | |
static void | nfa_add_concatenation (struct REGEX_INTERNAL_Context *ctx) |
Pops two NFA fragments (a, b) from the stack and concatenates them (ab) More... | |
static void | nfa_add_star_op (struct REGEX_INTERNAL_Context *ctx) |
Pops a NFA fragment from the stack (a) and adds a new fragment (a*) More... | |
static void | nfa_add_plus_op (struct REGEX_INTERNAL_Context *ctx) |
Pops an NFA fragment (a) from the stack and adds a new fragment (a+) More... | |
static void | nfa_add_question_op (struct REGEX_INTERNAL_Context *ctx) |
Pops an NFA fragment (a) from the stack and adds a new fragment (a?) More... | |
static void | nfa_add_alternation (struct REGEX_INTERNAL_Context *ctx) |
Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment that alternates between a and b (a|b) More... | |
static void | nfa_add_label (struct REGEX_INTERNAL_Context *ctx, const char *label) |
Adds a new nfa fragment to the stack. More... | |
static void | REGEX_INTERNAL_context_init (struct REGEX_INTERNAL_Context *ctx) |
Initialize a new context. More... | |
struct REGEX_INTERNAL_Automaton * | REGEX_INTERNAL_construct_nfa (const char *regex, const size_t len) |
Construct an NFA by parsing the regex string of length 'len'. More... | |
static void | construct_dfa_states (struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *nfa, struct REGEX_INTERNAL_Automaton *dfa, struct REGEX_INTERNAL_State *dfa_state) |
Create DFA states based on given 'nfa' and starting with 'dfa_state'. More... | |
struct REGEX_INTERNAL_Automaton * | REGEX_INTERNAL_construct_dfa (const char *regex, const size_t len, unsigned int max_path_len) |
Construct DFA for the given 'regex' of length 'len'. More... | |
void | REGEX_INTERNAL_automaton_destroy (struct REGEX_INTERNAL_Automaton *a) |
Free the memory allocated by constructing the REGEX_INTERNAL_Automaton. More... | |
static int | evaluate_dfa (struct REGEX_INTERNAL_Automaton *a, const char *string) |
Evaluates the given string using the given DFA automaton. More... | |
static int | evaluate_nfa (struct REGEX_INTERNAL_Automaton *a, const char *string) |
Evaluates the given string using the given NFA automaton. More... | |
int | REGEX_INTERNAL_eval (struct REGEX_INTERNAL_Automaton *a, const char *string) |
Evaluates the given 'string' against the given compiled regex. More... | |
const char * | REGEX_INTERNAL_get_canonical_regex (struct REGEX_INTERNAL_Automaton *a) |
Get the canonical regex of the given automaton. More... | |
unsigned int | REGEX_INTERNAL_get_transition_count (struct REGEX_INTERNAL_Automaton *a) |
Get the number of transitions that are contained in the given automaton. More... | |
size_t | REGEX_INTERNAL_get_first_key (const char *input_string, size_t string_len, struct GNUNET_HashCode *key) |
Get the first key for the given input_string. More... | |
static void | iterate_initial_edge (unsigned int min_len, unsigned int max_len, char *consumed_string, struct REGEX_INTERNAL_State *state, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls) |
Recursive function that calls the iterator for each synthetic start state. More... | |
void | REGEX_INTERNAL_iterate_all_edges (struct REGEX_INTERNAL_Automaton *a, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls) |
Iterate over all edges starting from start state of automaton 'a'. More... | |
static void | store_all_states (void *cls, const struct GNUNET_HashCode *key, const char *proof, int accepting, unsigned int num_edges, const struct REGEX_BLOCK_Edge *edges) |
Iterator over all edges of a dfa. More... | |
static void | mark_as_reachable (struct temporal_state_store *state, struct GNUNET_CONTAINER_MultiHashMap *hm) |
Mark state as reachable and call recursively on all its edges. More... | |
static int | reachability_iterator (void *cls, const struct GNUNET_HashCode *key, void *value) |
Iterator over hash map entries to mark the ones that are reachable. More... | |
static int | iterate_reachables (void *cls, const struct GNUNET_HashCode *key, void *value) |
Iterator over hash map entries. More... | |
void | REGEX_INTERNAL_iterate_reachable_edges (struct REGEX_INTERNAL_Automaton *a, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls) |
Iterate over all edges of automaton 'a' that are reachable from a state with a proof of at least GNUNET_REGEX_INITIAL_BYTES characters. More... | |
#define REGEX_DEBUG_DFA GNUNET_NO |
Set this to GNUNET_YES to enable state naming.
Used to debug NFA->DFA creation. Disabled by default for better performance.
Definition at line 37 of file regex_internal.c.
#define PRIS | ( | a | ) |
Definition at line 1140 of file regex_internal.c.
|
static |
Append state to the given StateSet.
set | set to be modified |
state | state to be appended |
Definition at line 68 of file regex_internal.c.
References GNUNET_array_grow, REGEX_INTERNAL_StateSet::off, REGEX_INTERNAL_StateSet::size, state, and REGEX_INTERNAL_StateSet::states.
Referenced by evaluate_nfa(), nfa_closure_set_create(), and REGEX_INTERNAL_construct_dfa().
|
static |
Compare two strings for equality.
If either is NULL they are not equal.
str1 | first string for comparison. |
str2 | second string for comparison. |
Definition at line 86 of file regex_internal.c.
Referenced by nfa_closure_set_create(), and state_add_transition().
|
static |
Adds a transition from one state to another on label.
Does not add duplicate states.
ctx | context |
from_state | starting state for the transition |
label | transition label |
to_state | state to where the transition should point to |
Definition at line 107 of file regex_internal.c.
References ctx, REGEX_INTERNAL_Transition::from_state, GNUNET_CONTAINER_DLL_insert_before, GNUNET_ERROR_TYPE_ERROR, GNUNET_log, GNUNET_new, GNUNET_strdup, REGEX_INTERNAL_Transition::label, GNUNET_SCHEDULER_Task::next, REGEX_INTERNAL_Transition::next, nullstrcmp(), t, REGEX_INTERNAL_Transition::to_state, REGEX_INTERNAL_State::transition_count, REGEX_INTERNAL_State::transitions_head, and REGEX_INTERNAL_State::transitions_tail.
Referenced by automaton_merge_states(), dfa_compress_paths(), dfa_state_create(), nfa_add_alternation(), nfa_add_concatenation(), nfa_add_label(), nfa_add_plus_op(), nfa_add_question_op(), nfa_add_star_op(), and REGEX_INTERNAL_dfa_add_multi_strides().
|
static |
Remove a 'transition' from 'state'.
state | state from which the to-be-removed transition originates. |
transition | transition that should be removed from state 'state'. |
Definition at line 162 of file regex_internal.c.
References REGEX_INTERNAL_Transition::from_state, GNUNET_CONTAINER_DLL_remove, GNUNET_free, REGEX_INTERNAL_Transition::label, and state.
Referenced by automaton_destroy_state(), automaton_merge_states(), and automaton_remove_state().
|
static |
Compare two states.
Used for sorting.
a | first state |
b | second state |
Definition at line 193 of file regex_internal.c.
References REGEX_INTERNAL_State::id.
Referenced by nfa_closure_set_create(), and state_set_compare().
|
static |
Get all edges leaving state s.
s | state. |
edges | all edges leaving s, expected to be allocated and have enough space for s->transitions_count elements. |
Definition at line 212 of file regex_internal.c.
References REGEX_BLOCK_Edge::destination, REGEX_BLOCK_Edge::label, GNUNET_SCHEDULER_Task::next, t, and REGEX_INTERNAL_State::transitions_head.
Referenced by iterate_initial_edge(), and REGEX_INTERNAL_iterate_all_edges().
|
static |
Compare to state sets by comparing the id's of the states that are contained in each set.
Both sets are expected to be sorted by id!
sset1 | first state set |
sset2 | second state set |
Definition at line 244 of file regex_internal.c.
References REGEX_INTERNAL_StateSet::off, result, state_compare(), and REGEX_INTERNAL_StateSet::states.
Referenced by construct_dfa_states().
|
static |
Clears the given StateSet 'set'.
set | set to be cleared |
Definition at line 271 of file regex_internal.c.
References GNUNET_array_grow, REGEX_INTERNAL_StateSet::off, REGEX_INTERNAL_StateSet::size, and REGEX_INTERNAL_StateSet::states.
Referenced by automaton_destroy_state(), construct_dfa_states(), evaluate_nfa(), and REGEX_INTERNAL_construct_dfa().
|
static |
Clears an automaton fragment.
Does not destroy the states inside the automaton.
a | automaton to be cleared |
Definition at line 285 of file regex_internal.c.
References REGEX_INTERNAL_Automaton::end, GNUNET_free, REGEX_INTERNAL_Automaton::start, REGEX_INTERNAL_Automaton::state_count, REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.
Referenced by nfa_add_alternation(), nfa_add_concatenation(), nfa_add_question_op(), and nfa_add_star_op().
|
static |
Frees the memory used by State s.
s | state that should be destroyed |
Definition at line 305 of file regex_internal.c.
References GNUNET_free, REGEX_INTERNAL_State::name, GNUNET_SCHEDULER_Task::next, REGEX_INTERNAL_State::nfa_set, REGEX_INTERNAL_State::proof, state_remove_transition(), state_set_clear(), t, and REGEX_INTERNAL_State::transitions_head.
Referenced by automaton_merge_states(), automaton_remove_state(), and REGEX_INTERNAL_automaton_destroy().
|
static |
Remove a state from the given automaton 'a'.
Always use this function when altering the states of an automaton. Will also remove all transitions leading to this state, before destroying it.
a | automaton |
s | state to remove |
Definition at line 335 of file regex_internal.c.
References automaton_destroy_state(), GNUNET_CONTAINER_DLL_remove, REGEX_INTERNAL_Transition::next, REGEX_INTERNAL_State::next, REGEX_INTERNAL_Automaton::state_count, state_remove_transition(), REGEX_INTERNAL_Automaton::states_head, REGEX_INTERNAL_Automaton::states_tail, REGEX_INTERNAL_Transition::to_state, and REGEX_INTERNAL_State::transitions_head.
Referenced by dfa_compress_paths(), dfa_remove_dead_states(), and dfa_remove_unreachable_states().
|
static |
Merge two states into one.
Will merge 's1' and 's2' into 's1' and destroy 's2'. 's1' will contain all (non-duplicate) outgoing transitions of 's2'.
ctx | context |
a | automaton |
s1 | first state |
s2 | second state, will be destroyed |
Definition at line 375 of file regex_internal.c.
References automaton_destroy_state(), ctx, REGEX_INTERNAL_Transition::from_state, GNUNET_asprintf(), GNUNET_CONTAINER_DLL_remove, GNUNET_free, GNUNET_NO, GNUNET_YES, REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_State::name, GNUNET_SCHEDULER_Task::next, REGEX_INTERNAL_Transition::next, REGEX_INTERNAL_State::next, state_add_transition(), REGEX_INTERNAL_Automaton::state_count, state_remove_transition(), REGEX_INTERNAL_Automaton::states_head, REGEX_INTERNAL_Automaton::states_tail, t, REGEX_INTERNAL_Transition::to_state, and REGEX_INTERNAL_State::transitions_head.
Referenced by dfa_merge_nondistinguishable_states().
|
static |
Add a state to the automaton 'a', always use this function to alter the states DLL of the automaton.
a | automaton to add the state to |
s | state that should be added |
Definition at line 444 of file regex_internal.c.
References GNUNET_CONTAINER_DLL_insert, REGEX_INTERNAL_Automaton::state_count, REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.
Referenced by construct_dfa_states(), nfa_fragment_create(), and REGEX_INTERNAL_construct_dfa().
|
static |
Depth-first traversal (DFS) of all states that are reachable from state 's'.
Performs 'action' on each visited state.
s | start state. |
marks | an array of size a->state_count to remember which state was already visited. |
count | current count of the state. |
check | function that is checked before advancing on each transition in the DFS. |
check_cls | closure for check. |
action | action to be performed on each state. |
action_cls | closure for action. |
Definition at line 467 of file regex_internal.c.
References consensus-simulation::action, automaton_state_traverse(), GNUNET_YES, GNUNET_SCHEDULER_Task::next, t, REGEX_INTERNAL_State::transitions_head, and REGEX_INTERNAL_State::traversal_id.
Referenced by automaton_state_traverse(), and REGEX_INTERNAL_automaton_traverse().
void REGEX_INTERNAL_automaton_traverse | ( | const struct REGEX_INTERNAL_Automaton * | a, |
struct REGEX_INTERNAL_State * | start, | ||
REGEX_INTERNAL_traverse_check | check, | ||
void * | check_cls, | ||
REGEX_INTERNAL_traverse_action | action, | ||
void * | action_cls | ||
) |
Traverses the given automaton using depth-first-search (DFS) from it's start state, visiting all reachable states and calling 'action' on each one of them.
a | automaton to be traversed. |
start | start state, pass a->start or NULL to traverse the whole automaton. |
check | function that is checked before advancing on each transition in the DFS. |
check_cls | closure for check. |
action | action to be performed on each state. |
action_cls | closure for action |
Definition at line 505 of file regex_internal.c.
References consensus-simulation::action, automaton_state_traverse(), GNUNET_NO, REGEX_INTERNAL_State::next, start, REGEX_INTERNAL_Automaton::start, REGEX_INTERNAL_Automaton::state_count, REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_State::traversal_id.
Referenced by automaton_create_proofs(), dfa_remove_unreachable_states(), REGEX_INTERNAL_construct_nfa(), REGEX_INTERNAL_dfa_add_multi_strides(), and REGEX_TEST_automaton_save_graph().
|
static |
Compare two strings for equality.
If either is NULL they are not equal.
s1 | first string for comparison. |
s2 | second string for comparison. |
Definition at line 599 of file regex_internal.c.
References GNUNET_YES, StringBuffer::null_flag, StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs_simplify().
|
static |
Compare two strings for equality.
s1 | first string for comparison. |
s2 | second string for comparison. |
Definition at line 622 of file regex_internal.c.
References StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs_simplify().
|
static |
Reallocate the buffer of 'ret' to fit 'nlen' characters; move the existing string to the beginning of the new buffer.
ret | current buffer, to be updated |
nlen | target length for the buffer, must be at least ret->slen |
Definition at line 640 of file regex_internal.c.
References GNUNET_assert, GNUNET_free, GNUNET_malloc, GNUNET_memcpy, and ret.
Referenced by sb_append(), sb_append_cstr(), sb_printf1(), sb_printf2(), and sb_printf3().
|
static |
Append a string.
ret | where to write the result |
sarg | string to append |
Definition at line 661 of file regex_internal.c.
References GNUNET_memcpy, GNUNET_NO, GNUNET_YES, ret, sb_realloc(), StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs().
|
static |
Append a C string.
ret | where to write the result |
cstr | string to append |
Definition at line 680 of file regex_internal.c.
References GNUNET_memcpy, GNUNET_NO, GNUNET_YES, ret, and sb_realloc().
Referenced by automaton_create_proofs().
|
static |
Wrap a string buffer, that is, set ret to the format string which contains an "%s" which is to be replaced with the original content of 'ret'.
Note that optimizing this function is not really worth it, it is rarely called.
ret | where to write the result and take the input for %.*s from |
format | format string, fprintf-style, with exactly one "%.*s" |
extra_chars | how long will the result be, in addition to 'sarg' length |
Definition at line 705 of file regex_internal.c.
References GNUNET_free, GNUNET_malloc, GNUNET_NO, GNUNET_snprintf(), GNUNET_YES, and ret.
Referenced by automaton_create_proofs().
|
static |
Format a string buffer.
Note that optimizing this function is not really worth it, it is rarely called.
ret | where to write the result |
format | format string, fprintf-style, with exactly one "%.*s" |
extra_chars | how long will the result be, in addition to 'sarg' length |
sarg | string to print into the format |
Definition at line 736 of file regex_internal.c.
References GNUNET_NO, GNUNET_snprintf(), ret, sb_realloc(), StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs_simplify().
|
static |
Format a string buffer.
ret | where to write the result |
format | format string, fprintf-style, with exactly two "%.*s" |
extra_chars | how long will the result be, in addition to 'sarg1/2' length |
sarg1 | first string to print into the format |
sarg2 | second string to print into the format |
Definition at line 760 of file regex_internal.c.
References GNUNET_NO, GNUNET_snprintf(), ret, sb_realloc(), StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs_simplify().
|
static |
Format a string buffer.
Note that optimizing this function is not really worth it, it is rarely called.
ret | where to write the result |
format | format string, fprintf-style, with exactly three "%.*s" |
extra_chars | how long will the result be, in addition to 'sarg1/2/3' length |
sarg1 | first string to print into the format |
sarg2 | second string to print into the format |
sarg3 | third string to print into the format |
Definition at line 793 of file regex_internal.c.
References GNUNET_NO, GNUNET_snprintf(), ret, sb_realloc(), StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs_simplify().
|
static |
Free resources of the given string buffer.
sb | buffer to free (actual pointer is not freed, as they should not be individually allocated) |
Definition at line 824 of file regex_internal.c.
References StringBuffer::abuf, StringBuffer::blen, GNUNET_array_grow, GNUNET_YES, StringBuffer::null_flag, StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs(), and automaton_create_proofs_simplify().
|
static |
Copy the given string buffer from 'in' to 'out'.
in | input string |
out | output string |
Definition at line 840 of file regex_internal.c.
References StringBuffer::abuf, StringBuffer::blen, GNUNET_array_grow, GNUNET_memcpy, GNUNET_YES, StringBuffer::null_flag, StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs_simplify(), and remove_epsilon().
|
static |
Copy the given string buffer from 'in' to 'out'.
cstr | input string |
out | output string |
Definition at line 863 of file regex_internal.c.
References StringBuffer::abuf, StringBuffer::blen, GNUNET_array_grow, GNUNET_memcpy, GNUNET_NO, GNUNET_YES, StringBuffer::null_flag, StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs().
|
static |
Check if the given string str needs parentheses around it when using it to generate a regex.
str | string |
Definition at line 890 of file regex_internal.c.
References end, GNUNET_break, GNUNET_NO, GNUNET_YES, StringBuffer::null_flag, op, StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs(), and automaton_create_proofs_simplify().
|
static |
Remove parentheses surrounding string str.
Example: "(a)" becomes "a", "(a|b)|(a|c)" stays the same. You need to GNUNET_free() the returned string.
str | string, modified to contain a |
Definition at line 939 of file regex_internal.c.
References end, GNUNET_break, GNUNET_YES, StringBuffer::null_flag, op, StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs_simplify().
|
static |
Check if the string 'str' starts with an epsilon (empty string).
Example: "(|a)" is starting with an epsilon.
str | string to test |
Definition at line 996 of file regex_internal.c.
References GNUNET_YES, StringBuffer::null_flag, StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs_simplify().
|
static |
Remove an epsilon from the string str.
Where epsilon is an empty string Example: str = "(|a|b|c)", result: "a|b|c" The returned string needs to be freed.
str | original string |
ret | where to return string without preceding epsilon, string 'str' if no preceding epsilon could be found, NULL if 'str' was NULL |
Definition at line 1014 of file regex_internal.c.
References GNUNET_array_grow, GNUNET_memcpy, GNUNET_YES, StringBuffer::null_flag, ret, sb_strdup(), StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs_simplify().
|
static |
Compare n bytes of 'str1' and 'str2'.
str1 | first string to compare |
str2 | second string for comparison |
n | number of bytes to compare |
Definition at line 1048 of file regex_internal.c.
References GNUNET_MAX, max, StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs_simplify().
|
static |
Compare n bytes of 'str1' and 'str2'.
str1 | first string to compare |
str2 | second C string for comparison |
n | number of bytes to compare (and length of str2) |
Definition at line 1073 of file regex_internal.c.
References StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs_simplify().
|
static |
Initialize string buffer for storing strings of up to n characters.
sb | buffer to initialize |
n | desired target length |
Definition at line 1089 of file regex_internal.c.
References StringBuffer::abuf, StringBuffer::blen, GNUNET_malloc, GNUNET_NO, StringBuffer::null_flag, StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs(), and automaton_create_proofs_simplify().
|
static |
Compare 'str1', starting from position 'k', with whole 'str2'.
str1 | first string to compare, starting from position 'k' |
str2 | second string for comparison |
k | starting position in 'str1' |
Definition at line 1108 of file regex_internal.c.
References GNUNET_YES, StringBuffer::null_flag, StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs_simplify().
|
static |
Helper function used as 'action' in 'REGEX_INTERNAL_automaton_traverse' function to create the depth-first numbering of the states.
cls | states array. |
count | current state counter. |
s | current state. |
Definition at line 1128 of file regex_internal.c.
References REGEX_INTERNAL_State::dfs_id.
Referenced by automaton_create_proofs(), and REGEX_INTERNAL_construct_nfa().
|
static |
Construct the regular expression given the inductive step, $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}, and simplify the resulting expression saved in R_cur_ij.
R_last_ij | value of $R^{(k-1)_{ij}. |
R_last_ik | value of $R^{(k-1)_{ik}. |
R_last_kk | value of $R^{(k-1)_{kk}. |
R_last_kj | value of $R^{(k-1)_{kj}. |
R_cur_ij | result for this inductive step is saved in R_cur_ij, R_cur_ij is expected to be NULL when called! |
R_cur_l | optimization – kept between iterations to avoid realloc |
R_cur_r | optimization – kept between iterations to avoid realloc |
Definition at line 1160 of file regex_internal.c.
References StringBuffer::abuf, GNUNET_memcpy, GNUNET_NO, GNUNET_YES, has_epsilon(), needs_parentheses(), StringBuffer::null_flag, remove_epsilon(), remove_parentheses(), sb_free(), sb_init(), sb_nullstrcmp(), sb_printf1(), sb_printf2(), sb_printf3(), sb_strcmp(), sb_strdup(), sb_strkcmp(), sb_strncmp(), sb_strncmp_cstr(), StringBuffer::sbuf, StringBuffer::slen, and StringBuffer::synced.
Referenced by automaton_create_proofs().
|
static |
Create proofs for all states in the given automaton.
Implementation of the algorithm described in chapter 3.2.1 of "Automata Theory, Languages, and Computation 3rd Edition" by Hopcroft, Motwani and Ullman.
Each state in the automaton gets assigned 'proof' and 'hash' (hash of the proof) fields. The starting state will only have a valid proof/hash if it has any incoming transitions.
a | automaton for which to assign proofs and hashes, must not be NULL |
Definition at line 1567 of file regex_internal.c.
References automaton_create_proofs_simplify(), REGEX_INTERNAL_Automaton::canonical_regex, REGEX_INTERNAL_State::dfs_id, GNUNET_assert, GNUNET_CRYPTO_hash(), GNUNET_ERROR_TYPE_ERROR, GNUNET_free, GNUNET_log_strerror, GNUNET_malloc_large, GNUNET_NO, GNUNET_OK, GNUNET_strndup, GNUNET_SYSERR, GNUNET_YES, needs_parentheses(), GNUNET_SCHEDULER_Task::next, StringBuffer::null_flag, number_states(), proof, REGEX_INTERNAL_State::proof, REGEX_INTERNAL_automaton_traverse(), sb_append(), sb_append_cstr(), sb_free(), sb_init(), sb_strdup_cstr(), sb_wrap(), StringBuffer::sbuf, StringBuffer::slen, REGEX_INTERNAL_Automaton::start, REGEX_INTERNAL_Automaton::state_count, and t.
Referenced by REGEX_INTERNAL_construct_dfa().
|
static |
Creates a new DFA state based on a set of NFA states.
Needs to be freed using automaton_destroy_state.
ctx | context |
nfa_states | set of NFA states on which the DFA should be based on |
Definition at line 1733 of file regex_internal.c.
References REGEX_INTERNAL_State::accepting, ctx, GNUNET_asprintf(), GNUNET_malloc, GNUNET_new, GNUNET_realloc, GNUNET_snprintf(), REGEX_INTERNAL_State::id, REGEX_INTERNAL_State::index, REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_State::lowlink, REGEX_INTERNAL_State::name, REGEX_INTERNAL_Transition::next, REGEX_INTERNAL_State::nfa_set, REGEX_INTERNAL_StateSet::off, state_add_transition(), REGEX_INTERNAL_StateSet::states, and REGEX_INTERNAL_State::transitions_head.
Referenced by construct_dfa_states(), and REGEX_INTERNAL_construct_dfa().
|
static |
Move from the given state 's' to the next state on transition 'str'.
Consumes as much of the given 'str' as possible (useful for strided DFAs). On return 's' will point to the next state, and the length of the substring used for this transition will be returned. If no transition possible 0 is returned and 's' points to NULL.
s | starting state, will point to the next state or NULL (if no transition possible) |
str | edge label to follow (will match longest common prefix) |
Definition at line 1803 of file regex_internal.c.
References GNUNET_SCHEDULER_Task::next, and t.
Referenced by evaluate_dfa().
|
static |
Set the given state 'marked' to GNUNET_YES.
Used by the dfa_remove_unreachable_states() function to detect unreachable states in the automaton.
cls | closure, not used. |
count | count, not used. |
s | state where the marked attribute will be set to GNUNET_YES. |
Definition at line 1844 of file regex_internal.c.
References GNUNET_YES, and REGEX_INTERNAL_State::marked.
Referenced by dfa_remove_unreachable_states().
|
static |
Remove all unreachable states from DFA 'a'.
Unreachable states are those states that are not reachable from the starting state.
a | DFA automaton |
Definition at line 1859 of file regex_internal.c.
References automaton_remove_state(), GNUNET_NO, mark_states(), REGEX_INTERNAL_State::marked, REGEX_INTERNAL_State::next, REGEX_INTERNAL_automaton_traverse(), REGEX_INTERNAL_Automaton::start, and REGEX_INTERNAL_Automaton::states_head.
Referenced by dfa_minimize().
|
static |
Remove all dead states from the DFA 'a'.
Dead states are those states that do not transition to any other state but themselves.
a | DFA automaton |
Definition at line 1893 of file regex_internal.c.
References REGEX_INTERNAL_State::accepting, automaton_remove_state(), DFA, GNUNET_assert, GNUNET_SCHEDULER_Task::next, REGEX_INTERNAL_State::next, REGEX_INTERNAL_Automaton::states_head, t, REGEX_INTERNAL_State::transitions_head, and REGEX_INTERNAL_Automaton::type.
Referenced by dfa_minimize().
|
static |
Merge all non distinguishable states in the DFA 'a'.
ctx | context |
a | DFA automaton |
Definition at line 1936 of file regex_internal.c.
References REGEX_INTERNAL_State::accepting, automaton_merge_states(), ctx, GNUNET_ERROR_TYPE_ERROR, GNUNET_free, GNUNET_log, GNUNET_log_strerror, GNUNET_malloc_large, GNUNET_OK, GNUNET_SYSERR, REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_State::marked, REGEX_INTERNAL_Transition::next, REGEX_INTERNAL_State::next, REGEX_INTERNAL_Automaton::state_count, REGEX_INTERNAL_Automaton::states_head, table, REGEX_INTERNAL_Transition::to_state, REGEX_INTERNAL_State::transition_count, and REGEX_INTERNAL_State::transitions_head.
Referenced by dfa_minimize().
|
static |
Minimize the given DFA 'a' by removing all unreachable states, removing all dead states and merging all non distinguishable states.
ctx | context |
a | DFA automaton |
Definition at line 2056 of file regex_internal.c.
References ctx, DFA, dfa_merge_nondistinguishable_states(), dfa_remove_dead_states(), dfa_remove_unreachable_states(), GNUNET_assert, GNUNET_OK, GNUNET_SYSERR, and REGEX_INTERNAL_Automaton::type.
Referenced by REGEX_INTERNAL_construct_dfa().
|
static |
Recursive helper function to add strides to a DFA.
cls | context, contains stride length and strided transitions DLL. |
depth | current depth of the depth-first traversal of the graph. |
label | current label, string that contains all labels on the path from 'start' to 's'. |
start | start state for the depth-first traversal of the graph. |
s | current state in the depth-first traversal |
Definition at line 2111 of file regex_internal.c.
References ctx, dfa_add_multi_strides_helper(), GNUNET_asprintf(), GNUNET_CONTAINER_DLL_insert, GNUNET_free, GNUNET_new, GNUNET_strdup, REGEX_INTERNAL_Transition::label, GNUNET_SCHEDULER_Task::next, start, t, and REGEX_INTERNAL_State::transitions_head.
Referenced by dfa_add_multi_strides(), and dfa_add_multi_strides_helper().
|
static |
Function called for each state in the DFA.
Starts a traversal of depth set in context starting from state 's'.
cls | context. |
count | not used. |
s | current state. |
Definition at line 2167 of file regex_internal.c.
References dfa_add_multi_strides_helper().
Referenced by REGEX_INTERNAL_dfa_add_multi_strides().
void REGEX_INTERNAL_dfa_add_multi_strides | ( | struct REGEX_INTERNAL_Context * | regex_ctx, |
struct REGEX_INTERNAL_Automaton * | dfa, | ||
const unsigned int | stride_len | ||
) |
Adds multi-strided transitions to the given 'dfa'.
regex_ctx | regex context needed to add transitions to the automaton. |
dfa | DFA to which the multi strided transitions should be added. |
stride_len | length of the strides. |
Definition at line 2183 of file regex_internal.c.
References ctx, dfa_add_multi_strides(), GNUNET_CONTAINER_DLL_remove, GNUNET_free, GNUNET_YES, REGEX_INTERNAL_Automaton::is_multistrided, GNUNET_SCHEDULER_Task::next, REGEX_INTERNAL_automaton_traverse(), REGEX_INTERNAL_Automaton::start, state_add_transition(), and t.
|
static |
Recursive Helper function for DFA path compression.
Does DFS on the DFA graph and adds new transitions to the given transitions DLL and marks states that should be removed by setting state->contained to GNUNET_YES.
dfa | DFA for which the paths should be compressed. |
start | starting state for linear path search. |
cur | current state in the recursive DFS. |
label | current label (string of traversed labels). |
max_len | maximal path compression length. |
transitions_head | transitions DLL. |
transitions_tail | transitions DLL. |
Definition at line 2231 of file regex_internal.c.
References REGEX_INTERNAL_State::accepting, REGEX_INTERNAL_State::contained, dfa_compress_paths_helper(), GNUNET_asprintf(), GNUNET_CONTAINER_DLL_insert, GNUNET_free, GNUNET_new, GNUNET_NO, GNUNET_REGEX_INITIAL_BYTES, GNUNET_strdup, GNUNET_YES, REGEX_INTERNAL_State::incoming_transition_count, REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_State::marked, GNUNET_SCHEDULER_Task::next, start, REGEX_INTERNAL_Automaton::start, t, and REGEX_INTERNAL_State::transitions_head.
Referenced by dfa_compress_paths(), and dfa_compress_paths_helper().
|
static |
Compress paths in the given 'dfa'.
Linear paths like 0->1->2->3 will be compressed to 0->3 by combining transitions.
regex_ctx | context for adding new transitions. |
dfa | DFA representation, will directly modify the given DFA. |
max_len | maximal length of the compressed paths. |
Definition at line 2310 of file regex_internal.c.
References automaton_remove_state(), REGEX_INTERNAL_State::contained, dfa_compress_paths_helper(), GNUNET_CONTAINER_DLL_remove, GNUNET_free, GNUNET_NO, GNUNET_YES, REGEX_INTERNAL_State::marked, GNUNET_SCHEDULER_Task::next, REGEX_INTERNAL_State::next, REGEX_INTERNAL_Automaton::start, state_add_transition(), REGEX_INTERNAL_Automaton::states_head, t, and REGEX_INTERNAL_State::transitions_head.
Referenced by REGEX_INTERNAL_construct_dfa().
|
static |
Creates a new NFA fragment.
Needs to be cleared using automaton_fragment_clear.
start | starting state |
end | end state |
Definition at line 2380 of file regex_internal.c.
References automaton_add_state(), end, REGEX_INTERNAL_Automaton::end, GNUNET_new, NFA, start, REGEX_INTERNAL_Automaton::start, REGEX_INTERNAL_Automaton::state_count, and REGEX_INTERNAL_Automaton::type.
Referenced by nfa_add_alternation(), nfa_add_concatenation(), nfa_add_label(), nfa_add_question_op(), and nfa_add_star_op().
|
static |
Adds a list of states to the given automaton 'n'.
n | automaton to which the states should be added |
states_head | head of the DLL of states |
states_tail | tail of the DLL of states |
Definition at line 2415 of file regex_internal.c.
References GNUNET_ERROR_TYPE_ERROR, GNUNET_log, REGEX_INTERNAL_State::next, REGEX_INTERNAL_Automaton::state_count, REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.
Referenced by nfa_add_alternation(), nfa_add_concatenation(), nfa_add_question_op(), and nfa_add_star_op().
|
static |
Creates a new NFA state.
Needs to be freed using automaton_destroy_state.
ctx | context |
accepting | is it an accepting state or not |
Definition at line 2454 of file regex_internal.c.
References REGEX_INTERNAL_State::accepting, REGEX_INTERNAL_State::contained, ctx, GNUNET_asprintf(), GNUNET_new, GNUNET_NO, REGEX_INTERNAL_State::id, REGEX_INTERNAL_State::index, REGEX_INTERNAL_State::lowlink, REGEX_INTERNAL_State::marked, REGEX_INTERNAL_State::name, and REGEX_INTERNAL_State::scc_id.
Referenced by nfa_add_alternation(), nfa_add_label(), nfa_add_question_op(), and nfa_add_star_op().
|
static |
Calculates the closure set for the given set of states.
ret | set to sorted nfa closure on 'label' (epsilon closure if 'label' is NULL) |
nfa | the NFA containing 's' |
states | list of states on which to base the closure on |
label | transitioning label for which to base the closure on, pass NULL for epsilon transition |
Definition at line 2483 of file regex_internal.c.
References REGEX_INTERNAL_State::contained, GNUNET_CONTAINER_MDLL_insert, GNUNET_CONTAINER_MDLL_insert_tail, GNUNET_CONTAINER_MDLL_remove, REGEX_INTERNAL_StateSet_MDLL::head, REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_StateSet_MDLL::len, REGEX_INTERNAL_Transition::next, nullstrcmp(), REGEX_INTERNAL_StateSet::off, ret, state_compare(), state_set_append(), REGEX_INTERNAL_StateSet::states, REGEX_INTERNAL_StateSet_MDLL::tail, REGEX_INTERNAL_Transition::to_state, and REGEX_INTERNAL_State::transitions_head.
Referenced by construct_dfa_states(), evaluate_nfa(), and REGEX_INTERNAL_construct_dfa().
|
static |
Pops two NFA fragments (a, b) from the stack and concatenates them (ab)
ctx | context |
Definition at line 2556 of file regex_internal.c.
References REGEX_INTERNAL_State::accepting, automaton_fragment_clear(), ctx, REGEX_INTERNAL_Automaton::end, GNUNET_assert, GNUNET_CONTAINER_DLL_insert_tail, GNUNET_CONTAINER_DLL_remove, nfa_add_states(), nfa_fragment_create(), REGEX_INTERNAL_Automaton::start, state_add_transition(), REGEX_INTERNAL_Automaton::state_count, REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.
Referenced by REGEX_INTERNAL_construct_nfa().
|
static |
Pops a NFA fragment from the stack (a) and adds a new fragment (a*)
ctx | context |
Definition at line 2592 of file regex_internal.c.
References REGEX_INTERNAL_State::accepting, automaton_fragment_clear(), ctx, end, REGEX_INTERNAL_Automaton::end, GNUNET_CONTAINER_DLL_insert_tail, GNUNET_CONTAINER_DLL_remove, GNUNET_ERROR_TYPE_ERROR, GNUNET_log, nfa_add_states(), nfa_fragment_create(), nfa_state_create(), start, REGEX_INTERNAL_Automaton::start, state_add_transition(), REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.
Referenced by REGEX_INTERNAL_construct_nfa().
|
static |
Pops an NFA fragment (a) from the stack and adds a new fragment (a+)
ctx | context |
Definition at line 2636 of file regex_internal.c.
References ctx, REGEX_INTERNAL_Automaton::end, GNUNET_CONTAINER_DLL_insert_tail, GNUNET_CONTAINER_DLL_remove, GNUNET_ERROR_TYPE_ERROR, GNUNET_log, REGEX_INTERNAL_Automaton::start, and state_add_transition().
Referenced by REGEX_INTERNAL_construct_nfa().
|
static |
Pops an NFA fragment (a) from the stack and adds a new fragment (a?)
ctx | context |
Definition at line 2664 of file regex_internal.c.
References REGEX_INTERNAL_State::accepting, automaton_fragment_clear(), ctx, end, REGEX_INTERNAL_Automaton::end, GNUNET_CONTAINER_DLL_insert_tail, GNUNET_CONTAINER_DLL_remove, GNUNET_ERROR_TYPE_ERROR, GNUNET_log, nfa_add_states(), nfa_fragment_create(), nfa_state_create(), start, REGEX_INTERNAL_Automaton::start, state_add_transition(), REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.
Referenced by REGEX_INTERNAL_construct_nfa().
|
static |
Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment that alternates between a and b (a|b)
ctx | context |
Definition at line 2705 of file regex_internal.c.
References REGEX_INTERNAL_State::accepting, automaton_fragment_clear(), ctx, end, REGEX_INTERNAL_Automaton::end, GNUNET_assert, GNUNET_CONTAINER_DLL_insert_tail, GNUNET_CONTAINER_DLL_remove, nfa_add_states(), nfa_fragment_create(), nfa_state_create(), start, REGEX_INTERNAL_Automaton::start, state_add_transition(), REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.
Referenced by REGEX_INTERNAL_construct_nfa().
|
static |
Adds a new nfa fragment to the stack.
ctx | context |
label | label for nfa transition |
Definition at line 2749 of file regex_internal.c.
References ctx, end, GNUNET_assert, GNUNET_CONTAINER_DLL_insert_tail, nfa_fragment_create(), nfa_state_create(), start, and state_add_transition().
Referenced by REGEX_INTERNAL_construct_nfa().
|
static |
Initialize a new context.
ctx | context |
Definition at line 2772 of file regex_internal.c.
References ctx, GNUNET_ERROR_TYPE_ERROR, and GNUNET_log.
Referenced by REGEX_INTERNAL_construct_dfa(), and REGEX_INTERNAL_construct_nfa().
struct REGEX_INTERNAL_Automaton * REGEX_INTERNAL_construct_nfa | ( | const char * | regex, |
const size_t | len | ||
) |
Construct an NFA by parsing the regex string of length 'len'.
regex | regular expression string |
len | length of the string |
Definition at line 2795 of file regex_internal.c.
References ctx, GNUNET_array_grow, GNUNET_CONTAINER_DLL_remove, GNUNET_ERROR_TYPE_ERROR, GNUNET_free, GNUNET_log, GNUNET_NO, GNUNET_strdup, REGEX_INTERNAL_Automaton::is_multistrided, nfa_add_alternation(), nfa_add_concatenation(), nfa_add_label(), nfa_add_plus_op(), nfa_add_question_op(), nfa_add_star_op(), number_states(), p, REGEX_INTERNAL_Automaton::regex, REGEX_INTERNAL_automaton_destroy(), REGEX_INTERNAL_automaton_traverse(), and REGEX_INTERNAL_context_init().
Referenced by REGEX_INTERNAL_construct_dfa().
|
static |
Create DFA states based on given 'nfa' and starting with 'dfa_state'.
ctx | context. |
nfa | NFA automaton. |
dfa | DFA automaton. |
dfa_state | current dfa state, pass epsilon closure of first nfa state for starting. |
Definition at line 2989 of file regex_internal.c.
References automaton_add_state(), construct_dfa_states(), ctx, dfa_state_create(), REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_Transition::next, REGEX_INTERNAL_State::next, nfa_closure_set_create(), REGEX_INTERNAL_State::nfa_set, state_set_clear(), state_set_compare(), REGEX_INTERNAL_Automaton::states_head, REGEX_INTERNAL_Transition::to_state, and REGEX_INTERNAL_State::transitions_head.
Referenced by construct_dfa_states(), and REGEX_INTERNAL_construct_dfa().
struct REGEX_INTERNAL_Automaton * REGEX_INTERNAL_construct_dfa | ( | const char * | regex, |
const size_t | len, | ||
unsigned int | max_path_len | ||
) |
Construct DFA for the given 'regex' of length 'len'.
Path compression means, that for example a DFA o -> a -> b -> c -> o will be compressed to o -> abc -> o. Note that this parameter influences the non-determinism of states of the resulting NFA in the DHT (number of outgoing edges with the same label). For example for an application that stores IPv4 addresses as bitstrings it could make sense to limit the path compression to 4 or 8.
regex | regular expression string. |
len | length of the regular expression. |
max_path_len | limit the path compression length to the given value. If set to 1, no path compression is applied. Set to 0 for maximal possible path compression (generally not desirable). |
Definition at line 3037 of file regex_internal.c.
References automaton_add_state(), automaton_create_proofs(), construct_dfa_states(), ctx, DFA, dfa_compress_paths(), dfa_minimize(), dfa_state_create(), GNUNET_ERROR_TYPE_ERROR, GNUNET_log, GNUNET_new, GNUNET_OK, GNUNET_strdup, nfa_closure_set_create(), REGEX_INTERNAL_Automaton::regex, REGEX_INTERNAL_automaton_destroy(), REGEX_INTERNAL_construct_nfa(), REGEX_INTERNAL_context_init(), REGEX_INTERNAL_Automaton::start, state_set_append(), state_set_clear(), and REGEX_INTERNAL_Automaton::type.
Referenced by main(), and REGEX_INTERNAL_announce().
void REGEX_INTERNAL_automaton_destroy | ( | struct REGEX_INTERNAL_Automaton * | a | ) |
Free the memory allocated by constructing the REGEX_INTERNAL_Automaton.
data structure.
a | automaton to be destroyed. |
Definition at line 3097 of file regex_internal.c.
References automaton_destroy_state(), REGEX_INTERNAL_Automaton::canonical_regex, GNUNET_CONTAINER_DLL_remove, GNUNET_free, REGEX_INTERNAL_State::next, REGEX_INTERNAL_Automaton::regex, REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.
Referenced by main(), REGEX_INTERNAL_announce_cancel(), REGEX_INTERNAL_construct_dfa(), and REGEX_INTERNAL_construct_nfa().
|
static |
Evaluates the given string using the given DFA automaton.
a | automaton, type must be DFA |
string | string that should be evaluated |
Definition at line 3128 of file regex_internal.c.
References REGEX_INTERNAL_State::accepting, DFA, dfa_move(), GNUNET_ERROR_TYPE_ERROR, GNUNET_log, REGEX_INTERNAL_Automaton::start, and REGEX_INTERNAL_Automaton::type.
Referenced by REGEX_INTERNAL_eval().
|
static |
Evaluates the given string using the given NFA automaton.
a | automaton, type must be NFA |
string | string that should be evaluated |
Definition at line 3170 of file regex_internal.c.
References REGEX_INTERNAL_State::accepting, GNUNET_ERROR_TYPE_ERROR, GNUNET_log, NFA, nfa_closure_set_create(), REGEX_INTERNAL_StateSet::off, result, REGEX_INTERNAL_Automaton::start, state_set_append(), state_set_clear(), REGEX_INTERNAL_StateSet::states, and REGEX_INTERNAL_Automaton::type.
Referenced by REGEX_INTERNAL_eval().
int REGEX_INTERNAL_eval | ( | struct REGEX_INTERNAL_Automaton * | a, |
const char * | string | ||
) |
Evaluates the given 'string' against the given compiled regex.
a | automaton. |
string | string to check. |
Definition at line 3224 of file regex_internal.c.
References DFA, evaluate_dfa(), evaluate_nfa(), GNUNET_ERROR_TYPE_ERROR, GNUNET_log, GNUNET_SYSERR, NFA, result, and REGEX_INTERNAL_Automaton::type.
const char * REGEX_INTERNAL_get_canonical_regex | ( | struct REGEX_INTERNAL_Automaton * | a | ) |
Get the canonical regex of the given automaton.
When constructing the automaton a proof is computed for each state, consisting of the regular expression leading to this state. A complete regex for the automaton can be computed by combining these proofs. As of now this function is only useful for testing.
a | automaton for which the canonical regex should be returned. |
Definition at line 3250 of file regex_internal.c.
References REGEX_INTERNAL_Automaton::canonical_regex.
unsigned int REGEX_INTERNAL_get_transition_count | ( | struct REGEX_INTERNAL_Automaton * | a | ) |
Get the number of transitions that are contained in the given automaton.
a | automaton for which the number of transitions should be returned. |
Definition at line 3267 of file regex_internal.c.
References REGEX_INTERNAL_State::next, REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_State::transition_count.
size_t REGEX_INTERNAL_get_first_key | ( | const char * | input_string, |
size_t | string_len, | ||
struct GNUNET_HashCode * | key | ||
) |
Get the first key for the given input_string.
This hashes the first x bits of the input_string.
input_string | string. |
string_len | length of the input_string. |
key | pointer to where to write the hash code. |
Definition at line 3294 of file regex_internal.c.
References GNUNET_CRYPTO_hash(), GNUNET_ERROR_TYPE_ERROR, GNUNET_log, GNUNET_REGEX_INITIAL_BYTES, key, and size.
Referenced by REGEX_INTERNAL_search().
|
static |
Recursive function that calls the iterator for each synthetic start state.
min_len | minimum length of the path in the graph. |
max_len | maximum length of the path in the graph. |
consumed_string | string consumed by traversing the graph till this state. |
state | current state of the automaton. |
iterator | iterator function called for each edge. |
iterator_cls | closure for the iterator function. |
Definition at line 3324 of file regex_internal.c.
References REGEX_BLOCK_Edge::destination, GNUNET_asprintf(), GNUNET_break, GNUNET_CRYPTO_hash(), GNUNET_ERROR_TYPE_DEBUG, GNUNET_free, GNUNET_h2s(), GNUNET_log, GNUNET_NO, GNUNET_strdup, GNUNET_YES, iterate_initial_edge(), REGEX_BLOCK_Edge::label, GNUNET_SCHEDULER_Task::next, state, state_get_edges(), and t.
Referenced by iterate_initial_edge(), and REGEX_INTERNAL_iterate_all_edges().
void REGEX_INTERNAL_iterate_all_edges | ( | struct REGEX_INTERNAL_Automaton * | a, |
REGEX_INTERNAL_KeyIterator | iterator, | ||
void * | iterator_cls | ||
) |
Iterate over all edges starting from start state of automaton 'a'.
Calling iterator for each edge.
a | automaton. |
iterator | iterator called for each edge. |
iterator_cls | closure. |
Definition at line 3438 of file regex_internal.c.
References GNUNET_ERROR_TYPE_DEBUG, GNUNET_h2s(), GNUNET_log, GNUNET_NO, GNUNET_REGEX_INITIAL_BYTES, iterate_initial_edge(), REGEX_INTERNAL_State::next, REGEX_INTERNAL_Automaton::start, state_get_edges(), and REGEX_INTERNAL_Automaton::states_head.
Referenced by main(), and REGEX_INTERNAL_iterate_reachable_edges().
|
static |
Iterator over all edges of a dfa.
Stores all of them in a HashMap for later reachability marking.
cls | Closure (HashMap) |
key | hash for current state. |
proof | proof for current state |
accepting | GNUNET_YES if this is an accepting state, GNUNET_NO if not. |
num_edges | number of edges leaving current state. |
edges | edges leaving current state. |
Definition at line 3514 of file regex_internal.c.
References temporal_state_store::accepting, temporal_state_store::edges, GNUNET_assert, GNUNET_CONTAINER_multihashmap_put(), GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST, GNUNET_malloc, GNUNET_memcpy, GNUNET_new, GNUNET_NO, GNUNET_strdup, GNUNET_YES, key, temporal_state_store::num_edges, proof, temporal_state_store::proof, and temporal_state_store::reachable.
Referenced by REGEX_INTERNAL_iterate_reachable_edges().
|
static |
Mark state as reachable and call recursively on all its edges.
If already marked as reachable, do nothing.
state | State to mark as reachable. |
hm | HashMap which stores all the states indexed by key. |
Definition at line 3551 of file regex_internal.c.
References child, GNUNET_break, GNUNET_CONTAINER_multihashmap_get(), GNUNET_YES, mark_as_reachable(), and state.
Referenced by mark_as_reachable(), and reachability_iterator().
|
static |
Iterator over hash map entries to mark the ones that are reachable.
cls | closure |
key | current key code |
value | value in the hash map |
Definition at line 3586 of file regex_internal.c.
References GNUNET_NO, GNUNET_REGEX_INITIAL_BYTES, GNUNET_YES, mark_as_reachable(), state, and value.
Referenced by REGEX_INTERNAL_iterate_reachable_edges().
|
static |
Iterator over hash map entries.
Calling the callback on the ones marked as reachables.
cls | closure |
key | current key code |
value | value in the hash map |
Definition at line 3618 of file regex_internal.c.
References GNUNET_free, GNUNET_YES, client_iterator::iterator, client_iterator::iterator_cls, key, state, and value.
Referenced by REGEX_INTERNAL_iterate_reachable_edges().
void REGEX_INTERNAL_iterate_reachable_edges | ( | struct REGEX_INTERNAL_Automaton * | a, |
REGEX_INTERNAL_KeyIterator | iterator, | ||
void * | iterator_cls | ||
) |
Iterate over all edges of automaton 'a' that are reachable from a state with a proof of at least GNUNET_REGEX_INITIAL_BYTES characters.
Call the iterator for each such edge.
a | automaton. |
iterator | iterator called for each reachable edge. |
iterator_cls | closure. |
Definition at line 3640 of file regex_internal.c.
References GNUNET_CONTAINER_multihashmap_create(), GNUNET_CONTAINER_multihashmap_destroy(), GNUNET_CONTAINER_multihashmap_iterate(), GNUNET_NO, iterate_reachables(), client_iterator::iterator, client_iterator::iterator_cls, reachability_iterator(), REGEX_INTERNAL_iterate_all_edges(), REGEX_INTERNAL_Automaton::state_count, and store_all_states().
Referenced by main(), and REGEX_INTERNAL_reannounce().