![]() |
GNUnet
0.11.x
|
library to create Deterministic Finite Automatons (DFAs) from regular expressions (regexes). More...
#include "platform.h"
#include "gnunet_util_lib.h"
#include "gnunet_regex_service.h"
#include "regex_internal_lib.h"
#include "regex_internal.h"
Go to the source code of this file.
Data Structures | |
struct | REGEX_INTERNAL_StateSet_MDLL |
Set of states using MDLL API. More... | |
struct | StringBuffer |
String container for faster string operations. More... | |
struct | REGEX_INTERNAL_Strided_Context |
Context for adding strided transitions to a DFA. More... | |
struct | temporal_state_store |
Struct to hold all the relevant state information in the HashMap. More... | |
struct | client_iterator |
Store regex iterator and cls in one place to pass to the hashmap iterator. More... | |
Macros | |
#define | REGEX_DEBUG_DFA GNUNET_NO |
Set this to GNUNET_YES to enable state naming. More... | |
#define | PRIS(a) |
Functions | |
static void | state_set_append (struct REGEX_INTERNAL_StateSet *set, struct REGEX_INTERNAL_State *state) |
Append state to the given StateSet. More... | |
static int | nullstrcmp (const char *str1, const char *str2) |
Compare two strings for equality. More... | |
static void | state_add_transition (struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_State *from_state, const char *label, struct REGEX_INTERNAL_State *to_state) |
Adds a transition from one state to another on label. More... | |
static void | state_remove_transition (struct REGEX_INTERNAL_State *state, struct REGEX_INTERNAL_Transition *transition) |
Remove a 'transition' from 'state'. More... | |
static int | state_compare (const void *a, const void *b) |
Compare two states. More... | |
static unsigned int | state_get_edges (struct REGEX_INTERNAL_State *s, struct REGEX_BLOCK_Edge *edges) |
Get all edges leaving state s. More... | |
static int | state_set_compare (struct REGEX_INTERNAL_StateSet *sset1, struct REGEX_INTERNAL_StateSet *sset2) |
Compare to state sets by comparing the id's of the states that are contained in each set. More... | |
static void | state_set_clear (struct REGEX_INTERNAL_StateSet *set) |
Clears the given StateSet 'set'. More... | |
static void | automaton_fragment_clear (struct REGEX_INTERNAL_Automaton *a) |
Clears an automaton fragment. More... | |
static void | automaton_destroy_state (struct REGEX_INTERNAL_State *s) |
Frees the memory used by State s. More... | |
static void | automaton_remove_state (struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s) |
Remove a state from the given automaton 'a'. More... | |
static void | automaton_merge_states (struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s1, struct REGEX_INTERNAL_State *s2) |
Merge two states into one. More... | |
static void | automaton_add_state (struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s) |
Add a state to the automaton 'a', always use this function to alter the states DLL of the automaton. More... | |
static void | automaton_state_traverse (struct REGEX_INTERNAL_State *s, int *marks, unsigned int *count, REGEX_INTERNAL_traverse_check check, void *check_cls, REGEX_INTERNAL_traverse_action action, void *action_cls) |
Depth-first traversal (DFS) of all states that are reachable from state 's'. More... | |
void | REGEX_INTERNAL_automaton_traverse (const struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *start, REGEX_INTERNAL_traverse_check check, void *check_cls, REGEX_INTERNAL_traverse_action action, void *action_cls) |
Traverses the given automaton using depth-first-search (DFS) from it's start state, visiting all reachable states and calling 'action' on each one of them. More... | |
static int | sb_nullstrcmp (const struct StringBuffer *s1, const struct StringBuffer *s2) |
Compare two strings for equality. More... | |
static int | sb_strcmp (const struct StringBuffer *s1, const struct StringBuffer *s2) |
Compare two strings for equality. More... | |
static void | sb_realloc (struct StringBuffer *ret, size_t nlen) |
Reallocate the buffer of 'ret' to fit 'nlen' characters; move the existing string to the beginning of the new buffer. More... | |
static void | sb_append (struct StringBuffer *ret, const struct StringBuffer *sarg) |
Append a string. More... | |
static void | sb_append_cstr (struct StringBuffer *ret, const char *cstr) |
Append a C string. More... | |
static void | sb_wrap (struct StringBuffer *ret, const char *format, size_t extra_chars) |
Wrap a string buffer, that is, set ret to the format string which contains an "%s" which is to be replaced with the original content of 'ret'. More... | |
static void | sb_printf1 (struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg) |
Format a string buffer. More... | |
static void | sb_printf2 (struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg1, const struct StringBuffer *sarg2) |
Format a string buffer. More... | |
static void | sb_printf3 (struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg1, const struct StringBuffer *sarg2, const struct StringBuffer *sarg3) |
Format a string buffer. More... | |
static void | sb_free (struct StringBuffer *sb) |
Free resources of the given string buffer. More... | |
static void | sb_strdup (struct StringBuffer *out, const struct StringBuffer *in) |
Copy the given string buffer from 'in' to 'out'. More... | |
static void | sb_strdup_cstr (struct StringBuffer *out, const char *cstr) |
Copy the given string buffer from 'in' to 'out'. More... | |
static int | needs_parentheses (const struct StringBuffer *str) |
Check if the given string str needs parentheses around it when using it to generate a regex. More... | |
static void | remove_parentheses (struct StringBuffer *str) |
Remove parentheses surrounding string str. More... | |
static int | has_epsilon (const struct StringBuffer *str) |
Check if the string 'str' starts with an epsilon (empty string). More... | |
static void | remove_epsilon (const struct StringBuffer *str, struct StringBuffer *ret) |
Remove an epsilon from the string str. More... | |
static int | sb_strncmp (const struct StringBuffer *str1, const struct StringBuffer *str2, size_t n) |
Compare n bytes of 'str1' and 'str2'. More... | |
static int | sb_strncmp_cstr (const struct StringBuffer *str1, const char *str2, size_t n) |
Compare n bytes of 'str1' and 'str2'. More... | |
static void | sb_init (struct StringBuffer *sb, size_t n) |
Initialize string buffer for storing strings of up to n characters. More... | |
static int | sb_strkcmp (const struct StringBuffer *str1, const struct StringBuffer *str2, size_t k) |
Compare 'str1', starting from position 'k', with whole 'str2'. More... | |
static void | number_states (void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s) |
Helper function used as 'action' in 'REGEX_INTERNAL_automaton_traverse' function to create the depth-first numbering of the states. More... | |
static void | automaton_create_proofs_simplify (const struct StringBuffer *R_last_ij, const struct StringBuffer *R_last_ik, const struct StringBuffer *R_last_kk, const struct StringBuffer *R_last_kj, struct StringBuffer *R_cur_ij, struct StringBuffer *R_cur_l, struct StringBuffer *R_cur_r) |
Construct the regular expression given the inductive step, $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}, and simplify the resulting expression saved in R_cur_ij. More... | |
static int | automaton_create_proofs (struct REGEX_INTERNAL_Automaton *a) |
Create proofs for all states in the given automaton. More... | |
static struct REGEX_INTERNAL_State * | dfa_state_create (struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_StateSet *nfa_states) |
Creates a new DFA state based on a set of NFA states. More... | |
static unsigned int | dfa_move (struct REGEX_INTERNAL_State **s, const char *str) |
Move from the given state 's' to the next state on transition 'str'. More... | |
static void | mark_states (void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s) |
Set the given state 'marked' to GNUNET_YES. More... | |
static void | dfa_remove_unreachable_states (struct REGEX_INTERNAL_Automaton *a) |
Remove all unreachable states from DFA 'a'. More... | |
static void | dfa_remove_dead_states (struct REGEX_INTERNAL_Automaton *a) |
Remove all dead states from the DFA 'a'. More... | |
static int | dfa_merge_nondistinguishable_states (struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a) |
Merge all non distinguishable states in the DFA 'a'. More... | |
static int | dfa_minimize (struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a) |
Minimize the given DFA 'a' by removing all unreachable states, removing all dead states and merging all non distinguishable states. More... | |
static void | dfa_add_multi_strides_helper (void *cls, const unsigned int depth, char *label, struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *s) |
Recursive helper function to add strides to a DFA. More... | |
static void | dfa_add_multi_strides (void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s) |
Function called for each state in the DFA. More... | |
void | REGEX_INTERNAL_dfa_add_multi_strides (struct REGEX_INTERNAL_Context *regex_ctx, struct REGEX_INTERNAL_Automaton *dfa, const unsigned int stride_len) |
Adds multi-strided transitions to the given 'dfa'. More... | |
void | dfa_compress_paths_helper (struct REGEX_INTERNAL_Automaton *dfa, struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *cur, char *label, unsigned int max_len, struct REGEX_INTERNAL_Transition **transitions_head, struct REGEX_INTERNAL_Transition **transitions_tail) |
Recursive Helper function for DFA path compression. More... | |
static void | dfa_compress_paths (struct REGEX_INTERNAL_Context *regex_ctx, struct REGEX_INTERNAL_Automaton *dfa, unsigned int max_len) |
Compress paths in the given 'dfa'. More... | |
static struct REGEX_INTERNAL_Automaton * | nfa_fragment_create (struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *end) |
Creates a new NFA fragment. More... | |
static void | nfa_add_states (struct REGEX_INTERNAL_Automaton *n, struct REGEX_INTERNAL_State *states_head, struct REGEX_INTERNAL_State *states_tail) |
Adds a list of states to the given automaton 'n'. More... | |
static struct REGEX_INTERNAL_State * | nfa_state_create (struct REGEX_INTERNAL_Context *ctx, int accepting) |
Creates a new NFA state. More... | |
static void | nfa_closure_set_create (struct REGEX_INTERNAL_StateSet *ret, struct REGEX_INTERNAL_Automaton *nfa, struct REGEX_INTERNAL_StateSet *states, const char *label) |
Calculates the closure set for the given set of states. More... | |
static void | nfa_add_concatenation (struct REGEX_INTERNAL_Context *ctx) |
Pops two NFA fragments (a, b) from the stack and concatenates them (ab) More... | |
static void | nfa_add_star_op (struct REGEX_INTERNAL_Context *ctx) |
Pops a NFA fragment from the stack (a) and adds a new fragment (a*) More... | |
static void | nfa_add_plus_op (struct REGEX_INTERNAL_Context *ctx) |
Pops an NFA fragment (a) from the stack and adds a new fragment (a+) More... | |
static void | nfa_add_question_op (struct REGEX_INTERNAL_Context *ctx) |
Pops an NFA fragment (a) from the stack and adds a new fragment (a?) More... | |
static void | nfa_add_alternation (struct REGEX_INTERNAL_Context *ctx) |
Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment that alternates between a and b (a|b) More... | |
static void | nfa_add_label (struct REGEX_INTERNAL_Context *ctx, const char *label) |
Adds a new nfa fragment to the stack. More... | |
static void | REGEX_INTERNAL_context_init (struct REGEX_INTERNAL_Context *ctx) |
Initialize a new context. More... | |
struct REGEX_INTERNAL_Automaton * | REGEX_INTERNAL_construct_nfa (const char *regex, const size_t len) |
Construct an NFA by parsing the regex string of length 'len'. More... | |
static void | construct_dfa_states (struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *nfa, struct REGEX_INTERNAL_Automaton *dfa, struct REGEX_INTERNAL_State *dfa_state) |
Create DFA states based on given 'nfa' and starting with 'dfa_state'. More... | |
struct REGEX_INTERNAL_Automaton * | REGEX_INTERNAL_construct_dfa (const char *regex, const size_t len, unsigned int max_path_len) |
Construct DFA for the given 'regex' of length 'len'. More... | |
void | REGEX_INTERNAL_automaton_destroy (struct REGEX_INTERNAL_Automaton *a) |
Free the memory allocated by constructing the REGEX_INTERNAL_Automaton data structure. More... | |
static int | evaluate_dfa (struct REGEX_INTERNAL_Automaton *a, const char *string) |
Evaluates the given string using the given DFA automaton. More... | |
static int | evaluate_nfa (struct REGEX_INTERNAL_Automaton *a, const char *string) |
Evaluates the given string using the given NFA automaton. More... | |
int | REGEX_INTERNAL_eval (struct REGEX_INTERNAL_Automaton *a, const char *string) |
Evaluates the given string against the given compiled regex a. More... | |
const char * | REGEX_INTERNAL_get_canonical_regex (struct REGEX_INTERNAL_Automaton *a) |
Get the canonical regex of the given automaton. More... | |
unsigned int | REGEX_INTERNAL_get_transition_count (struct REGEX_INTERNAL_Automaton *a) |
Get the number of transitions that are contained in the given automaton. More... | |
size_t | REGEX_INTERNAL_get_first_key (const char *input_string, size_t string_len, struct GNUNET_HashCode *key) |
Get the first key for the given input_string. More... | |
static void | iterate_initial_edge (unsigned int min_len, unsigned int max_len, char *consumed_string, struct REGEX_INTERNAL_State *state, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls) |
Recursive function that calls the iterator for each synthetic start state. More... | |
void | REGEX_INTERNAL_iterate_all_edges (struct REGEX_INTERNAL_Automaton *a, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls) |
Iterate over all edges starting from start state of automaton 'a'. More... | |
static void | store_all_states (void *cls, const struct GNUNET_HashCode *key, const char *proof, int accepting, unsigned int num_edges, const struct REGEX_BLOCK_Edge *edges) |
Iterator over all edges of a dfa. More... | |
static void | mark_as_reachable (struct temporal_state_store *state, struct GNUNET_CONTAINER_MultiHashMap *hm) |
Mark state as reachable and call recursively on all its edges. More... | |
static int | reachability_iterator (void *cls, const struct GNUNET_HashCode *key, void *value) |
Iterator over hash map entries to mark the ones that are reachable. More... | |
static int | iterate_reachables (void *cls, const struct GNUNET_HashCode *key, void *value) |
Iterator over hash map entries. More... | |
void | REGEX_INTERNAL_iterate_reachable_edges (struct REGEX_INTERNAL_Automaton *a, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls) |
Iterate over all edges of automaton 'a' that are reachable from a state with a proof of at least GNUNET_REGEX_INITIAL_BYTES characters. More... | |
library to create Deterministic Finite Automatons (DFAs) from regular expressions (regexes).
Definition in file regex_internal.c.
#define REGEX_DEBUG_DFA GNUNET_NO |
Set this to GNUNET_YES to enable state naming.
Used to debug NFA->DFA creation. Disabled by default for better performance.
Definition at line 37 of file regex_internal.c.
#define PRIS | ( | a | ) |
Definition at line 1151 of file regex_internal.c.
|
static |
Append state to the given StateSet.
set | set to be modified |
state | state to be appended |
Definition at line 68 of file regex_internal.c.
References GNUNET_array_grow, and state.
Referenced by evaluate_nfa(), nfa_closure_set_create(), and REGEX_INTERNAL_construct_dfa().
|
static |
Compare two strings for equality.
If either is NULL they are not equal.
str1 | first string for comparison. |
str2 | second string for comparison. |
Definition at line 86 of file regex_internal.c.
Referenced by nfa_closure_set_create(), and state_add_transition().
|
static |
Adds a transition from one state to another on label.
Does not add duplicate states.
ctx | context |
from_state | starting state for the transition |
label | transition label |
to_state | state to where the transition should point to |
Definition at line 107 of file regex_internal.c.
References REGEX_INTERNAL_Transition::from_state, GNUNET_CONTAINER_DLL_insert_before, GNUNET_ERROR_TYPE_ERROR, GNUNET_log, GNUNET_new, GNUNET_strdup, REGEX_INTERNAL_Transition::id, REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_Transition::next, nullstrcmp(), t, REGEX_INTERNAL_Transition::to_state, REGEX_INTERNAL_State::transition_count, REGEX_INTERNAL_Context::transition_id, REGEX_INTERNAL_State::transitions_head, and REGEX_INTERNAL_State::transitions_tail.
Referenced by automaton_merge_states(), dfa_compress_paths(), dfa_state_create(), nfa_add_alternation(), nfa_add_concatenation(), nfa_add_label(), nfa_add_plus_op(), nfa_add_question_op(), nfa_add_star_op(), and REGEX_INTERNAL_dfa_add_multi_strides().
|
static |
Remove a 'transition' from 'state'.
state | state from which the to-be-removed transition originates. |
transition | transition that should be removed from state 'state'. |
Definition at line 162 of file regex_internal.c.
References REGEX_INTERNAL_Transition::from_state, GNUNET_CONTAINER_DLL_remove, GNUNET_free, REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_State::transition_count, REGEX_INTERNAL_State::transitions_head, and REGEX_INTERNAL_State::transitions_tail.
Referenced by automaton_destroy_state(), automaton_merge_states(), and automaton_remove_state().
|
static |
Compare two states.
Used for sorting.
a | first state |
b | second state |
Definition at line 193 of file regex_internal.c.
References REGEX_INTERNAL_State::id.
Referenced by nfa_closure_set_create(), and state_set_compare().
|
static |
Get all edges leaving state s.
s | state. |
edges | all edges leaving s, expected to be allocated and have enough space for s->transitions_count elements. |
Definition at line 212 of file regex_internal.c.
References REGEX_BLOCK_Edge::destination, REGEX_INTERNAL_State::hash, REGEX_BLOCK_Edge::label, REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_Transition::next, t, REGEX_INTERNAL_Transition::to_state, and REGEX_INTERNAL_State::transitions_head.
Referenced by iterate_initial_edge(), and REGEX_INTERNAL_iterate_all_edges().
|
static |
Compare to state sets by comparing the id's of the states that are contained in each set.
Both sets are expected to be sorted by id!
sset1 | first state set |
sset2 | second state set |
Definition at line 244 of file regex_internal.c.
References REGEX_INTERNAL_StateSet::off, result, state_compare(), and REGEX_INTERNAL_StateSet::states.
Referenced by construct_dfa_states().
|
static |
Clears the given StateSet 'set'.
set | set to be cleared |
Definition at line 271 of file regex_internal.c.
References GNUNET_array_grow.
Referenced by automaton_destroy_state(), construct_dfa_states(), evaluate_nfa(), and REGEX_INTERNAL_construct_dfa().
|
static |
Clears an automaton fragment.
Does not destroy the states inside the automaton.
a | automaton to be cleared |
Definition at line 285 of file regex_internal.c.
References REGEX_INTERNAL_Automaton::end, GNUNET_free, REGEX_INTERNAL_Automaton::start, REGEX_INTERNAL_Automaton::state_count, REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.
Referenced by nfa_add_alternation(), nfa_add_concatenation(), nfa_add_question_op(), and nfa_add_star_op().
|
static |
Frees the memory used by State s.
s | state that should be destroyed |
Definition at line 305 of file regex_internal.c.
References GNUNET_free, REGEX_INTERNAL_State::name, REGEX_INTERNAL_Transition::next, REGEX_INTERNAL_State::nfa_set, REGEX_INTERNAL_State::proof, state_remove_transition(), state_set_clear(), t, and REGEX_INTERNAL_State::transitions_head.
Referenced by automaton_merge_states(), automaton_remove_state(), and REGEX_INTERNAL_automaton_destroy().
|
static |
Remove a state from the given automaton 'a'.
Always use this function when altering the states of an automaton. Will also remove all transitions leading to this state, before destroying it.
a | automaton |
s | state to remove |
Definition at line 335 of file regex_internal.c.
References automaton_destroy_state(), GNUNET_CONTAINER_DLL_remove, REGEX_INTERNAL_Transition::next, REGEX_INTERNAL_State::next, REGEX_INTERNAL_Automaton::state_count, state_remove_transition(), REGEX_INTERNAL_Automaton::states_head, REGEX_INTERNAL_Automaton::states_tail, REGEX_INTERNAL_Transition::to_state, and REGEX_INTERNAL_State::transitions_head.
Referenced by dfa_compress_paths(), dfa_remove_dead_states(), and dfa_remove_unreachable_states().
|
static |
Merge two states into one.
Will merge 's1' and 's2' into 's1' and destroy 's2'. 's1' will contain all (non-duplicate) outgoing transitions of 's2'.
ctx | context |
a | automaton |
s1 | first state |
s2 | second state, will be destroyed |
Definition at line 375 of file regex_internal.c.
References automaton_destroy_state(), REGEX_INTERNAL_Transition::from_state, GNUNET_asprintf(), GNUNET_CONTAINER_DLL_remove, GNUNET_free, GNUNET_NO, GNUNET_YES, REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_State::name, REGEX_INTERNAL_Transition::next, REGEX_INTERNAL_State::next, state_add_transition(), REGEX_INTERNAL_Automaton::state_count, state_remove_transition(), REGEX_INTERNAL_Automaton::states_head, REGEX_INTERNAL_Automaton::states_tail, t, REGEX_INTERNAL_Transition::to_state, and REGEX_INTERNAL_State::transitions_head.
Referenced by dfa_merge_nondistinguishable_states().
|
static |
Add a state to the automaton 'a', always use this function to alter the states DLL of the automaton.
a | automaton to add the state to |
s | state that should be added |
Definition at line 444 of file regex_internal.c.
References GNUNET_CONTAINER_DLL_insert, REGEX_INTERNAL_Automaton::state_count, REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.
Referenced by construct_dfa_states(), nfa_fragment_create(), and REGEX_INTERNAL_construct_dfa().
|
static |
Depth-first traversal (DFS) of all states that are reachable from state 's'.
Performs 'action' on each visited state.
s | start state. |
marks | an array of size a->state_count to remember which state was already visited. |
count | current count of the state. |
check | function that is checked before advancing on each transition in the DFS. |
check_cls | closure for check. |
action | action to be performed on each state. |
action_cls | closure for action. |
Definition at line 467 of file regex_internal.c.
References GNUNET_YES, REGEX_INTERNAL_Transition::next, t, REGEX_INTERNAL_Transition::to_state, REGEX_INTERNAL_State::transitions_head, and REGEX_INTERNAL_State::traversal_id.
Referenced by REGEX_INTERNAL_automaton_traverse().
void REGEX_INTERNAL_automaton_traverse | ( | const struct REGEX_INTERNAL_Automaton * | a, |
struct REGEX_INTERNAL_State * | start, | ||
REGEX_INTERNAL_traverse_check | check, | ||
void * | check_cls, | ||
REGEX_INTERNAL_traverse_action | action, | ||
void * | action_cls | ||
) |
Traverses the given automaton using depth-first-search (DFS) from it's start state, visiting all reachable states and calling 'action' on each one of them.
a | automaton to be traversed. |
start | start state, pass a->start or NULL to traverse the whole automaton. |
check | function that is checked before advancing on each transition in the DFS. |
check_cls | closure for check. |
action | action to be performed on each state. |
action_cls | closure for action |
Definition at line 518 of file regex_internal.c.
References automaton_state_traverse(), GNUNET_NO, REGEX_INTERNAL_State::next, start, REGEX_INTERNAL_Automaton::start, REGEX_INTERNAL_Automaton::state_count, REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_State::traversal_id.
Referenced by automaton_create_proofs(), dfa_remove_unreachable_states(), REGEX_INTERNAL_construct_nfa(), REGEX_INTERNAL_dfa_add_multi_strides(), and REGEX_TEST_automaton_save_graph().
|
static |
Compare two strings for equality.
If either is NULL they are not equal.
s1 | first string for comparison. |
s2 | second string for comparison. |
Definition at line 610 of file regex_internal.c.
References GNUNET_YES, StringBuffer::null_flag, StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs_simplify().
|
static |
Compare two strings for equality.
s1 | first string for comparison. |
s2 | second string for comparison. |
Definition at line 633 of file regex_internal.c.
References StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs_simplify().
|
static |
Reallocate the buffer of 'ret' to fit 'nlen' characters; move the existing string to the beginning of the new buffer.
ret | current buffer, to be updated |
nlen | target length for the buffer, must be at least ret->slen |
Definition at line 651 of file regex_internal.c.
References StringBuffer::abuf, StringBuffer::blen, GNUNET_assert, GNUNET_free, GNUNET_malloc, GNUNET_memcpy, StringBuffer::sbuf, and StringBuffer::slen.
Referenced by sb_append(), sb_append_cstr(), sb_printf1(), sb_printf2(), and sb_printf3().
|
static |
Append a string.
ret | where to write the result |
sarg | string to append |
Definition at line 672 of file regex_internal.c.
References StringBuffer::blen, GNUNET_memcpy, GNUNET_NO, GNUNET_YES, StringBuffer::null_flag, sb_realloc(), StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs().
|
static |
Append a C string.
ret | where to write the result |
cstr | string to append |
Definition at line 691 of file regex_internal.c.
References StringBuffer::blen, GNUNET_memcpy, GNUNET_NO, GNUNET_YES, StringBuffer::null_flag, sb_realloc(), StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs().
|
static |
Wrap a string buffer, that is, set ret to the format string which contains an "%s" which is to be replaced with the original content of 'ret'.
Note that optimizing this function is not really worth it, it is rarely called.
ret | where to write the result and take the input for %.*s from |
format | format string, fprintf-style, with exactly one "%.*s" |
extra_chars | how long will the result be, in addition to 'sarg' length |
Definition at line 716 of file regex_internal.c.
References StringBuffer::abuf, StringBuffer::blen, GNUNET_free, GNUNET_malloc, GNUNET_NO, GNUNET_snprintf(), GNUNET_YES, StringBuffer::null_flag, StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs().
|
static |
Format a string buffer.
Note that optimizing this function is not really worth it, it is rarely called.
ret | where to write the result |
format | format string, fprintf-style, with exactly one "%.*s" |
extra_chars | how long will the result be, in addition to 'sarg' length |
sarg | string to print into the format |
Definition at line 747 of file regex_internal.c.
References StringBuffer::abuf, StringBuffer::blen, GNUNET_NO, GNUNET_snprintf(), StringBuffer::null_flag, sb_realloc(), StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs_simplify().
|
static |
Format a string buffer.
ret | where to write the result |
format | format string, fprintf-style, with exactly two "%.*s" |
extra_chars | how long will the result be, in addition to 'sarg1/2' length |
sarg1 | first string to print into the format |
sarg2 | second string to print into the format |
Definition at line 771 of file regex_internal.c.
References StringBuffer::abuf, StringBuffer::blen, GNUNET_NO, GNUNET_snprintf(), StringBuffer::null_flag, sb_realloc(), StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs_simplify().
|
static |
Format a string buffer.
Note that optimizing this function is not really worth it, it is rarely called.
ret | where to write the result |
format | format string, fprintf-style, with exactly three "%.*s" |
extra_chars | how long will the result be, in addition to 'sarg1/2/3' length |
sarg1 | first string to print into the format |
sarg2 | second string to print into the format |
sarg3 | third string to print into the format |
Definition at line 804 of file regex_internal.c.
References StringBuffer::abuf, StringBuffer::blen, GNUNET_NO, GNUNET_snprintf(), StringBuffer::null_flag, sb_realloc(), StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs_simplify().
|
static |
Free resources of the given string buffer.
sb | buffer to free (actual pointer is not freed, as they should not be individually allocated) |
Definition at line 835 of file regex_internal.c.
References StringBuffer::abuf, StringBuffer::blen, GNUNET_array_grow, GNUNET_YES, StringBuffer::null_flag, StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs(), and automaton_create_proofs_simplify().
|
static |
Copy the given string buffer from 'in' to 'out'.
in | input string |
out | output string |
Definition at line 851 of file regex_internal.c.
References StringBuffer::abuf, StringBuffer::blen, GNUNET_array_grow, GNUNET_memcpy, GNUNET_YES, StringBuffer::null_flag, StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs_simplify(), and remove_epsilon().
|
static |
Copy the given string buffer from 'in' to 'out'.
cstr | input string |
out | output string |
Definition at line 874 of file regex_internal.c.
References StringBuffer::abuf, StringBuffer::blen, GNUNET_array_grow, GNUNET_memcpy, GNUNET_NO, GNUNET_YES, StringBuffer::null_flag, StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs().
|
static |
Check if the given string str needs parentheses around it when using it to generate a regex.
str | string |
Definition at line 901 of file regex_internal.c.
References end, GNUNET_break, GNUNET_NO, GNUNET_YES, StringBuffer::null_flag, op, StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs(), and automaton_create_proofs_simplify().
|
static |
Remove parentheses surrounding string str.
Example: "(a)" becomes "a", "(a|b)|(a|c)" stays the same. You need to GNUNET_free() the returned string.
str | string, modified to contain a |
Definition at line 950 of file regex_internal.c.
References end, GNUNET_break, GNUNET_YES, StringBuffer::null_flag, op, StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs_simplify().
|
static |
Check if the string 'str' starts with an epsilon (empty string).
Example: "(|a)" is starting with an epsilon.
str | string to test |
Definition at line 1007 of file regex_internal.c.
References GNUNET_YES, StringBuffer::null_flag, StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs_simplify().
|
static |
Remove an epsilon from the string str.
Where epsilon is an empty string Example: str = "(|a|b|c)", result: "a|b|c" The returned string needs to be freed.
str | original string |
ret | where to return string without preceding epsilon, string 'str' if no preceding epsilon could be found, NULL if 'str' was NULL |
Definition at line 1025 of file regex_internal.c.
References StringBuffer::abuf, StringBuffer::blen, GNUNET_array_grow, GNUNET_memcpy, GNUNET_YES, StringBuffer::null_flag, sb_strdup(), StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs_simplify().
|
static |
Compare n bytes of 'str1' and 'str2'.
str1 | first string to compare |
str2 | second string for comparison |
n | number of bytes to compare |
Definition at line 1059 of file regex_internal.c.
References GNUNET_MAX, StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs_simplify().
|
static |
Compare n bytes of 'str1' and 'str2'.
str1 | first string to compare |
str2 | second C string for comparison |
n | number of bytes to compare (and length of str2) |
Definition at line 1084 of file regex_internal.c.
References StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs_simplify().
|
static |
Initialize string buffer for storing strings of up to n characters.
sb | buffer to initialize |
n | desired target length |
Definition at line 1100 of file regex_internal.c.
References StringBuffer::abuf, StringBuffer::blen, GNUNET_malloc, GNUNET_NO, StringBuffer::null_flag, StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs(), and automaton_create_proofs_simplify().
|
static |
Compare 'str1', starting from position 'k', with whole 'str2'.
str1 | first string to compare, starting from position 'k' |
str2 | second string for comparison |
k | starting position in 'str1' |
Definition at line 1119 of file regex_internal.c.
References GNUNET_YES, StringBuffer::null_flag, StringBuffer::sbuf, and StringBuffer::slen.
Referenced by automaton_create_proofs_simplify().
|
static |
Helper function used as 'action' in 'REGEX_INTERNAL_automaton_traverse' function to create the depth-first numbering of the states.
cls | states array. |
count | current state counter. |
s | current state. |
Definition at line 1139 of file regex_internal.c.
References REGEX_INTERNAL_State::dfs_id.
Referenced by automaton_create_proofs(), and REGEX_INTERNAL_construct_nfa().
|
static |
Construct the regular expression given the inductive step, $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}, and simplify the resulting expression saved in R_cur_ij.
R_last_ij | value of $R^{(k-1)_{ij}. |
R_last_ik | value of $R^{(k-1)_{ik}. |
R_last_kk | value of $R^{(k-1)_{kk}. |
R_last_kj | value of $R^{(k-1)_{kj}. |
R_cur_ij | result for this inductive step is saved in R_cur_ij, R_cur_ij is expected to be NULL when called! |
R_cur_l | optimization – kept between iterations to avoid realloc |
R_cur_r | optimization – kept between iterations to avoid realloc |
Definition at line 1171 of file regex_internal.c.
References StringBuffer::abuf, GNUNET_memcpy, GNUNET_NO, GNUNET_YES, has_epsilon(), needs_parentheses(), StringBuffer::null_flag, remove_epsilon(), remove_parentheses(), sb_free(), sb_init(), sb_nullstrcmp(), sb_printf1(), sb_printf2(), sb_printf3(), sb_strcmp(), sb_strdup(), sb_strkcmp(), sb_strncmp(), sb_strncmp_cstr(), StringBuffer::sbuf, StringBuffer::slen, and StringBuffer::synced.
Referenced by automaton_create_proofs().
|
static |
Create proofs for all states in the given automaton.
Implementation of the algorithm descriped in chapter 3.2.1 of "Automata Theory, Languages, and Computation 3rd Edition" by Hopcroft, Motwani and Ullman.
Each state in the automaton gets assigned 'proof' and 'hash' (hash of the proof) fields. The starting state will only have a valid proof/hash if it has any incoming transitions.
a | automaton for which to assign proofs and hashes, must not be NULL |
Definition at line 1575 of file regex_internal.c.
References automaton_create_proofs_simplify(), REGEX_INTERNAL_Automaton::canonical_regex, REGEX_INTERNAL_State::dfs_id, GNUNET_assert, GNUNET_CRYPTO_hash(), GNUNET_ERROR_TYPE_ERROR, GNUNET_free, GNUNET_log_strerror, GNUNET_malloc_large, GNUNET_NO, GNUNET_OK, GNUNET_strndup, GNUNET_SYSERR, GNUNET_YES, REGEX_INTERNAL_Transition::label, needs_parentheses(), REGEX_INTERNAL_Transition::next, StringBuffer::null_flag, number_states(), proof, REGEX_INTERNAL_State::proof, REGEX_INTERNAL_automaton_traverse(), sb_append(), sb_append_cstr(), sb_free(), sb_init(), sb_strdup_cstr(), sb_wrap(), StringBuffer::sbuf, StringBuffer::slen, REGEX_INTERNAL_Automaton::start, REGEX_INTERNAL_Automaton::state_count, t, and REGEX_INTERNAL_Transition::to_state.
Referenced by REGEX_INTERNAL_construct_dfa().
|
static |
Creates a new DFA state based on a set of NFA states.
Needs to be freed using automaton_destroy_state.
ctx | context |
nfa_states | set of NFA states on which the DFA should be based on |
Definition at line 1741 of file regex_internal.c.
References REGEX_INTERNAL_State::accepting, GNUNET_asprintf(), GNUNET_malloc, GNUNET_new, GNUNET_realloc, GNUNET_snprintf(), REGEX_INTERNAL_State::id, REGEX_INTERNAL_State::index, REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_StateSet_MDLL::len, REGEX_INTERNAL_State::lowlink, REGEX_INTERNAL_State::name, REGEX_INTERNAL_Transition::next, REGEX_INTERNAL_State::nfa_set, REGEX_INTERNAL_StateSet::off, state_add_transition(), REGEX_INTERNAL_Context::state_id, REGEX_INTERNAL_StateSet::states, and REGEX_INTERNAL_State::transitions_head.
Referenced by construct_dfa_states(), and REGEX_INTERNAL_construct_dfa().
|
static |
Move from the given state 's' to the next state on transition 'str'.
Consumes as much of the given 'str' as possible (usefull for strided DFAs). On return 's' will point to the next state, and the length of the substring used for this transition will be returned. If no transition possible 0 is returned and 's' points to NULL.
s | starting state, will point to the next state or NULL (if no transition possible) |
str | edge label to follow (will match longest common prefix) |
Definition at line 1811 of file regex_internal.c.
References REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_StateSet_MDLL::len, REGEX_INTERNAL_Transition::next, t, and REGEX_INTERNAL_Transition::to_state.
Referenced by evaluate_dfa().
|
static |
Set the given state 'marked' to GNUNET_YES.
Used by the dfa_remove_unreachable_states() function to detect unreachable states in the automaton.
cls | closure, not used. |
count | count, not used. |
s | state where the marked attribute will be set to GNUNET_YES. |
Definition at line 1852 of file regex_internal.c.
References GNUNET_YES, and REGEX_INTERNAL_State::marked.
Referenced by dfa_remove_unreachable_states().
|
static |
Remove all unreachable states from DFA 'a'.
Unreachable states are those states that are not reachable from the starting state.
a | DFA automaton |
Definition at line 1867 of file regex_internal.c.
References automaton_remove_state(), GNUNET_NO, mark_states(), REGEX_INTERNAL_State::marked, REGEX_INTERNAL_State::next, REGEX_INTERNAL_automaton_traverse(), REGEX_INTERNAL_Automaton::start, and REGEX_INTERNAL_Automaton::states_head.
Referenced by dfa_minimize().
|
static |
Remove all dead states from the DFA 'a'.
Dead states are those states that do not transition to any other state but themselves.
a | DFA automaton |
Definition at line 1901 of file regex_internal.c.
References REGEX_INTERNAL_State::accepting, automaton_remove_state(), DFA, GNUNET_assert, REGEX_INTERNAL_Transition::next, REGEX_INTERNAL_State::next, REGEX_INTERNAL_Automaton::states_head, t, REGEX_INTERNAL_Transition::to_state, REGEX_INTERNAL_State::transitions_head, and REGEX_INTERNAL_Automaton::type.
Referenced by dfa_minimize().
|
static |
Merge all non distinguishable states in the DFA 'a'.
ctx | context |
a | DFA automaton |
Definition at line 1944 of file regex_internal.c.
References REGEX_INTERNAL_State::accepting, automaton_merge_states(), GNUNET_ERROR_TYPE_ERROR, GNUNET_free, GNUNET_log, GNUNET_log_strerror, GNUNET_malloc_large, GNUNET_OK, GNUNET_SYSERR, REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_State::marked, REGEX_INTERNAL_Transition::next, REGEX_INTERNAL_State::next, REGEX_INTERNAL_Automaton::state_count, REGEX_INTERNAL_Automaton::states_head, table, REGEX_INTERNAL_Transition::to_state, REGEX_INTERNAL_State::transition_count, and REGEX_INTERNAL_State::transitions_head.
Referenced by dfa_minimize().
|
static |
Minimize the given DFA 'a' by removing all unreachable states, removing all dead states and merging all non distinguishable states.
ctx | context |
a | DFA automaton |
Definition at line 2064 of file regex_internal.c.
References DFA, dfa_merge_nondistinguishable_states(), dfa_remove_dead_states(), dfa_remove_unreachable_states(), GNUNET_assert, GNUNET_OK, GNUNET_SYSERR, and REGEX_INTERNAL_Automaton::type.
Referenced by REGEX_INTERNAL_construct_dfa().
|
static |
Recursive helper function to add strides to a DFA.
cls | context, contains stride length and strided transitions DLL. |
depth | current depth of the depth-first traversal of the graph. |
label | current label, string that contains all labels on the path from 'start' to 's'. |
start | start state for the depth-first traversal of the graph. |
s | current state in the depth-first traversal |
Definition at line 2119 of file regex_internal.c.
References ctx, REGEX_INTERNAL_Transition::from_state, GNUNET_asprintf(), GNUNET_CONTAINER_DLL_insert, GNUNET_free, GNUNET_new, GNUNET_strdup, REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_Transition::next, start, REGEX_INTERNAL_Strided_Context::stride, t, REGEX_INTERNAL_Transition::to_state, REGEX_INTERNAL_State::transitions_head, REGEX_INTERNAL_Strided_Context::transitions_head, and REGEX_INTERNAL_Strided_Context::transitions_tail.
Referenced by dfa_add_multi_strides().
|
static |
Function called for each state in the DFA.
Starts a traversal of depth set in context starting from state 's'.
cls | context. |
count | not used. |
s | current state. |
Definition at line 2175 of file regex_internal.c.
References dfa_add_multi_strides_helper().
Referenced by REGEX_INTERNAL_dfa_add_multi_strides().
void REGEX_INTERNAL_dfa_add_multi_strides | ( | struct REGEX_INTERNAL_Context * | regex_ctx, |
struct REGEX_INTERNAL_Automaton * | dfa, | ||
const unsigned int | stride_len | ||
) |
Adds multi-strided transitions to the given 'dfa'.
regex_ctx | regex context needed to add transitions to the automaton. |
dfa | DFA to which the multi strided transitions should be added. |
stride_len | length of the strides. |
Definition at line 2191 of file regex_internal.c.
References dfa_add_multi_strides(), REGEX_INTERNAL_Transition::from_state, GNUNET_CONTAINER_DLL_remove, GNUNET_free, GNUNET_YES, REGEX_INTERNAL_Automaton::is_multistrided, REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_Transition::next, REGEX_INTERNAL_automaton_traverse(), REGEX_INTERNAL_Automaton::start, state_add_transition(), t, REGEX_INTERNAL_Transition::to_state, REGEX_INTERNAL_Strided_Context::transitions_head, and REGEX_INTERNAL_Strided_Context::transitions_tail.
void dfa_compress_paths_helper | ( | struct REGEX_INTERNAL_Automaton * | dfa, |
struct REGEX_INTERNAL_State * | start, | ||
struct REGEX_INTERNAL_State * | cur, | ||
char * | label, | ||
unsigned int | max_len, | ||
struct REGEX_INTERNAL_Transition ** | transitions_head, | ||
struct REGEX_INTERNAL_Transition ** | transitions_tail | ||
) |
Recursive Helper function for DFA path compression.
Does DFS on the DFA graph and adds new transitions to the given transitions DLL and marks states that should be removed by setting state->contained to GNUNET_YES.
dfa | DFA for which the paths should be compressed. |
start | starting state for linear path search. |
cur | current state in the recursive DFS. |
label | current label (string of traversed labels). |
max_len | maximal path compression length. |
transitions_head | transitions DLL. |
transitions_tail | transitions DLL. |
Definition at line 2239 of file regex_internal.c.
References REGEX_INTERNAL_State::accepting, REGEX_INTERNAL_State::contained, REGEX_INTERNAL_Transition::from_state, GNUNET_asprintf(), GNUNET_CONTAINER_DLL_insert, GNUNET_free, GNUNET_new, GNUNET_NO, GNUNET_REGEX_INITIAL_BYTES, GNUNET_strdup, GNUNET_YES, REGEX_INTERNAL_State::incoming_transition_count, REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_State::marked, REGEX_INTERNAL_Transition::next, start, REGEX_INTERNAL_Automaton::start, t, REGEX_INTERNAL_Transition::to_state, and REGEX_INTERNAL_State::transitions_head.
Referenced by dfa_compress_paths().
|
static |
Compress paths in the given 'dfa'.
Linear paths like 0->1->2->3 will be compressed to 0->3 by combining transitions.
regex_ctx | context for adding new transitions. |
dfa | DFA representation, will directly modify the given DFA. |
max_len | maximal length of the compressed paths. |
Definition at line 2318 of file regex_internal.c.
References automaton_remove_state(), REGEX_INTERNAL_State::contained, dfa_compress_paths_helper(), REGEX_INTERNAL_Transition::from_state, GNUNET_CONTAINER_DLL_remove, GNUNET_free, GNUNET_NO, GNUNET_YES, REGEX_INTERNAL_State::incoming_transition_count, REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_State::marked, REGEX_INTERNAL_Transition::next, REGEX_INTERNAL_State::next, REGEX_INTERNAL_Automaton::start, state_add_transition(), REGEX_INTERNAL_Automaton::states_head, t, REGEX_INTERNAL_Transition::to_state, and REGEX_INTERNAL_State::transitions_head.
Referenced by REGEX_INTERNAL_construct_dfa().
|
static |
Creates a new NFA fragment.
Needs to be cleared using automaton_fragment_clear.
start | starting state |
end | end state |
Definition at line 2388 of file regex_internal.c.
References automaton_add_state(), end, REGEX_INTERNAL_Automaton::end, GNUNET_new, NFA, start, REGEX_INTERNAL_Automaton::start, REGEX_INTERNAL_Automaton::state_count, and REGEX_INTERNAL_Automaton::type.
Referenced by nfa_add_alternation(), nfa_add_concatenation(), nfa_add_label(), nfa_add_question_op(), and nfa_add_star_op().
|
static |
Adds a list of states to the given automaton 'n'.
n | automaton to which the states should be added |
states_head | head of the DLL of states |
states_tail | tail of the DLL of states |
Definition at line 2423 of file regex_internal.c.
References GNUNET_ERROR_TYPE_ERROR, GNUNET_log, REGEX_INTERNAL_State::next, REGEX_INTERNAL_Automaton::state_count, REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.
Referenced by nfa_add_alternation(), nfa_add_concatenation(), nfa_add_question_op(), and nfa_add_star_op().
|
static |
Creates a new NFA state.
Needs to be freed using automaton_destroy_state.
ctx | context |
accepting | is it an accepting state or not |
Definition at line 2462 of file regex_internal.c.
References REGEX_INTERNAL_State::accepting, REGEX_INTERNAL_State::contained, GNUNET_asprintf(), GNUNET_new, GNUNET_NO, REGEX_INTERNAL_State::id, REGEX_INTERNAL_State::index, REGEX_INTERNAL_State::lowlink, REGEX_INTERNAL_State::marked, REGEX_INTERNAL_State::name, REGEX_INTERNAL_State::scc_id, and REGEX_INTERNAL_Context::state_id.
Referenced by nfa_add_alternation(), nfa_add_label(), nfa_add_question_op(), and nfa_add_star_op().
|
static |
Calculates the closure set for the given set of states.
ret | set to sorted nfa closure on 'label' (epsilon closure if 'label' is NULL) |
nfa | the NFA containing 's' |
states | list of states on which to base the closure on |
label | transitioning label for which to base the closure on, pass NULL for epsilon transition |
Definition at line 2491 of file regex_internal.c.
References REGEX_INTERNAL_State::contained, GNUNET_CONTAINER_MDLL_insert, GNUNET_CONTAINER_MDLL_insert_tail, GNUNET_CONTAINER_MDLL_remove, REGEX_INTERNAL_StateSet_MDLL::head, REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_StateSet_MDLL::len, REGEX_INTERNAL_Transition::next, nullstrcmp(), REGEX_INTERNAL_StateSet::off, state_compare(), state_set_append(), REGEX_INTERNAL_StateSet::states, REGEX_INTERNAL_StateSet_MDLL::tail, REGEX_INTERNAL_Transition::to_state, and REGEX_INTERNAL_State::transitions_head.
Referenced by construct_dfa_states(), evaluate_nfa(), and REGEX_INTERNAL_construct_dfa().
|
static |
Pops two NFA fragments (a, b) from the stack and concatenates them (ab)
ctx | context |
Definition at line 2564 of file regex_internal.c.
References REGEX_INTERNAL_State::accepting, automaton_fragment_clear(), testconfigure::b, REGEX_INTERNAL_Automaton::end, GNUNET_assert, GNUNET_CONTAINER_DLL_insert_tail, GNUNET_CONTAINER_DLL_remove, nfa_add_states(), nfa_fragment_create(), REGEX_INTERNAL_Context::stack_head, REGEX_INTERNAL_Context::stack_tail, REGEX_INTERNAL_Automaton::start, state_add_transition(), REGEX_INTERNAL_Automaton::state_count, REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.
Referenced by REGEX_INTERNAL_construct_nfa().
|
static |
Pops a NFA fragment from the stack (a) and adds a new fragment (a*)
ctx | context |
Definition at line 2600 of file regex_internal.c.
References REGEX_INTERNAL_State::accepting, automaton_fragment_clear(), end, REGEX_INTERNAL_Automaton::end, GNUNET_CONTAINER_DLL_insert_tail, GNUNET_CONTAINER_DLL_remove, GNUNET_ERROR_TYPE_ERROR, GNUNET_log, nfa_add_states(), nfa_fragment_create(), nfa_state_create(), REGEX_INTERNAL_Context::stack_head, REGEX_INTERNAL_Context::stack_tail, start, REGEX_INTERNAL_Automaton::start, state_add_transition(), REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.
Referenced by REGEX_INTERNAL_construct_nfa().
|
static |
Pops an NFA fragment (a) from the stack and adds a new fragment (a+)
ctx | context |
Definition at line 2644 of file regex_internal.c.
References REGEX_INTERNAL_Automaton::end, GNUNET_CONTAINER_DLL_insert_tail, GNUNET_CONTAINER_DLL_remove, GNUNET_ERROR_TYPE_ERROR, GNUNET_log, REGEX_INTERNAL_Context::stack_head, REGEX_INTERNAL_Context::stack_tail, REGEX_INTERNAL_Automaton::start, and state_add_transition().
Referenced by REGEX_INTERNAL_construct_nfa().
|
static |
Pops an NFA fragment (a) from the stack and adds a new fragment (a?)
ctx | context |
Definition at line 2672 of file regex_internal.c.
References REGEX_INTERNAL_State::accepting, automaton_fragment_clear(), end, REGEX_INTERNAL_Automaton::end, GNUNET_CONTAINER_DLL_insert_tail, GNUNET_CONTAINER_DLL_remove, GNUNET_ERROR_TYPE_ERROR, GNUNET_log, nfa_add_states(), nfa_fragment_create(), nfa_state_create(), REGEX_INTERNAL_Context::stack_head, REGEX_INTERNAL_Context::stack_tail, start, REGEX_INTERNAL_Automaton::start, state_add_transition(), REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.
Referenced by REGEX_INTERNAL_construct_nfa().
|
static |
Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment that alternates between a and b (a|b)
ctx | context |
Definition at line 2713 of file regex_internal.c.
References REGEX_INTERNAL_State::accepting, automaton_fragment_clear(), testconfigure::b, end, REGEX_INTERNAL_Automaton::end, GNUNET_assert, GNUNET_CONTAINER_DLL_insert_tail, GNUNET_CONTAINER_DLL_remove, nfa_add_states(), nfa_fragment_create(), nfa_state_create(), REGEX_INTERNAL_Context::stack_head, REGEX_INTERNAL_Context::stack_tail, start, REGEX_INTERNAL_Automaton::start, state_add_transition(), REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.
Referenced by REGEX_INTERNAL_construct_nfa().
|
static |
Adds a new nfa fragment to the stack.
ctx | context |
label | label for nfa transition |
Definition at line 2757 of file regex_internal.c.
References end, GNUNET_assert, GNUNET_CONTAINER_DLL_insert_tail, nfa_fragment_create(), nfa_state_create(), REGEX_INTERNAL_Context::stack_head, REGEX_INTERNAL_Context::stack_tail, start, and state_add_transition().
Referenced by REGEX_INTERNAL_construct_nfa().
|
static |
Initialize a new context.
ctx | context |
Definition at line 2780 of file regex_internal.c.
References GNUNET_ERROR_TYPE_ERROR, GNUNET_log, REGEX_INTERNAL_Context::stack_head, REGEX_INTERNAL_Context::stack_tail, REGEX_INTERNAL_Context::state_id, and REGEX_INTERNAL_Context::transition_id.
Referenced by REGEX_INTERNAL_construct_dfa(), and REGEX_INTERNAL_construct_nfa().
struct REGEX_INTERNAL_Automaton* REGEX_INTERNAL_construct_nfa | ( | const char * | regex, |
const size_t | len | ||
) |
Construct an NFA by parsing the regex string of length 'len'.
regex | regular expression string |
len | length of the string |
Definition at line 2803 of file regex_internal.c.
References GNUNET_array_grow, GNUNET_CONTAINER_DLL_remove, GNUNET_ERROR_TYPE_ERROR, GNUNET_free, GNUNET_log, GNUNET_NO, GNUNET_strdup, REGEX_INTERNAL_Automaton::is_multistrided, nfa_add_alternation(), nfa_add_concatenation(), nfa_add_label(), nfa_add_plus_op(), nfa_add_question_op(), nfa_add_star_op(), number_states(), p, REGEX_INTERNAL_Automaton::regex, REGEX_INTERNAL_automaton_destroy(), REGEX_INTERNAL_automaton_traverse(), REGEX_INTERNAL_context_init(), REGEX_INTERNAL_Context::stack_head, and REGEX_INTERNAL_Context::stack_tail.
Referenced by REGEX_INTERNAL_construct_dfa().
|
static |
Create DFA states based on given 'nfa' and starting with 'dfa_state'.
ctx | context. |
nfa | NFA automaton. |
dfa | DFA automaton. |
dfa_state | current dfa state, pass epsilon closure of first nfa state for starting. |
Definition at line 2997 of file regex_internal.c.
References automaton_add_state(), dfa_state_create(), REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_Transition::next, REGEX_INTERNAL_State::next, nfa_closure_set_create(), REGEX_INTERNAL_State::nfa_set, state_set_clear(), state_set_compare(), REGEX_INTERNAL_Automaton::states_head, REGEX_INTERNAL_Transition::to_state, and REGEX_INTERNAL_State::transitions_head.
Referenced by REGEX_INTERNAL_construct_dfa().
struct REGEX_INTERNAL_Automaton* REGEX_INTERNAL_construct_dfa | ( | const char * | regex, |
const size_t | len, | ||
unsigned int | max_path_len | ||
) |
Construct DFA for the given 'regex' of length 'len'.
Path compression means, that for example a DFA o -> a -> b -> c -> o will be compressed to o -> abc -> o. Note that this parameter influences the non-determinism of states of the resulting NFA in the DHT (number of outgoing edges with the same label). For example for an application that stores IPv4 addresses as bitstrings it could make sense to limit the path compression to 4 or 8.
regex | regular expression string. |
len | length of the regular expression. |
max_path_len | limit the path compression length to the given value. If set to 1, no path compression is applied. Set to 0 for maximal possible path compression (generally not desireable). |
Definition at line 3062 of file regex_internal.c.
References automaton_add_state(), automaton_create_proofs(), construct_dfa_states(), DFA, dfa_compress_paths(), dfa_minimize(), dfa_state_create(), GNUNET_ERROR_TYPE_ERROR, GNUNET_log, GNUNET_new, GNUNET_OK, GNUNET_strdup, nfa_closure_set_create(), REGEX_INTERNAL_Automaton::regex, REGEX_INTERNAL_automaton_destroy(), REGEX_INTERNAL_construct_nfa(), REGEX_INTERNAL_context_init(), REGEX_INTERNAL_Automaton::start, state_set_append(), state_set_clear(), and REGEX_INTERNAL_Automaton::type.
Referenced by announce_regex(), main(), and REGEX_INTERNAL_announce().