GNUnet 0.21.1
regex_internal.c File Reference
#include "platform.h"
#include "gnunet_util_lib.h"
#include "gnunet_regex_service.h"
#include "regex_internal_lib.h"
#include "regex_internal.h"
Include dependency graph for regex_internal.c:

Go to the source code of this file.

Data Structures

struct  REGEX_INTERNAL_StateSet_MDLL
 Set of states using MDLL API. More...
 
struct  StringBuffer
 String container for faster string operations. More...
 
struct  REGEX_INTERNAL_Strided_Context
 Context for adding strided transitions to a DFA. More...
 
struct  temporal_state_store
 Struct to hold all the relevant state information in the HashMap. More...
 
struct  client_iterator
 Store regex iterator and cls in one place to pass to the hashmap iterator. More...
 

Macros

#define REGEX_DEBUG_DFA   GNUNET_NO
 Set this to GNUNET_YES to enable state naming. More...
 
#define PRIS(a)
 

Functions

static void state_set_append (struct REGEX_INTERNAL_StateSet *set, struct REGEX_INTERNAL_State *state)
 Append state to the given StateSet. More...
 
static int nullstrcmp (const char *str1, const char *str2)
 Compare two strings for equality. More...
 
static void state_add_transition (struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_State *from_state, const char *label, struct REGEX_INTERNAL_State *to_state)
 Adds a transition from one state to another on label. More...
 
static void state_remove_transition (struct REGEX_INTERNAL_State *state, struct REGEX_INTERNAL_Transition *transition)
 Remove a 'transition' from 'state'. More...
 
static int state_compare (const void *a, const void *b)
 Compare two states. More...
 
static unsigned int state_get_edges (struct REGEX_INTERNAL_State *s, struct REGEX_BLOCK_Edge *edges)
 Get all edges leaving state s. More...
 
static int state_set_compare (struct REGEX_INTERNAL_StateSet *sset1, struct REGEX_INTERNAL_StateSet *sset2)
 Compare to state sets by comparing the id's of the states that are contained in each set. More...
 
static void state_set_clear (struct REGEX_INTERNAL_StateSet *set)
 Clears the given StateSet 'set'. More...
 
static void automaton_fragment_clear (struct REGEX_INTERNAL_Automaton *a)
 Clears an automaton fragment. More...
 
static void automaton_destroy_state (struct REGEX_INTERNAL_State *s)
 Frees the memory used by State s. More...
 
static void automaton_remove_state (struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s)
 Remove a state from the given automaton 'a'. More...
 
static void automaton_merge_states (struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s1, struct REGEX_INTERNAL_State *s2)
 Merge two states into one. More...
 
static void automaton_add_state (struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s)
 Add a state to the automaton 'a', always use this function to alter the states DLL of the automaton. More...
 
static void automaton_state_traverse (struct REGEX_INTERNAL_State *s, int *marks, unsigned int *count, REGEX_INTERNAL_traverse_check check, void *check_cls, REGEX_INTERNAL_traverse_action action, void *action_cls)
 Depth-first traversal (DFS) of all states that are reachable from state 's'. More...
 
void REGEX_INTERNAL_automaton_traverse (const struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *start, REGEX_INTERNAL_traverse_check check, void *check_cls, REGEX_INTERNAL_traverse_action action, void *action_cls)
 Traverses the given automaton using depth-first-search (DFS) from it's start state, visiting all reachable states and calling 'action' on each one of them. More...
 
static int sb_nullstrcmp (const struct StringBuffer *s1, const struct StringBuffer *s2)
 Compare two strings for equality. More...
 
static int sb_strcmp (const struct StringBuffer *s1, const struct StringBuffer *s2)
 Compare two strings for equality. More...
 
static void sb_realloc (struct StringBuffer *ret, size_t nlen)
 Reallocate the buffer of 'ret' to fit 'nlen' characters; move the existing string to the beginning of the new buffer. More...
 
static void sb_append (struct StringBuffer *ret, const struct StringBuffer *sarg)
 Append a string. More...
 
static void sb_append_cstr (struct StringBuffer *ret, const char *cstr)
 Append a C string. More...
 
static void sb_wrap (struct StringBuffer *ret, const char *format, size_t extra_chars)
 Wrap a string buffer, that is, set ret to the format string which contains an "%s" which is to be replaced with the original content of 'ret'. More...
 
static void sb_printf1 (struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg)
 Format a string buffer. More...
 
static void sb_printf2 (struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg1, const struct StringBuffer *sarg2)
 Format a string buffer. More...
 
static void sb_printf3 (struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg1, const struct StringBuffer *sarg2, const struct StringBuffer *sarg3)
 Format a string buffer. More...
 
static void sb_free (struct StringBuffer *sb)
 Free resources of the given string buffer. More...
 
static void sb_strdup (struct StringBuffer *out, const struct StringBuffer *in)
 Copy the given string buffer from 'in' to 'out'. More...
 
static void sb_strdup_cstr (struct StringBuffer *out, const char *cstr)
 Copy the given string buffer from 'in' to 'out'. More...
 
static int needs_parentheses (const struct StringBuffer *str)
 Check if the given string str needs parentheses around it when using it to generate a regex. More...
 
static void remove_parentheses (struct StringBuffer *str)
 Remove parentheses surrounding string str. More...
 
static int has_epsilon (const struct StringBuffer *str)
 Check if the string 'str' starts with an epsilon (empty string). More...
 
static void remove_epsilon (const struct StringBuffer *str, struct StringBuffer *ret)
 Remove an epsilon from the string str. More...
 
static int sb_strncmp (const struct StringBuffer *str1, const struct StringBuffer *str2, size_t n)
 Compare n bytes of 'str1' and 'str2'. More...
 
static int sb_strncmp_cstr (const struct StringBuffer *str1, const char *str2, size_t n)
 Compare n bytes of 'str1' and 'str2'. More...
 
static void sb_init (struct StringBuffer *sb, size_t n)
 Initialize string buffer for storing strings of up to n characters. More...
 
static int sb_strkcmp (const struct StringBuffer *str1, const struct StringBuffer *str2, size_t k)
 Compare 'str1', starting from position 'k', with whole 'str2'. More...
 
static void number_states (void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
 Helper function used as 'action' in 'REGEX_INTERNAL_automaton_traverse' function to create the depth-first numbering of the states. More...
 
static void automaton_create_proofs_simplify (const struct StringBuffer *R_last_ij, const struct StringBuffer *R_last_ik, const struct StringBuffer *R_last_kk, const struct StringBuffer *R_last_kj, struct StringBuffer *R_cur_ij, struct StringBuffer *R_cur_l, struct StringBuffer *R_cur_r)
 Construct the regular expression given the inductive step, $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}, and simplify the resulting expression saved in R_cur_ij. More...
 
static int automaton_create_proofs (struct REGEX_INTERNAL_Automaton *a)
 Create proofs for all states in the given automaton. More...
 
static struct REGEX_INTERNAL_Statedfa_state_create (struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_StateSet *nfa_states)
 Creates a new DFA state based on a set of NFA states. More...
 
static unsigned int dfa_move (struct REGEX_INTERNAL_State **s, const char *str)
 Move from the given state 's' to the next state on transition 'str'. More...
 
static void mark_states (void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
 Set the given state 'marked' to GNUNET_YES. More...
 
static void dfa_remove_unreachable_states (struct REGEX_INTERNAL_Automaton *a)
 Remove all unreachable states from DFA 'a'. More...
 
static void dfa_remove_dead_states (struct REGEX_INTERNAL_Automaton *a)
 Remove all dead states from the DFA 'a'. More...
 
static int dfa_merge_nondistinguishable_states (struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a)
 Merge all non distinguishable states in the DFA 'a'. More...
 
static int dfa_minimize (struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a)
 Minimize the given DFA 'a' by removing all unreachable states, removing all dead states and merging all non distinguishable states. More...
 
static void dfa_add_multi_strides_helper (void *cls, const unsigned int depth, char *label, struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *s)
 Recursive helper function to add strides to a DFA. More...
 
static void dfa_add_multi_strides (void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
 Function called for each state in the DFA. More...
 
void REGEX_INTERNAL_dfa_add_multi_strides (struct REGEX_INTERNAL_Context *regex_ctx, struct REGEX_INTERNAL_Automaton *dfa, const unsigned int stride_len)
 Adds multi-strided transitions to the given 'dfa'. More...
 
void dfa_compress_paths_helper (struct REGEX_INTERNAL_Automaton *dfa, struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *cur, char *label, unsigned int max_len, struct REGEX_INTERNAL_Transition **transitions_head, struct REGEX_INTERNAL_Transition **transitions_tail)
 Recursive Helper function for DFA path compression. More...
 
static void dfa_compress_paths (struct REGEX_INTERNAL_Context *regex_ctx, struct REGEX_INTERNAL_Automaton *dfa, unsigned int max_len)
 Compress paths in the given 'dfa'. More...
 
static struct REGEX_INTERNAL_Automatonnfa_fragment_create (struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *end)
 Creates a new NFA fragment. More...
 
static void nfa_add_states (struct REGEX_INTERNAL_Automaton *n, struct REGEX_INTERNAL_State *states_head, struct REGEX_INTERNAL_State *states_tail)
 Adds a list of states to the given automaton 'n'. More...
 
static struct REGEX_INTERNAL_Statenfa_state_create (struct REGEX_INTERNAL_Context *ctx, int accepting)
 Creates a new NFA state. More...
 
static void nfa_closure_set_create (struct REGEX_INTERNAL_StateSet *ret, struct REGEX_INTERNAL_Automaton *nfa, struct REGEX_INTERNAL_StateSet *states, const char *label)
 Calculates the closure set for the given set of states. More...
 
static void nfa_add_concatenation (struct REGEX_INTERNAL_Context *ctx)
 Pops two NFA fragments (a, b) from the stack and concatenates them (ab) More...
 
static void nfa_add_star_op (struct REGEX_INTERNAL_Context *ctx)
 Pops a NFA fragment from the stack (a) and adds a new fragment (a*) More...
 
static void nfa_add_plus_op (struct REGEX_INTERNAL_Context *ctx)
 Pops an NFA fragment (a) from the stack and adds a new fragment (a+) More...
 
static void nfa_add_question_op (struct REGEX_INTERNAL_Context *ctx)
 Pops an NFA fragment (a) from the stack and adds a new fragment (a?) More...
 
static void nfa_add_alternation (struct REGEX_INTERNAL_Context *ctx)
 Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment that alternates between a and b (a|b) More...
 
static void nfa_add_label (struct REGEX_INTERNAL_Context *ctx, const char *label)
 Adds a new nfa fragment to the stack. More...
 
static void REGEX_INTERNAL_context_init (struct REGEX_INTERNAL_Context *ctx)
 Initialize a new context. More...
 
struct REGEX_INTERNAL_AutomatonREGEX_INTERNAL_construct_nfa (const char *regex, const size_t len)
 Construct an NFA by parsing the regex string of length 'len'. More...
 
static void construct_dfa_states (struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *nfa, struct REGEX_INTERNAL_Automaton *dfa, struct REGEX_INTERNAL_State *dfa_state)
 Create DFA states based on given 'nfa' and starting with 'dfa_state'. More...
 
struct REGEX_INTERNAL_AutomatonREGEX_INTERNAL_construct_dfa (const char *regex, const size_t len, unsigned int max_path_len)
 Construct DFA for the given 'regex' of length 'len'. More...
 
void REGEX_INTERNAL_automaton_destroy (struct REGEX_INTERNAL_Automaton *a)
 Free the memory allocated by constructing the REGEX_INTERNAL_Automaton. More...
 
static int evaluate_dfa (struct REGEX_INTERNAL_Automaton *a, const char *string)
 Evaluates the given string using the given DFA automaton. More...
 
static int evaluate_nfa (struct REGEX_INTERNAL_Automaton *a, const char *string)
 Evaluates the given string using the given NFA automaton. More...
 
int REGEX_INTERNAL_eval (struct REGEX_INTERNAL_Automaton *a, const char *string)
 Evaluates the given 'string' against the given compiled regex. More...
 
const char * REGEX_INTERNAL_get_canonical_regex (struct REGEX_INTERNAL_Automaton *a)
 Get the canonical regex of the given automaton. More...
 
unsigned int REGEX_INTERNAL_get_transition_count (struct REGEX_INTERNAL_Automaton *a)
 Get the number of transitions that are contained in the given automaton. More...
 
size_t REGEX_INTERNAL_get_first_key (const char *input_string, size_t string_len, struct GNUNET_HashCode *key)
 Get the first key for the given input_string. More...
 
static void iterate_initial_edge (unsigned int min_len, unsigned int max_len, char *consumed_string, struct REGEX_INTERNAL_State *state, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
 Recursive function that calls the iterator for each synthetic start state. More...
 
void REGEX_INTERNAL_iterate_all_edges (struct REGEX_INTERNAL_Automaton *a, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
 Iterate over all edges starting from start state of automaton 'a'. More...
 
static void store_all_states (void *cls, const struct GNUNET_HashCode *key, const char *proof, int accepting, unsigned int num_edges, const struct REGEX_BLOCK_Edge *edges)
 Iterator over all edges of a dfa. More...
 
static void mark_as_reachable (struct temporal_state_store *state, struct GNUNET_CONTAINER_MultiHashMap *hm)
 Mark state as reachable and call recursively on all its edges. More...
 
static int reachability_iterator (void *cls, const struct GNUNET_HashCode *key, void *value)
 Iterator over hash map entries to mark the ones that are reachable. More...
 
static int iterate_reachables (void *cls, const struct GNUNET_HashCode *key, void *value)
 Iterator over hash map entries. More...
 
void REGEX_INTERNAL_iterate_reachable_edges (struct REGEX_INTERNAL_Automaton *a, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
 Iterate over all edges of automaton 'a' that are reachable from a state with a proof of at least GNUNET_REGEX_INITIAL_BYTES characters. More...
 

Macro Definition Documentation

◆ REGEX_DEBUG_DFA

#define REGEX_DEBUG_DFA   GNUNET_NO

Set this to GNUNET_YES to enable state naming.

Used to debug NFA->DFA creation. Disabled by default for better performance.

Definition at line 37 of file regex_internal.c.

◆ PRIS

#define PRIS (   a)
Value:
((GNUNET_YES == a.null_flag) ? 6 : (int) a.slen), \
((GNUNET_YES == a.null_flag) ? "(null)" : a.sbuf)
@ GNUNET_YES

Definition at line 1138 of file regex_internal.c.

Function Documentation

◆ state_set_append()

static void state_set_append ( struct REGEX_INTERNAL_StateSet set,
struct REGEX_INTERNAL_State state 
)
static

Append state to the given StateSet.

Parameters
setset to be modified
statestate to be appended

Definition at line 68 of file regex_internal.c.

70{
71 if (set->off == set->size)
72 GNUNET_array_grow (set->states, set->size, set->size * 2 + 4);
73 set->states[set->off++] = state;
74}
enum State state
current state of profiling
#define GNUNET_array_grow(arr, size, tsize)
Grow a well-typed (!) array.
unsigned int size
Length of the 'states' array.
struct REGEX_INTERNAL_State ** states
Array of states.
unsigned int off
Number of entries in use in the 'states' array.

References GNUNET_array_grow, REGEX_INTERNAL_StateSet::off, REGEX_INTERNAL_StateSet::size, state, and REGEX_INTERNAL_StateSet::states.

Referenced by evaluate_nfa(), nfa_closure_set_create(), and REGEX_INTERNAL_construct_dfa().

Here is the caller graph for this function:

◆ nullstrcmp()

static int nullstrcmp ( const char *  str1,
const char *  str2 
)
static

Compare two strings for equality.

If either is NULL they are not equal.

Parameters
str1first string for comparison.
str2second string for comparison.
Returns
0 if the strings are the same or both NULL, 1 or -1 if not.

Definition at line 86 of file regex_internal.c.

87{
88 if ((NULL == str1) != (NULL == str2))
89 return -1;
90 if ((NULL == str1) && (NULL == str2))
91 return 0;
92
93 return strcmp (str1, str2);
94}

Referenced by nfa_closure_set_create(), and state_add_transition().

Here is the caller graph for this function:

◆ state_add_transition()

static void state_add_transition ( struct REGEX_INTERNAL_Context ctx,
struct REGEX_INTERNAL_State from_state,
const char *  label,
struct REGEX_INTERNAL_State to_state 
)
static

Adds a transition from one state to another on label.

Does not add duplicate states.

Parameters
ctxcontext
from_statestarting state for the transition
labeltransition label
to_statestate to where the transition should point to

Definition at line 107 of file regex_internal.c.

111{
113 struct REGEX_INTERNAL_Transition *oth;
114
115 if (NULL == from_state)
116 {
117 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not create Transition.\n");
118 return;
119 }
120
121 /* Do not add duplicate state transitions */
122 for (t = from_state->transitions_head; NULL != t; t = t->next)
123 {
124 if ((t->to_state == to_state) && (0 == nullstrcmp (t->label, label)) &&
125 (t->from_state == from_state) )
126 return;
127 }
128
129 /* sort transitions by label */
130 for (oth = from_state->transitions_head; NULL != oth; oth = oth->next)
131 {
132 if (0 < nullstrcmp (oth->label, label))
133 break;
134 }
135
137 if (NULL != ctx)
138 t->id = ctx->transition_id++;
139 if (NULL != label)
140 t->label = GNUNET_strdup (label);
141 else
142 t->label = NULL;
143 t->to_state = to_state;
144 t->from_state = from_state;
145
146 /* Add outgoing transition to 'from_state' */
150 oth,
151 t);
152}
static struct GNUNET_FS_Handle * ctx
static struct GNUNET_SCHEDULER_Task * t
Main task.
#define GNUNET_CONTAINER_DLL_insert_before(head, tail, other, element)
Insert an element into a DLL before the given other element.
#define GNUNET_log(kind,...)
@ GNUNET_ERROR_TYPE_ERROR
#define GNUNET_strdup(a)
Wrapper around GNUNET_xstrdup_.
#define GNUNET_new(type)
Allocate a struct or union of the given type.
static int nullstrcmp(const char *str1, const char *str2)
Compare two strings for equality.
struct GNUNET_SCHEDULER_Task * next
This is a linked list.
Definition: scheduler.c:140
unsigned int transition_count
Number of transitions from this state to other states.
struct REGEX_INTERNAL_Transition * transitions_tail
DLL of transitions.
struct REGEX_INTERNAL_Transition * transitions_head
DLL of transitions.
Transition between two states.
struct REGEX_INTERNAL_State * to_state
State to which this transition leads.
struct REGEX_INTERNAL_Transition * next
This is a linked list.
struct REGEX_INTERNAL_State * from_state
State from which this transition origins.
char * label
Label for this transition.

References ctx, REGEX_INTERNAL_Transition::from_state, GNUNET_CONTAINER_DLL_insert_before, GNUNET_ERROR_TYPE_ERROR, GNUNET_log, GNUNET_new, GNUNET_strdup, REGEX_INTERNAL_Transition::label, GNUNET_SCHEDULER_Task::next, REGEX_INTERNAL_Transition::next, nullstrcmp(), t, REGEX_INTERNAL_Transition::to_state, REGEX_INTERNAL_State::transition_count, REGEX_INTERNAL_State::transitions_head, and REGEX_INTERNAL_State::transitions_tail.

Referenced by automaton_merge_states(), dfa_compress_paths(), dfa_state_create(), nfa_add_alternation(), nfa_add_concatenation(), nfa_add_label(), nfa_add_plus_op(), nfa_add_question_op(), nfa_add_star_op(), and REGEX_INTERNAL_dfa_add_multi_strides().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ state_remove_transition()

static void state_remove_transition ( struct REGEX_INTERNAL_State state,
struct REGEX_INTERNAL_Transition transition 
)
static

Remove a 'transition' from 'state'.

Parameters
statestate from which the to-be-removed transition originates.
transitiontransition that should be removed from state 'state'.

Definition at line 162 of file regex_internal.c.

164{
165 if ((NULL == state) || (NULL == transition))
166 return;
167
168 if (transition->from_state != state)
169 return;
170
171 GNUNET_free (transition->label);
172
173 state->transition_count--;
174 GNUNET_CONTAINER_DLL_remove (state->transitions_head,
175 state->transitions_tail,
176 transition);
177
178 GNUNET_free (transition);
179}
#define GNUNET_CONTAINER_DLL_remove(head, tail, element)
Remove an element from a DLL.
#define GNUNET_free(ptr)
Wrapper around free.

References REGEX_INTERNAL_Transition::from_state, GNUNET_CONTAINER_DLL_remove, GNUNET_free, REGEX_INTERNAL_Transition::label, and state.

Referenced by automaton_destroy_state(), automaton_merge_states(), and automaton_remove_state().

Here is the caller graph for this function:

◆ state_compare()

static int state_compare ( const void *  a,
const void *  b 
)
static

Compare two states.

Used for sorting.

Parameters
afirst state
bsecond state
Returns
an integer less than, equal to, or greater than zero if the first argument is considered to be respectively less than, equal to, or greater than the second.

Definition at line 193 of file regex_internal.c.

194{
195 struct REGEX_INTERNAL_State **s1 = (struct REGEX_INTERNAL_State **) a;
196 struct REGEX_INTERNAL_State **s2 = (struct REGEX_INTERNAL_State **) b;
197
198 return (*s1)->id - (*s2)->id;
199}
unsigned int id
Unique state id.

References REGEX_INTERNAL_State::id.

Referenced by nfa_closure_set_create(), and state_set_compare().

Here is the caller graph for this function:

◆ state_get_edges()

static unsigned int state_get_edges ( struct REGEX_INTERNAL_State s,
struct REGEX_BLOCK_Edge edges 
)
static

Get all edges leaving state s.

Parameters
sstate.
edgesall edges leaving s, expected to be allocated and have enough space for s->transitions_count elements.
Returns
number of edges.

Definition at line 212 of file regex_internal.c.

213{
215 unsigned int count;
216
217 if (NULL == s)
218 return 0;
219
220 count = 0;
221
222 for (t = s->transitions_head; NULL != t; t = t->next)
223 {
224 if (NULL != t->to_state)
225 {
226 edges[count].label = t->label;
227 edges[count].destination = t->to_state->hash;
228 count++;
229 }
230 }
231 return count;
232}
const char * label
Label of the edge.
struct GNUNET_HashCode destination
Destination of the edge.

References REGEX_BLOCK_Edge::destination, REGEX_BLOCK_Edge::label, GNUNET_SCHEDULER_Task::next, t, and REGEX_INTERNAL_State::transitions_head.

Referenced by iterate_initial_edge(), and REGEX_INTERNAL_iterate_all_edges().

Here is the caller graph for this function:

◆ state_set_compare()

static int state_set_compare ( struct REGEX_INTERNAL_StateSet sset1,
struct REGEX_INTERNAL_StateSet sset2 
)
static

Compare to state sets by comparing the id's of the states that are contained in each set.

Both sets are expected to be sorted by id!

Parameters
sset1first state set
sset2second state set
Returns
0 if the sets are equal, otherwise non-zero

Definition at line 244 of file regex_internal.c.

246{
247 int result;
248 unsigned int i;
249
250 if ((NULL == sset1) || (NULL == sset2))
251 return 1;
252
253 result = sset1->off - sset2->off;
254 if (result < 0)
255 return -1;
256 if (result > 0)
257 return 1;
258 for (i = 0; i < sset1->off; i++)
259 if (0 != (result = state_compare (&sset1->states[i], &sset2->states[i])))
260 break;
261 return result;
262}
static int result
Global testing status.
static int state_compare(const void *a, const void *b)
Compare two states.

References REGEX_INTERNAL_StateSet::off, result, state_compare(), and REGEX_INTERNAL_StateSet::states.

Referenced by construct_dfa_states().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ state_set_clear()

static void state_set_clear ( struct REGEX_INTERNAL_StateSet set)
static

Clears the given StateSet 'set'.

Parameters
setset to be cleared

Definition at line 271 of file regex_internal.c.

272{
273 GNUNET_array_grow (set->states, set->size, 0);
274 set->off = 0;
275}

References GNUNET_array_grow, REGEX_INTERNAL_StateSet::off, REGEX_INTERNAL_StateSet::size, and REGEX_INTERNAL_StateSet::states.

Referenced by automaton_destroy_state(), construct_dfa_states(), evaluate_nfa(), and REGEX_INTERNAL_construct_dfa().

Here is the caller graph for this function:

◆ automaton_fragment_clear()

static void automaton_fragment_clear ( struct REGEX_INTERNAL_Automaton a)
static

Clears an automaton fragment.

Does not destroy the states inside the automaton.

Parameters
aautomaton to be cleared

Definition at line 285 of file regex_internal.c.

286{
287 if (NULL == a)
288 return;
289
290 a->start = NULL;
291 a->end = NULL;
292 a->states_head = NULL;
293 a->states_tail = NULL;
294 a->state_count = 0;
295 GNUNET_free (a);
296}
struct REGEX_INTERNAL_State * start
First state of the automaton.
struct REGEX_INTERNAL_State * states_tail
DLL of states.
unsigned int state_count
Number of states in the automaton.
struct REGEX_INTERNAL_State * states_head
DLL of states.
struct REGEX_INTERNAL_State * end
End state of the partial NFA.

References REGEX_INTERNAL_Automaton::end, GNUNET_free, REGEX_INTERNAL_Automaton::start, REGEX_INTERNAL_Automaton::state_count, REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.

Referenced by nfa_add_alternation(), nfa_add_concatenation(), nfa_add_question_op(), and nfa_add_star_op().

Here is the caller graph for this function:

◆ automaton_destroy_state()

static void automaton_destroy_state ( struct REGEX_INTERNAL_State s)
static

Frees the memory used by State s.

Parameters
sstate that should be destroyed

Definition at line 305 of file regex_internal.c.

306{
308 struct REGEX_INTERNAL_Transition *next_t;
309
310 if (NULL == s)
311 return;
312
313 GNUNET_free (s->name);
314 GNUNET_free (s->proof);
316 for (t = s->transitions_head; NULL != t; t = next_t)
317 {
318 next_t = t->next;
320 }
321
322 GNUNET_free (s);
323}
static void state_set_clear(struct REGEX_INTERNAL_StateSet *set)
Clears the given StateSet 'set'.
static void state_remove_transition(struct REGEX_INTERNAL_State *state, struct REGEX_INTERNAL_Transition *transition)
Remove a 'transition' from 'state'.
char * proof
Proof for this state.
struct REGEX_INTERNAL_StateSet nfa_set
Set of states on which this state is based on.
char * name
Human readable name of the state.

References GNUNET_free, REGEX_INTERNAL_State::name, GNUNET_SCHEDULER_Task::next, REGEX_INTERNAL_State::nfa_set, REGEX_INTERNAL_State::proof, state_remove_transition(), state_set_clear(), t, and REGEX_INTERNAL_State::transitions_head.

Referenced by automaton_merge_states(), automaton_remove_state(), and REGEX_INTERNAL_automaton_destroy().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ automaton_remove_state()

static void automaton_remove_state ( struct REGEX_INTERNAL_Automaton a,
struct REGEX_INTERNAL_State s 
)
static

Remove a state from the given automaton 'a'.

Always use this function when altering the states of an automaton. Will also remove all transitions leading to this state, before destroying it.

Parameters
aautomaton
sstate to remove

Definition at line 335 of file regex_internal.c.

337{
338 struct REGEX_INTERNAL_State *s_check;
339 struct REGEX_INTERNAL_Transition *t_check;
340 struct REGEX_INTERNAL_Transition *t_check_next;
341
342 if ((NULL == a) || (NULL == s))
343 return;
344
345 /* remove all transitions leading to this state */
346 for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
347 {
348 for (t_check = s_check->transitions_head; NULL != t_check;
349 t_check = t_check_next)
350 {
351 t_check_next = t_check->next;
352 if (t_check->to_state == s)
353 state_remove_transition (s_check, t_check);
354 }
355 }
356
357 /* remove state */
359 a->state_count--;
360
362}
static void automaton_destroy_state(struct REGEX_INTERNAL_State *s)
Frees the memory used by State s.
struct REGEX_INTERNAL_State * next
This is a linked list to keep states in an automaton.

References automaton_destroy_state(), GNUNET_CONTAINER_DLL_remove, REGEX_INTERNAL_Transition::next, REGEX_INTERNAL_State::next, REGEX_INTERNAL_Automaton::state_count, state_remove_transition(), REGEX_INTERNAL_Automaton::states_head, REGEX_INTERNAL_Automaton::states_tail, REGEX_INTERNAL_Transition::to_state, and REGEX_INTERNAL_State::transitions_head.

Referenced by dfa_compress_paths(), dfa_remove_dead_states(), and dfa_remove_unreachable_states().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ automaton_merge_states()

static void automaton_merge_states ( struct REGEX_INTERNAL_Context ctx,
struct REGEX_INTERNAL_Automaton a,
struct REGEX_INTERNAL_State s1,
struct REGEX_INTERNAL_State s2 
)
static

Merge two states into one.

Will merge 's1' and 's2' into 's1' and destroy 's2'. 's1' will contain all (non-duplicate) outgoing transitions of 's2'.

Parameters
ctxcontext
aautomaton
s1first state
s2second state, will be destroyed

Definition at line 375 of file regex_internal.c.

379{
380 struct REGEX_INTERNAL_State *s_check;
381 struct REGEX_INTERNAL_Transition *t_check;
383 struct REGEX_INTERNAL_Transition *t_next;
384 int is_dup;
385
386 if (s1 == s2)
387 return;
388
389 /* 1. Make all transitions pointing to s2 point to s1, unless this transition
390 * does not already exists, if it already exists remove transition. */
391 for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
392 {
393 for (t_check = s_check->transitions_head; NULL != t_check; t_check = t_next)
394 {
395 t_next = t_check->next;
396
397 if (s2 == t_check->to_state)
398 {
399 is_dup = GNUNET_NO;
400 for (t = t_check->from_state->transitions_head; NULL != t; t = t->next)
401 {
402 if ((t->to_state == s1) && (0 == strcmp (t_check->label, t->label)) )
403 is_dup = GNUNET_YES;
404 }
405 if (GNUNET_NO == is_dup)
406 t_check->to_state = s1;
407 else
408 state_remove_transition (t_check->from_state, t_check);
409 }
410 }
411 }
412
413 /* 2. Add all transitions from s2 to sX to s1 */
414 for (t_check = s2->transitions_head; NULL != t_check; t_check = t_check->next)
415 {
416 if (t_check->to_state != s1)
417 state_add_transition (ctx, s1, t_check->label, t_check->to_state);
418 }
419
420 /* 3. Rename s1 to {s1,s2} */
421#if REGEX_DEBUG_DFA
422 char *new_name;
423
424 new_name = s1->name;
425 GNUNET_asprintf (&s1->name, "{%s,%s}", new_name, s2->name);
426 GNUNET_free (new_name);
427#endif
428
429 /* remove state */
431 a->state_count--;
433}
@ GNUNET_NO
int int GNUNET_asprintf(char **buf, const char *format,...) __attribute__((format(printf
Like asprintf, just portable.
static void state_add_transition(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_State *from_state, const char *label, struct REGEX_INTERNAL_State *to_state)
Adds a transition from one state to another on label.

References automaton_destroy_state(), ctx, REGEX_INTERNAL_Transition::from_state, GNUNET_asprintf(), GNUNET_CONTAINER_DLL_remove, GNUNET_free, GNUNET_NO, GNUNET_YES, REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_State::name, GNUNET_SCHEDULER_Task::next, REGEX_INTERNAL_Transition::next, REGEX_INTERNAL_State::next, state_add_transition(), REGEX_INTERNAL_Automaton::state_count, state_remove_transition(), REGEX_INTERNAL_Automaton::states_head, REGEX_INTERNAL_Automaton::states_tail, t, REGEX_INTERNAL_Transition::to_state, and REGEX_INTERNAL_State::transitions_head.

Referenced by dfa_merge_nondistinguishable_states().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ automaton_add_state()

static void automaton_add_state ( struct REGEX_INTERNAL_Automaton a,
struct REGEX_INTERNAL_State s 
)
static

Add a state to the automaton 'a', always use this function to alter the states DLL of the automaton.

Parameters
aautomaton to add the state to
sstate that should be added

Definition at line 444 of file regex_internal.c.

446{
448 a->state_count++;
449}
#define GNUNET_CONTAINER_DLL_insert(head, tail, element)
Insert an element at the head of a DLL.

References GNUNET_CONTAINER_DLL_insert, REGEX_INTERNAL_Automaton::state_count, REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.

Referenced by construct_dfa_states(), nfa_fragment_create(), and REGEX_INTERNAL_construct_dfa().

Here is the caller graph for this function:

◆ automaton_state_traverse()

static void automaton_state_traverse ( struct REGEX_INTERNAL_State s,
int *  marks,
unsigned int *  count,
REGEX_INTERNAL_traverse_check  check,
void *  check_cls,
REGEX_INTERNAL_traverse_action  action,
void *  action_cls 
)
static

Depth-first traversal (DFS) of all states that are reachable from state 's'.

Performs 'action' on each visited state.

Parameters
sstart state.
marksan array of size a->state_count to remember which state was already visited.
countcurrent count of the state.
checkfunction that is checked before advancing on each transition in the DFS.
check_clsclosure for check.
actionaction to be performed on each state.
action_clsclosure for action.

Definition at line 467 of file regex_internal.c.

474{
476
477 if (GNUNET_YES == marks[s->traversal_id])
478 return;
479
480 marks[s->traversal_id] = GNUNET_YES;
481
482 if (NULL != action)
483 action (action_cls, *count, s);
484
485 (*count)++;
486
487 for (t = s->transitions_head; NULL != t; t = t->next)
488 {
489 if ((NULL == check) ||
490 ((NULL != check) && (GNUNET_YES == check (check_cls, s, t)) ))
491 {
492 automaton_state_traverse (t->to_state,
493 marks,
494 count,
495 check,
496 check_cls,
497 action,
498 action_cls);
499 }
500 }
501}
static void automaton_state_traverse(struct REGEX_INTERNAL_State *s, int *marks, unsigned int *count, REGEX_INTERNAL_traverse_check check, void *check_cls, REGEX_INTERNAL_traverse_action action, void *action_cls)
Depth-first traversal (DFS) of all states that are reachable from state 's'.
unsigned int traversal_id
Unique state id that is used for traversing the automaton.

References consensus-simulation::action, automaton_state_traverse(), GNUNET_YES, GNUNET_SCHEDULER_Task::next, t, REGEX_INTERNAL_State::transitions_head, and REGEX_INTERNAL_State::traversal_id.

Referenced by automaton_state_traverse(), and REGEX_INTERNAL_automaton_traverse().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ REGEX_INTERNAL_automaton_traverse()

void REGEX_INTERNAL_automaton_traverse ( const struct REGEX_INTERNAL_Automaton a,
struct REGEX_INTERNAL_State start,
REGEX_INTERNAL_traverse_check  check,
void *  check_cls,
REGEX_INTERNAL_traverse_action  action,
void *  action_cls 
)

Traverses the given automaton using depth-first-search (DFS) from it's start state, visiting all reachable states and calling 'action' on each one of them.

Parameters
aautomaton to be traversed.
startstart state, pass a->start or NULL to traverse the whole automaton.
checkfunction that is checked before advancing on each transition in the DFS.
check_clsclosure for check.
actionaction to be performed on each state.
action_clsclosure for action

Definition at line 505 of file regex_internal.c.

511{
512 unsigned int count;
513 struct REGEX_INTERNAL_State *s;
514
515 if ((NULL == a) || (0 == a->state_count))
516 return;
517
518 int marks[a->state_count];
519
520 for (count = 0, s = a->states_head; NULL != s && count < a->state_count;
521 s = s->next, count++)
522 {
523 s->traversal_id = count;
524 marks[s->traversal_id] = GNUNET_NO;
525 }
526
527 count = 0;
528
529 if (NULL == start)
530 s = a->start;
531 else
532 s = start;
533
535 marks,
536 &count,
537 check,
538 check_cls,
539 action,
540 action_cls);
541}
static int start
Set if we are to start default services (including ARM).
Definition: gnunet-arm.c:39

References consensus-simulation::action, automaton_state_traverse(), GNUNET_NO, REGEX_INTERNAL_State::next, start, REGEX_INTERNAL_Automaton::start, REGEX_INTERNAL_Automaton::state_count, REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_State::traversal_id.

Referenced by automaton_create_proofs(), dfa_remove_unreachable_states(), REGEX_INTERNAL_construct_nfa(), REGEX_INTERNAL_dfa_add_multi_strides(), and REGEX_TEST_automaton_save_graph().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ sb_nullstrcmp()

static int sb_nullstrcmp ( const struct StringBuffer s1,
const struct StringBuffer s2 
)
static

Compare two strings for equality.

If either is NULL they are not equal.

Parameters
s1first string for comparison.
s2second string for comparison.
Returns
0 if the strings are the same or both NULL, 1 or -1 if not.

Definition at line 597 of file regex_internal.c.

598{
599 if ((GNUNET_YES == s1->null_flag) && (GNUNET_YES == s2->null_flag))
600 return 0;
601 if ((GNUNET_YES == s1->null_flag) || (GNUNET_YES == s2->null_flag))
602 return -1;
603 if (s1->slen != s2->slen)
604 return -1;
605 if (0 == s1->slen)
606 return 0;
607 return memcmp (s1->sbuf, s2->sbuf, s1->slen);
608}
int16_t null_flag
Buffer currently represents "NULL" (not the empty string!)
char * sbuf
Buffer holding the string (may start in the middle!); NOT 0-terminated!
size_t slen
Length of the string in the buffer.

References GNUNET_YES, StringBuffer::null_flag, StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs_simplify().

Here is the caller graph for this function:

◆ sb_strcmp()

static int sb_strcmp ( const struct StringBuffer s1,
const struct StringBuffer s2 
)
static

Compare two strings for equality.

Parameters
s1first string for comparison.
s2second string for comparison.
Returns
0 if the strings are the same, 1 or -1 if not.

Definition at line 620 of file regex_internal.c.

621{
622 if (s1->slen != s2->slen)
623 return -1;
624 if (0 == s1->slen)
625 return 0;
626 return memcmp (s1->sbuf, s2->sbuf, s1->slen);
627}

References StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs_simplify().

Here is the caller graph for this function:

◆ sb_realloc()

static void sb_realloc ( struct StringBuffer ret,
size_t  nlen 
)
static

Reallocate the buffer of 'ret' to fit 'nlen' characters; move the existing string to the beginning of the new buffer.

Parameters
retcurrent buffer, to be updated
nlentarget length for the buffer, must be at least ret->slen

Definition at line 638 of file regex_internal.c.

639{
640 char *old;
641
642 GNUNET_assert (nlen >= ret->slen);
643 old = ret->abuf;
644 ret->abuf = GNUNET_malloc (nlen);
645 ret->blen = nlen;
646 GNUNET_memcpy (ret->abuf, ret->sbuf, ret->slen);
647 ret->sbuf = ret->abuf;
648 GNUNET_free (old);
649}
static int ret
Final status code.
Definition: gnunet-arm.c:94
#define GNUNET_memcpy(dst, src, n)
Call memcpy() but check for n being 0 first.
#define GNUNET_assert(cond)
Use this for fatal errors that cannot be handled.
#define GNUNET_malloc(size)
Wrapper around malloc.

References GNUNET_assert, GNUNET_free, GNUNET_malloc, GNUNET_memcpy, and ret.

Referenced by sb_append(), sb_append_cstr(), sb_printf1(), sb_printf2(), and sb_printf3().

Here is the caller graph for this function:

◆ sb_append()

static void sb_append ( struct StringBuffer ret,
const struct StringBuffer sarg 
)
static

Append a string.

Parameters
retwhere to write the result
sargstring to append

Definition at line 659 of file regex_internal.c.

660{
661 if (GNUNET_YES == ret->null_flag)
662 ret->slen = 0;
663 ret->null_flag = GNUNET_NO;
664 if (ret->blen < sarg->slen + ret->slen)
665 sb_realloc (ret, ret->blen + sarg->slen + 128);
666 GNUNET_memcpy (&ret->sbuf[ret->slen], sarg->sbuf, sarg->slen);
667 ret->slen += sarg->slen;
668}
static void sb_realloc(struct StringBuffer *ret, size_t nlen)
Reallocate the buffer of 'ret' to fit 'nlen' characters; move the existing string to the beginning of...

References GNUNET_memcpy, GNUNET_NO, GNUNET_YES, ret, sb_realloc(), StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ sb_append_cstr()

static void sb_append_cstr ( struct StringBuffer ret,
const char *  cstr 
)
static

Append a C string.

Parameters
retwhere to write the result
cstrstring to append

Definition at line 678 of file regex_internal.c.

679{
680 size_t cstr_len = strlen (cstr);
681
682 if (GNUNET_YES == ret->null_flag)
683 ret->slen = 0;
684 ret->null_flag = GNUNET_NO;
685 if (ret->blen < cstr_len + ret->slen)
686 sb_realloc (ret, ret->blen + cstr_len + 128);
687 GNUNET_memcpy (&ret->sbuf[ret->slen], cstr, cstr_len);
688 ret->slen += cstr_len;
689}

References GNUNET_memcpy, GNUNET_NO, GNUNET_YES, ret, and sb_realloc().

Referenced by automaton_create_proofs().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ sb_wrap()

static void sb_wrap ( struct StringBuffer ret,
const char *  format,
size_t  extra_chars 
)
static

Wrap a string buffer, that is, set ret to the format string which contains an "%s" which is to be replaced with the original content of 'ret'.

Note that optimizing this function is not really worth it, it is rarely called.

Parameters
retwhere to write the result and take the input for %.*s from
formatformat string, fprintf-style, with exactly one "%.*s"
extra_charshow long will the result be, in addition to 'sarg' length

Definition at line 703 of file regex_internal.c.

704{
705 char *temp;
706
707 if (GNUNET_YES == ret->null_flag)
708 ret->slen = 0;
709 ret->null_flag = GNUNET_NO;
710 temp = GNUNET_malloc (ret->slen + extra_chars + 1);
711 GNUNET_snprintf (temp,
712 ret->slen + extra_chars + 1,
713 format,
714 (int) ret->slen,
715 ret->sbuf);
716 GNUNET_free (ret->abuf);
717 ret->abuf = temp;
718 ret->sbuf = temp;
719 ret->blen = ret->slen + extra_chars + 1;
720 ret->slen = ret->slen + extra_chars;
721}
int GNUNET_snprintf(char *buf, size_t size, const char *format,...) __attribute__((format(printf
Like snprintf, just aborts if the buffer is of insufficient size.

References GNUNET_free, GNUNET_malloc, GNUNET_NO, GNUNET_snprintf(), GNUNET_YES, and ret.

Referenced by automaton_create_proofs().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ sb_printf1()

static void sb_printf1 ( struct StringBuffer ret,
const char *  format,
size_t  extra_chars,
const struct StringBuffer sarg 
)
static

Format a string buffer.

Note that optimizing this function is not really worth it, it is rarely called.

Parameters
retwhere to write the result
formatformat string, fprintf-style, with exactly one "%.*s"
extra_charshow long will the result be, in addition to 'sarg' length
sargstring to print into the format

Definition at line 734 of file regex_internal.c.

738{
739 if (ret->blen < sarg->slen + extra_chars + 1)
740 sb_realloc (ret, sarg->slen + extra_chars + 1);
741 ret->null_flag = GNUNET_NO;
742 ret->sbuf = ret->abuf;
743 ret->slen = sarg->slen + extra_chars;
744 GNUNET_snprintf (ret->sbuf, ret->blen, format, (int) sarg->slen, sarg->sbuf);
745}

References GNUNET_NO, GNUNET_snprintf(), ret, sb_realloc(), StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs_simplify().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ sb_printf2()

static void sb_printf2 ( struct StringBuffer ret,
const char *  format,
size_t  extra_chars,
const struct StringBuffer sarg1,
const struct StringBuffer sarg2 
)
static

Format a string buffer.

Parameters
retwhere to write the result
formatformat string, fprintf-style, with exactly two "%.*s"
extra_charshow long will the result be, in addition to 'sarg1/2' length
sarg1first string to print into the format
sarg2second string to print into the format

Definition at line 758 of file regex_internal.c.

763{
764 if (ret->blen < sarg1->slen + sarg2->slen + extra_chars + 1)
765 sb_realloc (ret, sarg1->slen + sarg2->slen + extra_chars + 1);
766 ret->null_flag = GNUNET_NO;
767 ret->slen = sarg1->slen + sarg2->slen + extra_chars;
768 ret->sbuf = ret->abuf;
769 GNUNET_snprintf (ret->sbuf,
770 ret->blen,
771 format,
772 (int) sarg1->slen,
773 sarg1->sbuf,
774 (int) sarg2->slen,
775 sarg2->sbuf);
776}

References GNUNET_NO, GNUNET_snprintf(), ret, sb_realloc(), StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs_simplify().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ sb_printf3()

static void sb_printf3 ( struct StringBuffer ret,
const char *  format,
size_t  extra_chars,
const struct StringBuffer sarg1,
const struct StringBuffer sarg2,
const struct StringBuffer sarg3 
)
static

Format a string buffer.

Note that optimizing this function is not really worth it, it is rarely called.

Parameters
retwhere to write the result
formatformat string, fprintf-style, with exactly three "%.*s"
extra_charshow long will the result be, in addition to 'sarg1/2/3' length
sarg1first string to print into the format
sarg2second string to print into the format
sarg3third string to print into the format

Definition at line 791 of file regex_internal.c.

797{
798 if (ret->blen < sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1)
799 sb_realloc (ret, sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1);
800 ret->null_flag = GNUNET_NO;
801 ret->slen = sarg1->slen + sarg2->slen + sarg3->slen + extra_chars;
802 ret->sbuf = ret->abuf;
803 GNUNET_snprintf (ret->sbuf,
804 ret->blen,
805 format,
806 (int) sarg1->slen,
807 sarg1->sbuf,
808 (int) sarg2->slen,
809 sarg2->sbuf,
810 (int) sarg3->slen,
811 sarg3->sbuf);
812}

References GNUNET_NO, GNUNET_snprintf(), ret, sb_realloc(), StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs_simplify().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ sb_free()

static void sb_free ( struct StringBuffer sb)
static

Free resources of the given string buffer.

Parameters
sbbuffer to free (actual pointer is not freed, as they should not be individually allocated)

Definition at line 822 of file regex_internal.c.

823{
824 GNUNET_array_grow (sb->abuf, sb->blen, 0);
825 sb->slen = 0;
826 sb->sbuf = NULL;
827 sb->null_flag = GNUNET_YES;
828}
unsigned int blen
Number of bytes allocated for sbuf.
char * abuf
Allocated buffer.

References StringBuffer::abuf, StringBuffer::blen, GNUNET_array_grow, GNUNET_YES, StringBuffer::null_flag, StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs(), and automaton_create_proofs_simplify().

Here is the caller graph for this function:

◆ sb_strdup()

static void sb_strdup ( struct StringBuffer out,
const struct StringBuffer in 
)
static

Copy the given string buffer from 'in' to 'out'.

Parameters
ininput string
outoutput string

Definition at line 838 of file regex_internal.c.

840{
841 out->null_flag = in->null_flag;
842 if (GNUNET_YES == out->null_flag)
843 return;
844 if (out->blen < in->slen)
845 {
846 GNUNET_array_grow (out->abuf, out->blen, in->slen);
847 }
848 out->sbuf = out->abuf;
849 out->slen = in->slen;
850 GNUNET_memcpy (out->sbuf, in->sbuf, out->slen);
851}

References StringBuffer::abuf, StringBuffer::blen, GNUNET_array_grow, GNUNET_memcpy, GNUNET_YES, StringBuffer::null_flag, StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs_simplify(), and remove_epsilon().

Here is the caller graph for this function:

◆ sb_strdup_cstr()

static void sb_strdup_cstr ( struct StringBuffer out,
const char *  cstr 
)
static

Copy the given string buffer from 'in' to 'out'.

Parameters
cstrinput string
outoutput string

Definition at line 861 of file regex_internal.c.

862{
863 if (NULL == cstr)
864 {
865 out->null_flag = GNUNET_YES;
866 return;
867 }
868 out->null_flag = GNUNET_NO;
869 out->slen = strlen (cstr);
870 if (out->blen < out->slen)
871 {
872 GNUNET_array_grow (out->abuf, out->blen, out->slen);
873 }
874 out->sbuf = out->abuf;
875 GNUNET_memcpy (out->sbuf, cstr, out->slen);
876}

References StringBuffer::abuf, StringBuffer::blen, GNUNET_array_grow, GNUNET_memcpy, GNUNET_NO, GNUNET_YES, StringBuffer::null_flag, StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs().

Here is the caller graph for this function:

◆ needs_parentheses()

static int needs_parentheses ( const struct StringBuffer str)
static

Check if the given string str needs parentheses around it when using it to generate a regex.

Parameters
strstring
Returns
GNUNET_YES if parentheses are needed, GNUNET_NO otherwise

Definition at line 888 of file regex_internal.c.

889{
890 size_t slen;
891 const char *op;
892 const char *cl;
893 const char *pos;
894 const char *end;
895 unsigned int cnt;
896
897 if ((GNUNET_YES == str->null_flag) || ((slen = str->slen) < 2))
898 return GNUNET_NO;
899 pos = str->sbuf;
900 if ('(' != pos[0])
901 return GNUNET_YES;
902 end = str->sbuf + slen;
903 cnt = 1;
904 pos++;
905 while (cnt > 0)
906 {
907 cl = memchr (pos, ')', end - pos);
908 if (NULL == cl)
909 {
910 GNUNET_break (0);
911 return GNUNET_YES;
912 }
913 /* while '(' before ')', count opening parens */
914 while ((NULL != (op = memchr (pos, '(', end - pos))) && (op < cl))
915 {
916 cnt++;
917 pos = op + 1;
918 }
919 /* got ')' first */
920 cnt--;
921 pos = cl + 1;
922 }
923 return (*pos == '\0') ? GNUNET_NO : GNUNET_YES;
924}
static struct GNUNET_ARM_Operation * op
Current operation.
Definition: gnunet-arm.c:144
static int end
Set if we are to shutdown all services (including ARM).
Definition: gnunet-arm.c:34
#define GNUNET_break(cond)
Use this for internal assertion violations that are not fatal (can be handled) but should not occur.

References end, GNUNET_break, GNUNET_NO, GNUNET_YES, StringBuffer::null_flag, op, StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs(), and automaton_create_proofs_simplify().

Here is the caller graph for this function:

◆ remove_parentheses()

static void remove_parentheses ( struct StringBuffer str)
static

Remove parentheses surrounding string str.

Example: "(a)" becomes "a", "(a|b)|(a|c)" stays the same. You need to GNUNET_free() the returned string.

Parameters
strstring, modified to contain a
Returns
string without surrounding parentheses, string 'str' if no preceding epsilon could be found, NULL if 'str' was NULL

Definition at line 937 of file regex_internal.c.

938{
939 size_t slen;
940 const char *pos;
941 const char *end;
942 const char *sbuf;
943 const char *op;
944 const char *cp;
945 unsigned int cnt;
946
947 if (0)
948 return;
949 sbuf = str->sbuf;
950 if ((GNUNET_YES == str->null_flag) || (1 >= (slen = str->slen)) ||
951 ('(' != str->sbuf[0]) || (')' != str->sbuf[slen - 1]))
952 return;
953 cnt = 0;
954 pos = &sbuf[1];
955 end = &sbuf[slen - 1];
956 op = memchr (pos, '(', end - pos);
957 cp = memchr (pos, ')', end - pos);
958 while (NULL != cp)
959 {
960 while ((NULL != op) && (op < cp))
961 {
962 cnt++;
963 pos = op + 1;
964 op = memchr (pos, '(', end - pos);
965 }
966 while ((NULL != cp) && ((NULL == op) || (cp < op)))
967 {
968 if (0 == cnt)
969 return; /* can't strip parens */
970 cnt--;
971 pos = cp + 1;
972 cp = memchr (pos, ')', end - pos);
973 }
974 }
975 if (0 != cnt)
976 {
977 GNUNET_break (0);
978 return;
979 }
980 str->sbuf++;
981 str->slen -= 2;
982}

References end, GNUNET_break, GNUNET_YES, StringBuffer::null_flag, op, StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs_simplify().

Here is the caller graph for this function:

◆ has_epsilon()

static int has_epsilon ( const struct StringBuffer str)
static

Check if the string 'str' starts with an epsilon (empty string).

Example: "(|a)" is starting with an epsilon.

Parameters
strstring to test
Returns
0 if str has no epsilon, 1 if str starts with '(|' and ends with ')'

Definition at line 994 of file regex_internal.c.

995{
996 return (GNUNET_YES != str->null_flag) && (0 < str->slen) &&
997 ('(' == str->sbuf[0]) && ('|' == str->sbuf[1]) &&
998 (')' == str->sbuf[str->slen - 1]);
999}

References GNUNET_YES, StringBuffer::null_flag, StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs_simplify().

Here is the caller graph for this function:

◆ remove_epsilon()

static void remove_epsilon ( const struct StringBuffer str,
struct StringBuffer ret 
)
static

Remove an epsilon from the string str.

Where epsilon is an empty string Example: str = "(|a|b|c)", result: "a|b|c" The returned string needs to be freed.

Parameters
stroriginal string
retwhere to return string without preceding epsilon, string 'str' if no preceding epsilon could be found, NULL if 'str' was NULL

Definition at line 1012 of file regex_internal.c.

1013{
1014 if (GNUNET_YES == str->null_flag)
1015 {
1016 ret->null_flag = GNUNET_YES;
1017 return;
1018 }
1019 if ((str->slen > 1) && ('(' == str->sbuf[0]) && ('|' == str->sbuf[1]) &&
1020 (')' == str->sbuf[str->slen - 1]))
1021 {
1022 /* remove epsilon */
1023 if (ret->blen < str->slen - 3)
1024 {
1025 GNUNET_array_grow (ret->abuf, ret->blen, str->slen - 3);
1026 }
1027 ret->sbuf = ret->abuf;
1028 ret->slen = str->slen - 3;
1029 GNUNET_memcpy (ret->sbuf, &str->sbuf[2], ret->slen);
1030 return;
1031 }
1032 sb_strdup (ret, str);
1033}
static void sb_strdup(struct StringBuffer *out, const struct StringBuffer *in)
Copy the given string buffer from 'in' to 'out'.

References GNUNET_array_grow, GNUNET_memcpy, GNUNET_YES, StringBuffer::null_flag, ret, sb_strdup(), StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs_simplify().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ sb_strncmp()

static int sb_strncmp ( const struct StringBuffer str1,
const struct StringBuffer str2,
size_t  n 
)
static

Compare n bytes of 'str1' and 'str2'.

Parameters
str1first string to compare
str2second string for comparison
nnumber of bytes to compare
Returns
-1 if any of the strings is NULL, 0 if equal, non 0 otherwise

Definition at line 1046 of file regex_internal.c.

1049{
1050 size_t max;
1051
1052 if ((str1->slen != str2->slen) && ((str1->slen < n) || (str2->slen < n)))
1053 return -1;
1054 max = GNUNET_MAX (str1->slen, str2->slen);
1055 if (max > n)
1056 max = n;
1057 return memcmp (str1->sbuf, str2->sbuf, max);
1058}
#define GNUNET_MAX(a, b)
#define max(x, y)

References GNUNET_MAX, max, StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs_simplify().

Here is the caller graph for this function:

◆ sb_strncmp_cstr()

static int sb_strncmp_cstr ( const struct StringBuffer str1,
const char *  str2,
size_t  n 
)
static

Compare n bytes of 'str1' and 'str2'.

Parameters
str1first string to compare
str2second C string for comparison
nnumber of bytes to compare (and length of str2)
Returns
-1 if any of the strings is NULL, 0 if equal, non 0 otherwise

Definition at line 1071 of file regex_internal.c.

1072{
1073 if (str1->slen < n)
1074 return -1;
1075 return memcmp (str1->sbuf, str2, n);
1076}

References StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs_simplify().

Here is the caller graph for this function:

◆ sb_init()

static void sb_init ( struct StringBuffer sb,
size_t  n 
)
static

Initialize string buffer for storing strings of up to n characters.

Parameters
sbbuffer to initialize
ndesired target length

Definition at line 1087 of file regex_internal.c.

1088{
1089 sb->null_flag = GNUNET_NO;
1090 sb->abuf = sb->sbuf = (0 == n) ? NULL : GNUNET_malloc (n);
1091 sb->blen = n;
1092 sb->slen = 0;
1093}

References StringBuffer::abuf, StringBuffer::blen, GNUNET_malloc, GNUNET_NO, StringBuffer::null_flag, StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs(), and automaton_create_proofs_simplify().

Here is the caller graph for this function:

◆ sb_strkcmp()

static int sb_strkcmp ( const struct StringBuffer str1,
const struct StringBuffer str2,
size_t  k 
)
static

Compare 'str1', starting from position 'k', with whole 'str2'.

Parameters
str1first string to compare, starting from position 'k'
str2second string for comparison
kstarting position in 'str1'
Returns
-1 if any of the strings is NULL, 0 if equal, non 0 otherwise

Definition at line 1106 of file regex_internal.c.

1109{
1110 if ((GNUNET_YES == str1->null_flag) || (GNUNET_YES == str2->null_flag) ||
1111 (k > str1->slen) || (str1->slen - k != str2->slen))
1112 return -1;
1113 return memcmp (&str1->sbuf[k], str2->sbuf, str2->slen);
1114}

References GNUNET_YES, StringBuffer::null_flag, StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs_simplify().

Here is the caller graph for this function:

◆ number_states()

static void number_states ( void *  cls,
const unsigned int  count,
struct REGEX_INTERNAL_State s 
)
static

Helper function used as 'action' in 'REGEX_INTERNAL_automaton_traverse' function to create the depth-first numbering of the states.

Parameters
clsstates array.
countcurrent state counter.
scurrent state.

Definition at line 1126 of file regex_internal.c.

1129{
1130 struct REGEX_INTERNAL_State **states = cls;
1131
1132 s->dfs_id = count;
1133 if (NULL != states)
1134 states[count] = s;
1135}
unsigned int dfs_id
Linear state ID acquired by depth-first-search.

References REGEX_INTERNAL_State::dfs_id.

Referenced by automaton_create_proofs(), and REGEX_INTERNAL_construct_nfa().

Here is the caller graph for this function:

◆ automaton_create_proofs_simplify()

static void automaton_create_proofs_simplify ( const struct StringBuffer R_last_ij,
const struct StringBuffer R_last_ik,
const struct StringBuffer R_last_kk,
const struct StringBuffer R_last_kj,
struct StringBuffer R_cur_ij,
struct StringBuffer R_cur_l,
struct StringBuffer R_cur_r 
)
static

Construct the regular expression given the inductive step, $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}, and simplify the resulting expression saved in R_cur_ij.

Parameters
R_last_ijvalue of $R^{(k-1)_{ij}.
R_last_ikvalue of $R^{(k-1)_{ik}.
R_last_kkvalue of $R^{(k-1)_{kk}.
R_last_kjvalue of $R^{(k-1)_{kj}.
R_cur_ijresult for this inductive step is saved in R_cur_ij, R_cur_ij is expected to be NULL when called!
R_cur_loptimization – kept between iterations to avoid realloc
R_cur_roptimization – kept between iterations to avoid realloc

Definition at line 1158 of file regex_internal.c.

1165{
1166 struct StringBuffer R_temp_ij;
1167 struct StringBuffer R_temp_ik;
1168 struct StringBuffer R_temp_kj;
1169 struct StringBuffer R_temp_kk;
1170 int eps_check;
1171 int ij_ik_cmp;
1172 int ij_kj_cmp;
1173 int ik_kk_cmp;
1174 int kk_kj_cmp;
1175 int clean_ik_kk_cmp;
1176 int clean_kk_kj_cmp;
1177 size_t length;
1178 size_t length_l;
1179 size_t length_r;
1180
1181 /*
1182 * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1183 * R_last == R^{(k-1)}, R_cur == R^{(k)}
1184 * R_cur_ij = R_cur_l | R_cur_r
1185 * R_cur_l == R^{(k-1)}_{ij}
1186 * R_cur_r == R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1187 */if ((GNUNET_YES == R_last_ij->null_flag) &&
1188 ((GNUNET_YES == R_last_ik->null_flag) ||
1189 (GNUNET_YES == R_last_kj->null_flag)))
1190 {
1191 /* R^{(k)}_{ij} = N | N */
1192 R_cur_ij->null_flag = GNUNET_YES;
1193 R_cur_ij->synced = GNUNET_NO;
1194 return;
1195 }
1196
1197 if ((GNUNET_YES == R_last_ik->null_flag) ||
1198 (GNUNET_YES == R_last_kj->null_flag))
1199 {
1200 /* R^{(k)}_{ij} = R^{(k-1)}_{ij} | N */
1201 if (GNUNET_YES == R_last_ij->synced)
1202 {
1203 R_cur_ij->synced = GNUNET_YES;
1204 R_cur_ij->null_flag = GNUNET_NO;
1205 return;
1206 }
1207 R_cur_ij->synced = GNUNET_YES;
1208 sb_strdup (R_cur_ij, R_last_ij);
1209 return;
1210 }
1211 R_cur_ij->synced = GNUNET_NO;
1212
1213 /* $R^{(k)}_{ij} = N | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} OR
1214 * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} */
1215
1216 R_cur_r->null_flag = GNUNET_YES;
1217 R_cur_r->slen = 0;
1218 R_cur_l->null_flag = GNUNET_YES;
1219 R_cur_l->slen = 0;
1220
1221 /* cache results from strcmp, we might need these many times */
1222 ij_kj_cmp = sb_nullstrcmp (R_last_ij, R_last_kj);
1223 ij_ik_cmp = sb_nullstrcmp (R_last_ij, R_last_ik);
1224 ik_kk_cmp = sb_nullstrcmp (R_last_ik, R_last_kk);
1225 kk_kj_cmp = sb_nullstrcmp (R_last_kk, R_last_kj);
1226
1227 /* Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well
1228 * as parentheses, so we can better compare the contents */
1229
1230 memset (&R_temp_ij, 0, sizeof(struct StringBuffer));
1231 memset (&R_temp_ik, 0, sizeof(struct StringBuffer));
1232 memset (&R_temp_kk, 0, sizeof(struct StringBuffer));
1233 memset (&R_temp_kj, 0, sizeof(struct StringBuffer));
1234 remove_epsilon (R_last_ik, &R_temp_ik);
1235 remove_epsilon (R_last_kk, &R_temp_kk);
1236 remove_epsilon (R_last_kj, &R_temp_kj);
1237 remove_parentheses (&R_temp_ik);
1238 remove_parentheses (&R_temp_kk);
1239 remove_parentheses (&R_temp_kj);
1240 clean_ik_kk_cmp = sb_nullstrcmp (R_last_ik, &R_temp_kk);
1241 clean_kk_kj_cmp = sb_nullstrcmp (&R_temp_kk, R_last_kj);
1242
1243 /* construct R_cur_l (and, if necessary R_cur_r) */
1244 if (GNUNET_YES != R_last_ij->null_flag)
1245 {
1246 /* Assign R_temp_ij to R_last_ij and remove epsilon as well
1247 * as parentheses, so we can better compare the contents */
1248 remove_epsilon (R_last_ij, &R_temp_ij);
1249 remove_parentheses (&R_temp_ij);
1250
1251 if ((0 == sb_strcmp (&R_temp_ij, &R_temp_ik)) &&
1252 (0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) &&
1253 (0 == sb_strcmp (&R_temp_kk, &R_temp_kj)))
1254 {
1255 if (0 == R_temp_ij.slen)
1256 {
1257 R_cur_r->null_flag = GNUNET_NO;
1258 }
1259 else if ((0 == sb_strncmp_cstr (R_last_ij, "(|", 2)) ||
1260 ((0 == sb_strncmp_cstr (R_last_ik, "(|", 2)) &&
1261 (0 == sb_strncmp_cstr (R_last_kj, "(|", 2)) ))
1262 {
1263 /*
1264 * a|(e|a)a*(e|a) = a*
1265 * a|(e|a)(e|a)*(e|a) = a*
1266 * (e|a)|aa*a = a*
1267 * (e|a)|aa*(e|a) = a*
1268 * (e|a)|(e|a)a*a = a*
1269 * (e|a)|(e|a)a*(e|a) = a*
1270 * (e|a)|(e|a)(e|a)*(e|a) = a*
1271 */if (GNUNET_YES == needs_parentheses (&R_temp_ij))
1272 sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_ij);
1273 else
1274 sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_ij);
1275 }
1276 else
1277 {
1278 /*
1279 * a|aa*a = a+
1280 * a|(e|a)a*a = a+
1281 * a|aa*(e|a) = a+
1282 * a|(e|a)(e|a)*a = a+
1283 * a|a(e|a)*(e|a) = a+
1284 */if (GNUNET_YES == needs_parentheses (&R_temp_ij))
1285 sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_ij);
1286 else
1287 sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_ij);
1288 }
1289 }
1290 else if ((0 == ij_ik_cmp) && (0 == clean_kk_kj_cmp) &&
1291 (0 != clean_ik_kk_cmp))
1292 {
1293 /* a|ab*b = ab* */
1294 if (0 == R_last_kk->slen)
1295 sb_strdup (R_cur_r, R_last_ij);
1296 else if (GNUNET_YES == needs_parentheses (&R_temp_kk))
1297 sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
1298 else
1299 sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, R_last_kk);
1300 R_cur_l->null_flag = GNUNET_YES;
1301 }
1302 else if ((0 == ij_kj_cmp) && (0 == clean_ik_kk_cmp) &&
1303 (0 != clean_kk_kj_cmp))
1304 {
1305 /* a|bb*a = b*a */
1306 if (R_last_kk->slen < 1)
1307 {
1308 sb_strdup (R_cur_r, R_last_kj);
1309 }
1310 else if (GNUNET_YES == needs_parentheses (&R_temp_kk))
1311 sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
1312 else
1313 sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
1314
1315 R_cur_l->null_flag = GNUNET_YES;
1316 }
1317 else if ((0 == ij_ik_cmp) && (0 == kk_kj_cmp) &&
1318 (! has_epsilon (R_last_ij)) && has_epsilon (R_last_kk))
1319 {
1320 /* a|a(e|b)*(e|b) = a|ab* = a|a|ab|abb|abbb|... = ab* */
1321 if (needs_parentheses (&R_temp_kk))
1322 sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
1323 else
1324 sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, &R_temp_kk);
1325 R_cur_l->null_flag = GNUNET_YES;
1326 }
1327 else if ((0 == ij_kj_cmp) && (0 == ik_kk_cmp) &&
1328 (! has_epsilon (R_last_ij)) && has_epsilon (R_last_kk))
1329 {
1330 /* a|(e|b)(e|b)*a = a|b*a = a|a|ba|bba|bbba|... = b*a */
1331 if (needs_parentheses (&R_temp_kk))
1332 sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_ij);
1333 else
1334 sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_ij);
1335 R_cur_l->null_flag = GNUNET_YES;
1336 }
1337 else
1338 {
1339 sb_strdup (R_cur_l, R_last_ij);
1340 remove_parentheses (R_cur_l);
1341 }
1342 }
1343 else
1344 {
1345 /* we have no left side */
1346 R_cur_l->null_flag = GNUNET_YES;
1347 }
1348
1349 /* construct R_cur_r, if not already constructed */
1350 if (GNUNET_YES == R_cur_r->null_flag)
1351 {
1352 length = R_temp_kk.slen - R_last_ik->slen;
1353
1354 /* a(ba)*bx = (ab)+x */
1355 if ((length > 0) && (GNUNET_YES != R_last_kk->null_flag) &&
1356 (0 < R_last_kk->slen) && (GNUNET_YES != R_last_kj->null_flag) &&
1357 (0 < R_last_kj->slen) && (GNUNET_YES != R_last_ik->null_flag) &&
1358 (0 < R_last_ik->slen) &&
1359 (0 == sb_strkcmp (&R_temp_kk, R_last_ik, length)) &&
1360 (0 == sb_strncmp (&R_temp_kk, R_last_kj, length)))
1361 {
1362 struct StringBuffer temp_a;
1363 struct StringBuffer temp_b;
1364
1365 sb_init (&temp_a, length);
1366 sb_init (&temp_b, R_last_kj->slen - length);
1367
1368 length_l = length;
1369 temp_a.sbuf = temp_a.abuf;
1370 GNUNET_memcpy (temp_a.sbuf, R_last_kj->sbuf, length_l);
1371 temp_a.slen = length_l;
1372
1373 length_r = R_last_kj->slen - length;
1374 temp_b.sbuf = temp_b.abuf;
1375 GNUNET_memcpy (temp_b.sbuf, &R_last_kj->sbuf[length], length_r);
1376 temp_b.slen = length_r;
1377
1378 /* e|(ab)+ = (ab)* */
1379 if ((GNUNET_YES != R_cur_l->null_flag) && (0 == R_cur_l->slen) &&
1380 (0 == temp_b.slen))
1381 {
1382 sb_printf2 (R_cur_r, "(%.*s%.*s)*", 3, R_last_ik, &temp_a);
1383 sb_free (R_cur_l);
1384 R_cur_l->null_flag = GNUNET_YES;
1385 }
1386 else
1387 {
1388 sb_printf3 (R_cur_r, "(%.*s%.*s)+%.*s", 3, R_last_ik, &temp_a, &temp_b);
1389 }
1390 sb_free (&temp_a);
1391 sb_free (&temp_b);
1392 }
1393 else if ((0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) &&
1394 (0 == sb_strcmp (&R_temp_kk, &R_temp_kj)) )
1395 {
1396 /*
1397 * (e|a)a*(e|a) = a*
1398 * (e|a)(e|a)*(e|a) = a*
1399 */
1400 if (has_epsilon (R_last_ik) && has_epsilon (R_last_kj))
1401 {
1402 if (needs_parentheses (&R_temp_kk))
1403 sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_kk);
1404 else
1405 sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_kk);
1406 }
1407 /* aa*a = a+a */
1408 else if ((0 == clean_ik_kk_cmp) && (0 == clean_kk_kj_cmp) &&
1409 (! has_epsilon (R_last_ik)))
1410 {
1411 if (needs_parentheses (&R_temp_kk))
1412 sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, &R_temp_kk);
1413 else
1414 sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, &R_temp_kk);
1415 }
1416 /*
1417 * (e|a)a*a = a+
1418 * aa*(e|a) = a+
1419 * a(e|a)*(e|a) = a+
1420 * (e|a)a*a = a+
1421 */else
1422 {
1423 eps_check = (has_epsilon (R_last_ik) + has_epsilon (R_last_kk)
1424 + has_epsilon (R_last_kj));
1425
1426 if (1 == eps_check)
1427 {
1428 if (needs_parentheses (&R_temp_kk))
1429 sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_kk);
1430 else
1431 sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_kk);
1432 }
1433 }
1434 }
1435 /*
1436 * aa*b = a+b
1437 * (e|a)(e|a)*b = a*b
1438 */
1439 else if (0 == sb_strcmp (&R_temp_ik, &R_temp_kk))
1440 {
1441 if (has_epsilon (R_last_ik))
1442 {
1443 if (needs_parentheses (&R_temp_kk))
1444 sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
1445 else
1446 sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
1447 }
1448 else
1449 {
1450 if (needs_parentheses (&R_temp_kk))
1451 sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, R_last_kj);
1452 else
1453 sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, R_last_kj);
1454 }
1455 }
1456 /*
1457 * ba*a = ba+
1458 * b(e|a)*(e|a) = ba*
1459 */
1460 else if (0 == sb_strcmp (&R_temp_kk, &R_temp_kj))
1461 {
1462 if (has_epsilon (R_last_kj))
1463 {
1464 if (needs_parentheses (&R_temp_kk))
1465 sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ik, &R_temp_kk);
1466 else
1467 sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ik, &R_temp_kk);
1468 }
1469 else
1470 {
1471 if (needs_parentheses (&R_temp_kk))
1472 sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, R_last_ik, &R_temp_kk);
1473 else
1474 sb_printf2 (R_cur_r, "%.*s+%.*s", 1, R_last_ik, &R_temp_kk);
1475 }
1476 }
1477 else
1478 {
1479 if (0 < R_temp_kk.slen)
1480 {
1481 if (needs_parentheses (&R_temp_kk))
1482 {
1483 sb_printf3 (R_cur_r,
1484 "%.*s(%.*s)*%.*s",
1485 3,
1486 R_last_ik,
1487 &R_temp_kk,
1488 R_last_kj);
1489 }
1490 else
1491 {
1492 sb_printf3 (R_cur_r,
1493 "%.*s%.*s*%.*s",
1494 1,
1495 R_last_ik,
1496 &R_temp_kk,
1497 R_last_kj);
1498 }
1499 }
1500 else
1501 {
1502 sb_printf2 (R_cur_r, "%.*s%.*s", 0, R_last_ik, R_last_kj);
1503 }
1504 }
1505 }
1506 sb_free (&R_temp_ij);
1507 sb_free (&R_temp_ik);
1508 sb_free (&R_temp_kk);
1509 sb_free (&R_temp_kj);
1510
1511 if ((GNUNET_YES == R_cur_l->null_flag) && (GNUNET_YES == R_cur_r->null_flag))
1512 {
1513 R_cur_ij->null_flag = GNUNET_YES;
1514 return;
1515 }
1516
1517 if ((GNUNET_YES != R_cur_l->null_flag) && (GNUNET_YES == R_cur_r->null_flag))
1518 {
1519 struct StringBuffer tmp;
1520
1521 tmp = *R_cur_ij;
1522 *R_cur_ij = *R_cur_l;
1523 *R_cur_l = tmp;
1524 return;
1525 }
1526
1527 if ((GNUNET_YES == R_cur_l->null_flag) && (GNUNET_YES != R_cur_r->null_flag))
1528 {
1529 struct StringBuffer tmp;
1530
1531 tmp = *R_cur_ij;
1532 *R_cur_ij = *R_cur_r;
1533 *R_cur_r = tmp;
1534 return;
1535 }
1536
1537 if (0 == sb_nullstrcmp (R_cur_l, R_cur_r))
1538 {
1539 struct StringBuffer tmp;
1540
1541 tmp = *R_cur_ij;
1542 *R_cur_ij = *R_cur_l;
1543 *R_cur_l = tmp;
1544 return;
1545 }
1546 sb_printf2 (R_cur_ij, "(%.*s|%.*s)", 3, R_cur_l, R_cur_r);
1547}
static void sb_printf3(struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg1, const struct StringBuffer *sarg2, const struct StringBuffer *sarg3)
Format a string buffer.
static void remove_parentheses(struct StringBuffer *str)
Remove parentheses surrounding string str.
static void sb_printf1(struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg)
Format a string buffer.
static int sb_strncmp_cstr(const struct StringBuffer *str1, const char *str2, size_t n)
Compare n bytes of 'str1' and 'str2'.
static void sb_free(struct StringBuffer *sb)
Free resources of the given string buffer.
static int sb_nullstrcmp(const struct StringBuffer *s1, const struct StringBuffer *s2)
Compare two strings for equality.
static void remove_epsilon(const struct StringBuffer *str, struct StringBuffer *ret)
Remove an epsilon from the string str.
static void sb_printf2(struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg1, const struct StringBuffer *sarg2)
Format a string buffer.
static int sb_strcmp(const struct StringBuffer *s1, const struct StringBuffer *s2)
Compare two strings for equality.
static int sb_strncmp(const struct StringBuffer *str1, const struct StringBuffer *str2, size_t n)
Compare n bytes of 'str1' and 'str2'.
static int needs_parentheses(const struct StringBuffer *str)
Check if the given string str needs parentheses around it when using it to generate a regex.
static int has_epsilon(const struct StringBuffer *str)
Check if the string 'str' starts with an epsilon (empty string).
static void sb_init(struct StringBuffer *sb, size_t n)
Initialize string buffer for storing strings of up to n characters.
static int sb_strkcmp(const struct StringBuffer *str1, const struct StringBuffer *str2, size_t k)
Compare 'str1', starting from position 'k', with whole 'str2'.
String container for faster string operations.
int16_t synced
If this entry is part of the last/current generation array, this flag is GNUNET_YES if the last and c...

References StringBuffer::abuf, GNUNET_memcpy, GNUNET_NO, GNUNET_YES, has_epsilon(), needs_parentheses(), StringBuffer::null_flag, remove_epsilon(), remove_parentheses(), sb_free(), sb_init(), sb_nullstrcmp(), sb_printf1(), sb_printf2(), sb_printf3(), sb_strcmp(), sb_strdup(), sb_strkcmp(), sb_strncmp(), sb_strncmp_cstr(), StringBuffer::sbuf, StringBuffer::slen, and StringBuffer::synced.

Referenced by automaton_create_proofs().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ automaton_create_proofs()

static int automaton_create_proofs ( struct REGEX_INTERNAL_Automaton a)
static

Create proofs for all states in the given automaton.

Implementation of the algorithm described in chapter 3.2.1 of "Automata Theory, Languages, and Computation 3rd Edition" by Hopcroft, Motwani and Ullman.

Each state in the automaton gets assigned 'proof' and 'hash' (hash of the proof) fields. The starting state will only have a valid proof/hash if it has any incoming transitions.

Parameters
aautomaton for which to assign proofs and hashes, must not be NULL

Definition at line 1562 of file regex_internal.c.

1563{
1564 unsigned int n = a->state_count;
1565 struct REGEX_INTERNAL_State *states[n];
1566 struct StringBuffer *R_last;
1567 struct StringBuffer *R_cur;
1568 struct StringBuffer R_cur_r;
1569 struct StringBuffer R_cur_l;
1570 struct StringBuffer *R_swap;
1572 struct StringBuffer complete_regex;
1573 unsigned int i;
1574 unsigned int j;
1575 unsigned int k;
1576
1577 R_last = GNUNET_malloc_large (sizeof(struct StringBuffer) * n * n);
1578 R_cur = GNUNET_malloc_large (sizeof(struct StringBuffer) * n * n);
1579 if ((NULL == R_last) || (NULL == R_cur))
1580 {
1582 GNUNET_free (R_cur);
1583 GNUNET_free (R_last);
1584 return GNUNET_SYSERR;
1585 }
1586
1587 /* create depth-first numbering of the states, initializes 'state' */
1589 a->start,
1590 NULL,
1591 NULL,
1593 states);
1594
1595 for (i = 0; i < n; i++)
1596 GNUNET_assert (NULL != states[i]);
1597 for (i = 0; i < n; i++)
1598 for (j = 0; j < n; j++)
1599 R_last[i * n + j].null_flag = GNUNET_YES;
1600
1601 /* Compute regular expressions of length "1" between each pair of states */
1602 for (i = 0; i < n; i++)
1603 {
1604 for (t = states[i]->transitions_head; NULL != t; t = t->next)
1605 {
1606 j = t->to_state->dfs_id;
1607 if (GNUNET_YES == R_last[i * n + j].null_flag)
1608 {
1609 sb_strdup_cstr (&R_last[i * n + j], t->label);
1610 }
1611 else
1612 {
1613 sb_append_cstr (&R_last[i * n + j], "|");
1614 sb_append_cstr (&R_last[i * n + j], t->label);
1615 }
1616 }
1617 /* add self-loop: i is reachable from i via epsilon-transition */
1618 if (GNUNET_YES == R_last[i * n + i].null_flag)
1619 {
1620 R_last[i * n + i].slen = 0;
1621 R_last[i * n + i].null_flag = GNUNET_NO;
1622 }
1623 else
1624 {
1625 sb_wrap (&R_last[i * n + i], "(|%.*s)", 3);
1626 }
1627 }
1628 for (i = 0; i < n; i++)
1629 for (j = 0; j < n; j++)
1630 if (needs_parentheses (&R_last[i * n + j]))
1631 sb_wrap (&R_last[i * n + j], "(%.*s)", 2);
1632 /* Compute regular expressions of length "k" between each pair of states per
1633 * induction */
1634 memset (&R_cur_l, 0, sizeof(struct StringBuffer));
1635 memset (&R_cur_r, 0, sizeof(struct StringBuffer));
1636 for (k = 0; k < n; k++)
1637 {
1638 for (i = 0; i < n; i++)
1639 {
1640 for (j = 0; j < n; j++)
1641 {
1642 /* Basis for the recursion:
1643 * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1644 * R_last == R^{(k-1)}, R_cur == R^{(k)}
1645 */
1646
1647 /* Create R_cur[i][j] and simplify the expression */
1648 automaton_create_proofs_simplify (&R_last[i * n + j],
1649 &R_last[i * n + k],
1650 &R_last[k * n + k],
1651 &R_last[k * n + j],
1652 &R_cur[i * n + j],
1653 &R_cur_l,
1654 &R_cur_r);
1655 }
1656 }
1657 /* set R_last = R_cur */
1658 R_swap = R_last;
1659 R_last = R_cur;
1660 R_cur = R_swap;
1661 /* clear 'R_cur' for next iteration */
1662 for (i = 0; i < n; i++)
1663 for (j = 0; j < n; j++)
1664 R_cur[i * n + j].null_flag = GNUNET_YES;
1665 }
1666 sb_free (&R_cur_l);
1667 sb_free (&R_cur_r);
1668 /* assign proofs and hashes */
1669 for (i = 0; i < n; i++)
1670 {
1671 if (GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag)
1672 {
1673 states[i]->proof = GNUNET_strndup (R_last[a->start->dfs_id * n + i].sbuf,
1674 R_last[a->start->dfs_id * n + i].slen);
1675 GNUNET_CRYPTO_hash (states[i]->proof,
1676 strlen (states[i]->proof),
1677 &states[i]->hash);
1678 }
1679 }
1680
1681 /* complete regex for whole DFA: union of all pairs (start state/accepting
1682 * state(s)). */
1683 sb_init (&complete_regex, 16 * n);
1684 for (i = 0; i < n; i++)
1685 {
1686 if (states[i]->accepting)
1687 {
1688 if ((0 == complete_regex.slen) &&
1689 (0 < R_last[a->start->dfs_id * n + i].slen))
1690 {
1691 sb_append (&complete_regex, &R_last[a->start->dfs_id * n + i]);
1692 }
1693 else if ((GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag) &&
1694 (0 < R_last[a->start->dfs_id * n + i].slen))
1695 {
1696 sb_append_cstr (&complete_regex, "|");
1697 sb_append (&complete_regex, &R_last[a->start->dfs_id * n + i]);
1698 }
1699 }
1700 }
1701 a->canonical_regex =
1702 GNUNET_strndup (complete_regex.sbuf, complete_regex.slen);
1703
1704 /* cleanup */
1705 sb_free (&complete_regex);
1706 for (i = 0; i < n; i++)
1707 for (j = 0; j < n; j++)
1708 {
1709 sb_free (&R_cur[i * n + j]);
1710 sb_free (&R_last[i * n + j]);
1711 }
1712 GNUNET_free (R_cur);
1713 GNUNET_free (R_last);
1714 return GNUNET_OK;
1715}
static uint64_t proof
Definition: gnunet-scrypt.c:49
void GNUNET_CRYPTO_hash(const void *block, size_t size, struct GNUNET_HashCode *ret)
Compute hash of a given block.
Definition: crypto_hash.c:41
@ GNUNET_OK
@ GNUNET_SYSERR
#define GNUNET_log_strerror(level, cmd)
Log an error message at log-level 'level' that indicates a failure of the command 'cmd' with the mess...
#define GNUNET_malloc_large(size)
Wrapper around malloc.
#define GNUNET_strndup(a, length)
Wrapper around GNUNET_xstrndup_.
static void sb_strdup_cstr(struct StringBuffer *out, const char *cstr)
Copy the given string buffer from 'in' to 'out'.
static void sb_append(struct StringBuffer *ret, const struct StringBuffer *sarg)
Append a string.
void REGEX_INTERNAL_automaton_traverse(const struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *start, REGEX_INTERNAL_traverse_check check, void *check_cls, REGEX_INTERNAL_traverse_action action, void *action_cls)
Traverses the given automaton using depth-first-search (DFS) from it's start state,...
static void sb_append_cstr(struct StringBuffer *ret, const char *cstr)
Append a C string.
static void sb_wrap(struct StringBuffer *ret, const char *format, size_t extra_chars)
Wrap a string buffer, that is, set ret to the format string which contains an "%s" which is to be rep...
static void automaton_create_proofs_simplify(const struct StringBuffer *R_last_ij, const struct StringBuffer *R_last_ik, const struct StringBuffer *R_last_kk, const struct StringBuffer *R_last_kj, struct StringBuffer *R_cur_ij, struct StringBuffer *R_cur_l, struct StringBuffer *R_cur_r)
Construct the regular expression given the inductive step, $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}...
static void number_states(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Helper function used as 'action' in 'REGEX_INTERNAL_automaton_traverse' function to create the depth-...
char * canonical_regex
Canonical regex (result of RX->NFA->DFA->RX)

References automaton_create_proofs_simplify(), REGEX_INTERNAL_Automaton::canonical_regex, REGEX_INTERNAL_State::dfs_id, GNUNET_assert, GNUNET_CRYPTO_hash(), GNUNET_ERROR_TYPE_ERROR, GNUNET_free, GNUNET_log_strerror, GNUNET_malloc_large, GNUNET_NO, GNUNET_OK, GNUNET_strndup, GNUNET_SYSERR, GNUNET_YES, needs_parentheses(), GNUNET_SCHEDULER_Task::next, StringBuffer::null_flag, number_states(), proof, REGEX_INTERNAL_State::proof, REGEX_INTERNAL_automaton_traverse(), sb_append(), sb_append_cstr(), sb_free(), sb_init(), sb_strdup_cstr(), sb_wrap(), StringBuffer::sbuf, StringBuffer::slen, REGEX_INTERNAL_Automaton::start, REGEX_INTERNAL_Automaton::state_count, and t.

Referenced by REGEX_INTERNAL_construct_dfa().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ dfa_state_create()

static struct REGEX_INTERNAL_State * dfa_state_create ( struct REGEX_INTERNAL_Context ctx,
struct REGEX_INTERNAL_StateSet nfa_states 
)
static

Creates a new DFA state based on a set of NFA states.

Needs to be freed using automaton_destroy_state.

Parameters
ctxcontext
nfa_statesset of NFA states on which the DFA should be based on
Returns
new DFA state

Definition at line 1728 of file regex_internal.c.

1730{
1731 struct REGEX_INTERNAL_State *s;
1732 char *pos;
1733 size_t len;
1734 struct REGEX_INTERNAL_State *cstate;
1735 struct REGEX_INTERNAL_Transition *ctran;
1736 unsigned int i;
1737
1738 s = GNUNET_new (struct REGEX_INTERNAL_State);
1739 s->id = ctx->state_id++;
1740 s->index = -1;
1741 s->lowlink = -1;
1742
1743 if (NULL == nfa_states)
1744 {
1745 GNUNET_asprintf (&s->name, "s%i", s->id);
1746 return s;
1747 }
1748
1749 s->nfa_set = *nfa_states;
1750
1751 if (nfa_states->off < 1)
1752 return s;
1753
1754 /* Create a name based on 'nfa_states' */
1755 len = nfa_states->off * 14 + 4;
1756 s->name = GNUNET_malloc (len);
1757 strcat (s->name, "{");
1758 pos = s->name + 1;
1759
1760 for (i = 0; i < nfa_states->off; i++)
1761 {
1762 cstate = nfa_states->states[i];
1763 GNUNET_snprintf (pos, pos - s->name + len, "%i,", cstate->id);
1764 pos += strlen (pos);
1765
1766 /* Add a transition for each distinct label to NULL state */
1767 for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next)
1768 if (NULL != ctran->label)
1769 state_add_transition (ctx, s, ctran->label, NULL);
1770
1771 /* If the nfa_states contain an accepting state, the new dfa state is also
1772 * accepting. */
1773 if (cstate->accepting)
1774 s->accepting = 1;
1775 }
1776 pos[-1] = '}';
1777 s->name = GNUNET_realloc (s->name, strlen (s->name) + 1);
1778
1779 memset (nfa_states, 0, sizeof(struct REGEX_INTERNAL_StateSet));
1780 return s;
1781}
#define GNUNET_realloc(ptr, size)
Wrapper around realloc.
int accepting
If this is an accepting state or not.
int lowlink
Used for SCC detection.
int index
Used for SCC detection.

References REGEX_INTERNAL_State::accepting, ctx, GNUNET_asprintf(), GNUNET_malloc, GNUNET_new, GNUNET_realloc, GNUNET_snprintf(), REGEX_INTERNAL_State::id, REGEX_INTERNAL_State::index, REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_State::lowlink, REGEX_INTERNAL_State::name, REGEX_INTERNAL_Transition::next, REGEX_INTERNAL_State::nfa_set, REGEX_INTERNAL_StateSet::off, state_add_transition(), REGEX_INTERNAL_StateSet::states, and REGEX_INTERNAL_State::transitions_head.

Referenced by construct_dfa_states(), and REGEX_INTERNAL_construct_dfa().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ dfa_move()

static unsigned int dfa_move ( struct REGEX_INTERNAL_State **  s,
const char *  str 
)
static

Move from the given state 's' to the next state on transition 'str'.

Consumes as much of the given 'str' as possible (useful for strided DFAs). On return 's' will point to the next state, and the length of the substring used for this transition will be returned. If no transition possible 0 is returned and 's' points to NULL.

Parameters
sstarting state, will point to the next state or NULL (if no transition possible)
stredge label to follow (will match longest common prefix)
Returns
length of the substring consumed from 'str'

Definition at line 1798 of file regex_internal.c.

1799{
1801 struct REGEX_INTERNAL_State *new_s;
1802 unsigned int len;
1803 unsigned int max_len;
1804
1805 if (NULL == s)
1806 return 0;
1807
1808 new_s = NULL;
1809 max_len = 0;
1810 for (t = (*s)->transitions_head; NULL != t; t = t->next)
1811 {
1812 len = strlen (t->label);
1813
1814 if (0 == strncmp (t->label, str, len))
1815 {
1816 if (len >= max_len)
1817 {
1818 max_len = len;
1819 new_s = t->to_state;
1820 }
1821 }
1822 }
1823
1824 *s = new_s;
1825 return max_len;
1826}

References GNUNET_SCHEDULER_Task::next, and t.

Referenced by evaluate_dfa().

Here is the caller graph for this function:

◆ mark_states()

static void mark_states ( void *  cls,
const unsigned int  count,
struct REGEX_INTERNAL_State s 
)
static

Set the given state 'marked' to GNUNET_YES.

Used by the dfa_remove_unreachable_states() function to detect unreachable states in the automaton.

Parameters
clsclosure, not used.
countcount, not used.
sstate where the marked attribute will be set to GNUNET_YES.

Definition at line 1839 of file regex_internal.c.

1842{
1843 s->marked = GNUNET_YES;
1844}
int marked
Marking of the state.

References GNUNET_YES, and REGEX_INTERNAL_State::marked.

Referenced by dfa_remove_unreachable_states().

Here is the caller graph for this function:

◆ dfa_remove_unreachable_states()

static void dfa_remove_unreachable_states ( struct REGEX_INTERNAL_Automaton a)
static

Remove all unreachable states from DFA 'a'.

Unreachable states are those states that are not reachable from the starting state.

Parameters
aDFA automaton

Definition at line 1854 of file regex_internal.c.

1855{
1856 struct REGEX_INTERNAL_State *s;
1857 struct REGEX_INTERNAL_State *s_next;
1858
1859 /* 1. unmark all states */
1860 for (s = a->states_head; NULL != s; s = s->next)
1861 s->marked = GNUNET_NO;
1862
1863 /* 2. traverse dfa from start state and mark all visited states */
1865 a->start,
1866 NULL,
1867 NULL,
1868 &mark_states,
1869 NULL);
1870
1871 /* 3. delete all states that were not visited */
1872 for (s = a->states_head; NULL != s; s = s_next)
1873 {
1874 s_next = s->next;
1875 if (GNUNET_NO == s->marked)
1877 }
1878}
static void mark_states(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Set the given state 'marked' to GNUNET_YES.
static void automaton_remove_state(struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s)
Remove a state from the given automaton 'a'.

References automaton_remove_state(), GNUNET_NO, mark_states(), REGEX_INTERNAL_State::marked, REGEX_INTERNAL_State::next, REGEX_INTERNAL_automaton_traverse(), REGEX_INTERNAL_Automaton::start, and REGEX_INTERNAL_Automaton::states_head.

Referenced by dfa_minimize().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ dfa_remove_dead_states()

static void dfa_remove_dead_states ( struct REGEX_INTERNAL_Automaton a)
static

Remove all dead states from the DFA 'a'.

Dead states are those states that do not transition to any other state but themselves.

Parameters
aDFA automaton

Definition at line 1888 of file regex_internal.c.

1889{
1890 struct REGEX_INTERNAL_State *s;
1891 struct REGEX_INTERNAL_State *s_next;
1893 int dead;
1894
1895 GNUNET_assert (DFA == a->type);
1896
1897 for (s = a->states_head; NULL != s; s = s_next)
1898 {
1899 s_next = s->next;
1900
1901 if (s->accepting)
1902 continue;
1903
1904 dead = 1;
1905 for (t = s->transitions_head; NULL != t; t = t->next)
1906 {
1907 if ((NULL != t->to_state) && (t->to_state != s) )
1908 {
1909 dead = 0;
1910 break;
1911 }
1912 }
1913
1914 if (0 == dead)
1915 continue;
1916
1917 /* state s is dead, remove it */
1919 }
1920}
@ DFA
enum REGEX_INTERNAL_AutomatonType type
Type of the automaton.

References REGEX_INTERNAL_State::accepting, automaton_remove_state(), DFA, GNUNET_assert, GNUNET_SCHEDULER_Task::next, REGEX_INTERNAL_State::next, REGEX_INTERNAL_Automaton::states_head, t, REGEX_INTERNAL_State::transitions_head, and REGEX_INTERNAL_Automaton::type.

Referenced by dfa_minimize().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ dfa_merge_nondistinguishable_states()

static int dfa_merge_nondistinguishable_states ( struct REGEX_INTERNAL_Context ctx,
struct REGEX_INTERNAL_Automaton a 
)
static

Merge all non distinguishable states in the DFA 'a'.

Parameters
ctxcontext
aDFA automaton
Returns
GNUNET_OK on success

Definition at line 1931 of file regex_internal.c.

1933{
1934 uint32_t *table;
1935 struct REGEX_INTERNAL_State *s1;
1936 struct REGEX_INTERNAL_State *s2;
1937 struct REGEX_INTERNAL_Transition *t1;
1938 struct REGEX_INTERNAL_Transition *t2;
1939 struct REGEX_INTERNAL_State *s1_next;
1940 struct REGEX_INTERNAL_State *s2_next;
1941 int change;
1942 unsigned int num_equal_edges;
1943 unsigned int i;
1944 unsigned int state_cnt;
1945 unsigned long long idx;
1946 unsigned long long idx1;
1947
1948 if ((NULL == a) || (0 == a->state_count))
1949 {
1951 "Could not merge nondistinguishable states, automaton was NULL.\n");
1952 return GNUNET_SYSERR;
1953 }
1954
1955 state_cnt = a->state_count;
1957 (sizeof(uint32_t) * state_cnt * state_cnt / 32) + sizeof(uint32_t));
1958 if (NULL == table)
1959 {
1961 return GNUNET_SYSERR;
1962 }
1963
1964 for (i = 0, s1 = a->states_head; NULL != s1; s1 = s1->next)
1965 s1->marked = i++;
1966
1967 /* Mark all pairs of accepting/!accepting states */
1968 for (s1 = a->states_head; NULL != s1; s1 = s1->next)
1969 for (s2 = a->states_head; NULL != s2; s2 = s2->next)
1970 if ((s1->accepting && ! s2->accepting) ||
1971 (! s1->accepting && s2->accepting))
1972 {
1973 idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
1974 table[idx / 32] |= (1U << (idx % 32));
1975 }
1976
1977 /* Find all equal states */
1978 change = 1;
1979 while (0 != change)
1980 {
1981 change = 0;
1982 for (s1 = a->states_head; NULL != s1; s1 = s1->next)
1983 {
1984 for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2->next)
1985 {
1986 idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
1987 if (0 != (table[idx / 32] & (1U << (idx % 32))))
1988 continue;
1989 num_equal_edges = 0;
1990 for (t1 = s1->transitions_head; NULL != t1; t1 = t1->next)
1991 {
1992 for (t2 = s2->transitions_head; NULL != t2; t2 = t2->next)
1993 {
1994 if (0 == strcmp (t1->label, t2->label))
1995 {
1996 num_equal_edges++;
1997 /* same edge, but targets definitively different, so we're different
1998 as well */
1999 if (t1->to_state->marked > t2->to_state->marked)
2000 idx1 = (unsigned long long) t1->to_state->marked * state_cnt
2001 + t2->to_state->marked;
2002 else
2003 idx1 = (unsigned long long) t2->to_state->marked * state_cnt
2004 + t1->to_state->marked;
2005 if (0 != (table[idx1 / 32] & (1U << (idx1 % 32))))
2006 {
2007 table[idx / 32] |= (1U << (idx % 32));
2008 change = 1; /* changed a marker, need to run again */
2009 }
2010 }
2011 }
2012 }
2013 if ((num_equal_edges != s1->transition_count) ||
2014 (num_equal_edges != s2->transition_count))
2015 {
2016 /* Make sure ALL edges of possible equal states are the same */
2017 table[idx / 32] |= (1U << (idx % 32));
2018 change = 1; /* changed a marker, need to run again */
2019 }
2020 }
2021 }
2022 }
2023
2024 /* Merge states that are equal */
2025 for (s1 = a->states_head; NULL != s1; s1 = s1_next)
2026 {
2027 s1_next = s1->next;
2028 for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2_next)
2029 {
2030 s2_next = s2->next;
2031 idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
2032 if (0 == (table[idx / 32] & (1U << (idx % 32))))
2033 automaton_merge_states (ctx, a, s1, s2);
2034 }
2035 }
2036
2038 return GNUNET_OK;
2039}
static struct PeerEntry ** table
Table with our interned peer IDs.
Definition: peer.c:56
static void automaton_merge_states(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s1, struct REGEX_INTERNAL_State *s2)
Merge two states into one.

References REGEX_INTERNAL_State::accepting, automaton_merge_states(), ctx, GNUNET_ERROR_TYPE_ERROR, GNUNET_free, GNUNET_log, GNUNET_log_strerror, GNUNET_malloc_large, GNUNET_OK, GNUNET_SYSERR, REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_State::marked, REGEX_INTERNAL_Transition::next, REGEX_INTERNAL_State::next, REGEX_INTERNAL_Automaton::state_count, REGEX_INTERNAL_Automaton::states_head, table, REGEX_INTERNAL_Transition::to_state, REGEX_INTERNAL_State::transition_count, and REGEX_INTERNAL_State::transitions_head.

Referenced by dfa_minimize().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ dfa_minimize()

static int dfa_minimize ( struct REGEX_INTERNAL_Context ctx,
struct REGEX_INTERNAL_Automaton a 
)
static

Minimize the given DFA 'a' by removing all unreachable states, removing all dead states and merging all non distinguishable states.

Parameters
ctxcontext
aDFA automaton
Returns
GNUNET_OK on success

Definition at line 2051 of file regex_internal.c.

2053{
2054 if (NULL == a)
2055 return GNUNET_SYSERR;
2056
2057 GNUNET_assert (DFA == a->type);
2058
2059 /* 1. remove unreachable states */
2061
2062 /* 2. remove dead states */
2064
2065 /* 3. Merge nondistinguishable states */
2067 return GNUNET_SYSERR;
2068 return GNUNET_OK;
2069}
static int dfa_merge_nondistinguishable_states(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a)
Merge all non distinguishable states in the DFA 'a'.
static void dfa_remove_dead_states(struct REGEX_INTERNAL_Automaton *a)
Remove all dead states from the DFA 'a'.
static void dfa_remove_unreachable_states(struct REGEX_INTERNAL_Automaton *a)
Remove all unreachable states from DFA 'a'.

References ctx, DFA, dfa_merge_nondistinguishable_states(), dfa_remove_dead_states(), dfa_remove_unreachable_states(), GNUNET_assert, GNUNET_OK, GNUNET_SYSERR, and REGEX_INTERNAL_Automaton::type.

Referenced by REGEX_INTERNAL_construct_dfa().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ dfa_add_multi_strides_helper()

static void dfa_add_multi_strides_helper ( void *  cls,
const unsigned int  depth,
char *  label,
struct REGEX_INTERNAL_State start,
struct REGEX_INTERNAL_State s 
)
static

Recursive helper function to add strides to a DFA.

Parameters
clscontext, contains stride length and strided transitions DLL.
depthcurrent depth of the depth-first traversal of the graph.
labelcurrent label, string that contains all labels on the path from 'start' to 's'.
startstart state for the depth-first traversal of the graph.
scurrent state in the depth-first traversal

Definition at line 2106 of file regex_internal.c.

2111{
2112 struct REGEX_INTERNAL_Strided_Context *ctx = cls;
2114 char *new_label;
2115
2116 if (depth == ctx->stride)
2117 {
2119 t->label = GNUNET_strdup (label);
2120 t->to_state = s;
2121 t->from_state = start;
2122 GNUNET_CONTAINER_DLL_insert (ctx->transitions_head,
2123 ctx->transitions_tail,
2124 t);
2125 }
2126 else
2127 {
2128 for (t = s->transitions_head; NULL != t; t = t->next)
2129 {
2130 /* Do not consider self-loops, because it end's up in too many
2131 * transitions */
2132 if (t->to_state == t->from_state)
2133 continue;
2134
2135 if (NULL != label)
2136 {
2137 GNUNET_asprintf (&new_label, "%s%s", label, t->label);
2138 }
2139 else
2140 new_label = GNUNET_strdup (t->label);
2141
2143 (depth + 1),
2144 new_label,
2145 start,
2146 t->to_state);
2147 }
2148 }
2150}
static void dfa_add_multi_strides_helper(void *cls, const unsigned int depth, char *label, struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *s)
Recursive helper function to add strides to a DFA.
Context for adding strided transitions to a DFA.

References ctx, dfa_add_multi_strides_helper(), GNUNET_asprintf(), GNUNET_CONTAINER_DLL_insert, GNUNET_free, GNUNET_new, GNUNET_strdup, REGEX_INTERNAL_Transition::label, GNUNET_SCHEDULER_Task::next, start, t, and REGEX_INTERNAL_State::transitions_head.

Referenced by dfa_add_multi_strides(), and dfa_add_multi_strides_helper().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ dfa_add_multi_strides()

static void dfa_add_multi_strides ( void *  cls,
const unsigned int  count,
struct REGEX_INTERNAL_State s 
)
static

Function called for each state in the DFA.

Starts a traversal of depth set in context starting from state 's'.

Parameters
clscontext.
countnot used.
scurrent state.

Definition at line 2162 of file regex_internal.c.

2165{
2166 dfa_add_multi_strides_helper (cls, 0, NULL, s, s);
2167}

References dfa_add_multi_strides_helper().

Referenced by REGEX_INTERNAL_dfa_add_multi_strides().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ REGEX_INTERNAL_dfa_add_multi_strides()

void REGEX_INTERNAL_dfa_add_multi_strides ( struct REGEX_INTERNAL_Context regex_ctx,
struct REGEX_INTERNAL_Automaton dfa,
const unsigned int  stride_len 
)

Adds multi-strided transitions to the given 'dfa'.

Parameters
regex_ctxregex context needed to add transitions to the automaton.
dfaDFA to which the multi strided transitions should be added.
stride_lenlength of the strides.

Definition at line 2178 of file regex_internal.c.

2181{
2182 struct REGEX_INTERNAL_Strided_Context ctx = { stride_len, NULL, NULL };
2184 struct REGEX_INTERNAL_Transition *t_next;
2185
2186 if ((1 > stride_len) || (GNUNET_YES == dfa->is_multistrided))
2187 return;
2188
2189 /* Compute the new transitions of given stride_len */
2191 dfa->start,
2192 NULL,
2193 NULL,
2195 &ctx);
2196
2197 /* Add all the new transitions to the automaton. */
2198 for (t = ctx.transitions_head; NULL != t; t = t_next)
2199 {
2200 t_next = t->next;
2201 state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
2202 GNUNET_CONTAINER_DLL_remove (ctx.transitions_head, ctx.transitions_tail, t);
2203 GNUNET_free (t->label);
2204 GNUNET_free (t);
2205 }
2206
2207 /* Mark this automaton as multistrided */
2209}
static void dfa_add_multi_strides(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Function called for each state in the DFA.
int is_multistrided
GNUNET_YES, if multi strides have been added to the Automaton.

References ctx, dfa_add_multi_strides(), GNUNET_CONTAINER_DLL_remove, GNUNET_free, GNUNET_YES, REGEX_INTERNAL_Automaton::is_multistrided, GNUNET_SCHEDULER_Task::next, REGEX_INTERNAL_automaton_traverse(), REGEX_INTERNAL_Automaton::start, state_add_transition(), and t.

Here is the call graph for this function:

◆ dfa_compress_paths_helper()

void dfa_compress_paths_helper ( struct REGEX_INTERNAL_Automaton dfa,
struct REGEX_INTERNAL_State start,
struct REGEX_INTERNAL_State cur,
char *  label,
unsigned int  max_len,
struct REGEX_INTERNAL_Transition **  transitions_head,
struct REGEX_INTERNAL_Transition **  transitions_tail 
)

Recursive Helper function for DFA path compression.

Does DFS on the DFA graph and adds new transitions to the given transitions DLL and marks states that should be removed by setting state->contained to GNUNET_YES.

Parameters
dfaDFA for which the paths should be compressed.
startstarting state for linear path search.
curcurrent state in the recursive DFS.
labelcurrent label (string of traversed labels).
max_lenmaximal path compression length.
transitions_headtransitions DLL.
transitions_tailtransitions DLL.

Definition at line 2226 of file regex_internal.c.

2233{
2235 char *new_label;
2236
2237
2238 if ((NULL != label) &&
2239 (((cur->incoming_transition_count > 1) || (GNUNET_YES ==
2240 cur->accepting) ||
2241 (GNUNET_YES == cur->marked) ) ||
2242 ((start != dfa->start) && (max_len > 0) && (max_len == strlen (
2243 label))) ||
2244 ((start == dfa->start) && (GNUNET_REGEX_INITIAL_BYTES == strlen (
2245 label)))))
2246 {
2248 t->label = GNUNET_strdup (label);
2249 t->to_state = cur;
2250 t->from_state = start;
2251 GNUNET_CONTAINER_DLL_insert (*transitions_head, *transitions_tail, t);
2252
2253 if (GNUNET_NO == cur->marked)
2254 {
2256 cur,
2257 cur,
2258 NULL,
2259 max_len,
2260 transitions_head,
2261 transitions_tail);
2262 }
2263 return;
2264 }
2265 else if (cur != start)
2266 cur->contained = GNUNET_YES;
2267
2268 if ((GNUNET_YES == cur->marked) && (cur != start))
2269 return;
2270
2271 cur->marked = GNUNET_YES;
2272
2273
2274 for (t = cur->transitions_head; NULL != t; t = t->next)
2275 {
2276 if (NULL != label)
2277 GNUNET_asprintf (&new_label, "%s%s", label, t->label);
2278 else
2279 new_label = GNUNET_strdup (t->label);
2280
2281 if (t->to_state != cur)
2282 {
2284 start,
2285 t->to_state,
2286 new_label,
2287 max_len,
2288 transitions_head,
2289 transitions_tail);
2290 }
2291 GNUNET_free (new_label);
2292 }
2293}
#define GNUNET_REGEX_INITIAL_BYTES
Constant for how many bytes the initial string regex should have.
void dfa_compress_paths_helper(struct REGEX_INTERNAL_Automaton *dfa, struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *cur, char *label, unsigned int max_len, struct REGEX_INTERNAL_Transition **transitions_head, struct REGEX_INTERNAL_Transition **transitions_tail)
Recursive Helper function for DFA path compression.
unsigned int incoming_transition_count
Number of incoming transitions.
int contained
Marking the state as contained.

References REGEX_INTERNAL_State::accepting, REGEX_INTERNAL_State::contained, dfa_compress_paths_helper(), GNUNET_asprintf(), GNUNET_CONTAINER_DLL_insert, GNUNET_free, GNUNET_new, GNUNET_NO, GNUNET_REGEX_INITIAL_BYTES, GNUNET_strdup, GNUNET_YES, REGEX_INTERNAL_State::incoming_transition_count, REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_State::marked, GNUNET_SCHEDULER_Task::next, start, REGEX_INTERNAL_Automaton::start, t, and REGEX_INTERNAL_State::transitions_head.

Referenced by dfa_compress_paths(), and dfa_compress_paths_helper().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ dfa_compress_paths()

static void dfa_compress_paths ( struct REGEX_INTERNAL_Context regex_ctx,
struct REGEX_INTERNAL_Automaton dfa,
unsigned int  max_len 
)
static

Compress paths in the given 'dfa'.

Linear paths like 0->1->2->3 will be compressed to 0->3 by combining transitions.

Parameters
regex_ctxcontext for adding new transitions.
dfaDFA representation, will directly modify the given DFA.
max_lenmaximal length of the compressed paths.

Definition at line 2305 of file regex_internal.c.

2308{
2309 struct REGEX_INTERNAL_State *s;
2310 struct REGEX_INTERNAL_State *s_next;
2312 struct REGEX_INTERNAL_Transition *t_next;
2313 struct REGEX_INTERNAL_Transition *transitions_head = NULL;
2314 struct REGEX_INTERNAL_Transition *transitions_tail = NULL;
2315
2316 if (NULL == dfa)
2317 return;
2318
2319 /* Count the incoming transitions on each state. */
2320 for (s = dfa->states_head; NULL != s; s = s->next)
2321 {
2322 for (t = s->transitions_head; NULL != t; t = t->next)
2323 {
2324 if (NULL != t->to_state)
2325 t->to_state->incoming_transition_count++;
2326 }
2327 }
2328
2329 /* Unmark all states. */
2330 for (s = dfa->states_head; NULL != s; s = s->next)
2331 {
2332 s->marked = GNUNET_NO;
2333 s->contained = GNUNET_NO;
2334 }
2335
2336 /* Add strides and mark states that can be deleted. */
2338 dfa->start,
2339 dfa->start,
2340 NULL,
2341 max_len,
2342 &transitions_head,
2343 &transitions_tail);
2344
2345 /* Add all the new transitions to the automaton. */
2346 for (t = transitions_head; NULL != t; t = t_next)
2347 {
2348 t_next = t->next;
2349 state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
2350 GNUNET_CONTAINER_DLL_remove (transitions_head, transitions_tail, t);
2351 GNUNET_free (t->label);
2352 GNUNET_free (t);
2353 }
2354
2355 /* Remove marked states (including their incoming and outgoing transitions). */
2356 for (s = dfa->states_head; NULL != s; s = s_next)
2357 {
2358 s_next = s->next;
2359 if (GNUNET_YES == s->contained)
2360 automaton_remove_state (dfa, s);
2361 }
2362}

References automaton_remove_state(), REGEX_INTERNAL_State::contained, dfa_compress_paths_helper(), GNUNET_CONTAINER_DLL_remove, GNUNET_free, GNUNET_NO, GNUNET_YES, REGEX_INTERNAL_State::marked, GNUNET_SCHEDULER_Task::next, REGEX_INTERNAL_State::next, REGEX_INTERNAL_Automaton::start, state_add_transition(), REGEX_INTERNAL_Automaton::states_head, t, and REGEX_INTERNAL_State::transitions_head.

Referenced by REGEX_INTERNAL_construct_dfa().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ nfa_fragment_create()

static struct REGEX_INTERNAL_Automaton * nfa_fragment_create ( struct REGEX_INTERNAL_State start,
struct REGEX_INTERNAL_State end 
)
static

Creates a new NFA fragment.

Needs to be cleared using automaton_fragment_clear.

Parameters
startstarting state
endend state
Returns
new NFA fragment

Definition at line 2375 of file regex_internal.c.

2377{
2378 struct REGEX_INTERNAL_Automaton *n;
2379
2381
2382 n->type = NFA;
2383 n->start = NULL;
2384 n->end = NULL;
2385 n->state_count = 0;
2386
2387 if ((NULL == start) || (NULL == end))
2388 return n;
2389
2392
2393 n->state_count = 2;
2394
2395 n->start = start;
2396 n->end = end;
2397
2398 return n;
2399}
static void automaton_add_state(struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s)
Add a state to the automaton 'a', always use this function to alter the states DLL of the automaton.
@ NFA
Automaton representation.

References automaton_add_state(), end, REGEX_INTERNAL_Automaton::end, GNUNET_new, NFA, start, REGEX_INTERNAL_Automaton::start, REGEX_INTERNAL_Automaton::state_count, and REGEX_INTERNAL_Automaton::type.

Referenced by nfa_add_alternation(), nfa_add_concatenation(), nfa_add_label(), nfa_add_question_op(), and nfa_add_star_op().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ nfa_add_states()

static void nfa_add_states ( struct REGEX_INTERNAL_Automaton n,
struct REGEX_INTERNAL_State states_head,
struct REGEX_INTERNAL_State states_tail 
)
static

Adds a list of states to the given automaton 'n'.

Parameters
nautomaton to which the states should be added
states_headhead of the DLL of states
states_tailtail of the DLL of states

Definition at line 2410 of file regex_internal.c.

2413{
2414 struct REGEX_INTERNAL_State *s;
2415
2416 if ((NULL == n) || (NULL == states_head))
2417 {
2418 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not add states\n");
2419 return;
2420 }
2421
2422 if (NULL == n->states_head)
2423 {
2424 n->states_head = states_head;
2425 n->states_tail = states_tail;
2426 return;
2427 }
2428
2429 if (NULL != states_head)
2430 {
2431 n->states_tail->next = states_head;
2432 n->states_tail = states_tail;
2433 }
2434
2435 for (s = states_head; NULL != s; s = s->next)
2436 n->state_count++;
2437}

References GNUNET_ERROR_TYPE_ERROR, GNUNET_log, REGEX_INTERNAL_State::next, REGEX_INTERNAL_Automaton::state_count, REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.

Referenced by nfa_add_alternation(), nfa_add_concatenation(), nfa_add_question_op(), and nfa_add_star_op().

Here is the caller graph for this function:

◆ nfa_state_create()

static struct REGEX_INTERNAL_State * nfa_state_create ( struct REGEX_INTERNAL_Context ctx,
int  accepting 
)
static

Creates a new NFA state.

Needs to be freed using automaton_destroy_state.

Parameters
ctxcontext
acceptingis it an accepting state or not
Returns
new NFA state

Definition at line 2449 of file regex_internal.c.

2450{
2451 struct REGEX_INTERNAL_State *s;
2452
2453 s = GNUNET_new (struct REGEX_INTERNAL_State);
2454 s->id = ctx->state_id++;
2455 s->accepting = accepting;
2456 s->marked = GNUNET_NO;
2457 s->contained = 0;
2458 s->index = -1;
2459 s->lowlink = -1;
2460 s->scc_id = 0;
2461 s->name = NULL;
2462 GNUNET_asprintf (&s->name, "s%i", s->id);
2463
2464 return s;
2465}
unsigned int scc_id
Marking the state as part of an SCC (Strongly Connected Component).

References REGEX_INTERNAL_State::accepting, REGEX_INTERNAL_State::contained, ctx, GNUNET_asprintf(), GNUNET_new, GNUNET_NO, REGEX_INTERNAL_State::id, REGEX_INTERNAL_State::index, REGEX_INTERNAL_State::lowlink, REGEX_INTERNAL_State::marked, REGEX_INTERNAL_State::name, and REGEX_INTERNAL_State::scc_id.

Referenced by nfa_add_alternation(), nfa_add_label(), nfa_add_question_op(), and nfa_add_star_op().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ nfa_closure_set_create()

static void nfa_closure_set_create ( struct REGEX_INTERNAL_StateSet ret,
struct REGEX_INTERNAL_Automaton nfa,
struct REGEX_INTERNAL_StateSet states,
const char *  label 
)
static

Calculates the closure set for the given set of states.

Parameters
retset to sorted nfa closure on 'label' (epsilon closure if 'label' is NULL)
nfathe NFA containing 's'
stateslist of states on which to base the closure on
labeltransitioning label for which to base the closure on, pass NULL for epsilon transition

Definition at line 2478 of file regex_internal.c.

2482{
2483 struct REGEX_INTERNAL_State *s;
2484 unsigned int i;
2485 struct REGEX_INTERNAL_StateSet_MDLL cls_stack;
2486 struct REGEX_INTERNAL_State *clsstate;
2487 struct REGEX_INTERNAL_State *currentstate;
2488 struct REGEX_INTERNAL_Transition *ctran;
2489
2490 memset (ret, 0, sizeof(struct REGEX_INTERNAL_StateSet));
2491 if (NULL == states)
2492 return;
2493
2494 for (i = 0; i < states->off; i++)
2495 {
2496 s = states->states[i];
2497
2498 /* Add start state to closure only for epsilon closure */
2499 if (NULL == label)
2500 state_set_append (ret, s);
2501
2502 /* initialize work stack */
2503 cls_stack.head = NULL;
2504 cls_stack.tail = NULL;
2505 GNUNET_CONTAINER_MDLL_insert (ST, cls_stack.head, cls_stack.tail, s);
2506 cls_stack.len = 1;
2507
2508 while (NULL != (currentstate = cls_stack.tail))
2509 {
2511 cls_stack.head,
2512 cls_stack.tail,
2513 currentstate);
2514 cls_stack.len--;
2515 for (ctran = currentstate->transitions_head; NULL != ctran;
2516 ctran = ctran->next)
2517 {
2518 if (NULL == (clsstate = ctran->to_state))
2519 continue;
2520 if (0 != clsstate->contained)
2521 continue;
2522 if (0 != nullstrcmp (label, ctran->label))
2523 continue;
2524 state_set_append (ret, clsstate);
2526 cls_stack.head,
2527 cls_stack.tail,
2528 clsstate);
2529 cls_stack.len++;
2530 clsstate->contained = 1;
2531 }
2532 }
2533 }
2534 for (i = 0; i < ret->off; i++)
2535 ret->states[i]->contained = 0;
2536
2537 if (ret->off > 1)
2538 qsort (ret->states,
2539 ret->off,
2540 sizeof(struct REGEX_INTERNAL_State *),
2541 &state_compare);
2542}
#define GNUNET_CONTAINER_MDLL_remove(mdll, head, tail, element)
Remove an element from a MDLL.
#define GNUNET_CONTAINER_MDLL_insert_tail(mdll, head, tail, element)
Insert an element at the tail of a MDLL.
#define GNUNET_CONTAINER_MDLL_insert(mdll, head, tail, element)
Insert an element at the head of a MDLL.
static void state_set_append(struct REGEX_INTERNAL_StateSet *set, struct REGEX_INTERNAL_State *state)
Append state to the given StateSet.
Set of states using MDLL API.

References REGEX_INTERNAL_State::contained, GNUNET_CONTAINER_MDLL_insert, GNUNET_CONTAINER_MDLL_insert_tail, GNUNET_CONTAINER_MDLL_remove, REGEX_INTERNAL_StateSet_MDLL::head, REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_StateSet_MDLL::len, REGEX_INTERNAL_Transition::next, nullstrcmp(), REGEX_INTERNAL_StateSet::off, ret, state_compare(), state_set_append(), REGEX_INTERNAL_StateSet::states, REGEX_INTERNAL_StateSet_MDLL::tail, REGEX_INTERNAL_Transition::to_state, and REGEX_INTERNAL_State::transitions_head.

Referenced by construct_dfa_states(), evaluate_nfa(), and REGEX_INTERNAL_construct_dfa().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ nfa_add_concatenation()

static void nfa_add_concatenation ( struct REGEX_INTERNAL_Context ctx)
static

Pops two NFA fragments (a, b) from the stack and concatenates them (ab)

Parameters
ctxcontext

Definition at line 2551 of file regex_internal.c.

2552{
2553 struct REGEX_INTERNAL_Automaton *a;
2554 struct REGEX_INTERNAL_Automaton *b;
2555 struct REGEX_INTERNAL_Automaton *new_nfa;
2556
2557 b = ctx->stack_tail;
2558 GNUNET_assert (NULL != b);
2559 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
2560 a = ctx->stack_tail;
2561 GNUNET_assert (NULL != a);
2562 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2563
2564 state_add_transition (ctx, a->end, NULL, b->start);
2565 a->end->accepting = 0;
2566 b->end->accepting = 1;
2567
2568 new_nfa = nfa_fragment_create (NULL, NULL);
2569 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2570 nfa_add_states (new_nfa, b->states_head, b->states_tail);
2571 new_nfa->start = a->start;
2572 new_nfa->end = b->end;
2573 new_nfa->state_count += a->state_count + b->state_count;
2576
2577 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2578}
#define GNUNET_CONTAINER_DLL_insert_tail(head, tail, element)
Insert an element at the tail of a DLL.
static struct REGEX_INTERNAL_Automaton * nfa_fragment_create(struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *end)
Creates a new NFA fragment.
static void nfa_add_states(struct REGEX_INTERNAL_Automaton *n, struct REGEX_INTERNAL_State *states_head, struct REGEX_INTERNAL_State *states_tail)
Adds a list of states to the given automaton 'n'.
static void automaton_fragment_clear(struct REGEX_INTERNAL_Automaton *a)
Clears an automaton fragment.

References REGEX_INTERNAL_State::accepting, automaton_fragment_clear(), ctx, REGEX_INTERNAL_Automaton::end, GNUNET_assert, GNUNET_CONTAINER_DLL_insert_tail, GNUNET_CONTAINER_DLL_remove, nfa_add_states(), nfa_fragment_create(), REGEX_INTERNAL_Automaton::start, state_add_transition(), REGEX_INTERNAL_Automaton::state_count, REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.

Referenced by REGEX_INTERNAL_construct_nfa().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ nfa_add_star_op()

static void nfa_add_star_op ( struct REGEX_INTERNAL_Context ctx)
static

Pops a NFA fragment from the stack (a) and adds a new fragment (a*)

Parameters
ctxcontext

Definition at line 2587 of file regex_internal.c.

2588{
2589 struct REGEX_INTERNAL_Automaton *a;
2590 struct REGEX_INTERNAL_Automaton *new_nfa;
2592 struct REGEX_INTERNAL_State *end;
2593
2594 a = ctx->stack_tail;
2595
2596 if (NULL == a)
2597 {
2598 GNUNET_log (
2600 "nfa_add_star_op failed, because there was no element on the stack");
2601 return;
2602 }
2603
2604 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2605
2606 start = nfa_state_create (ctx, 0);
2607 end = nfa_state_create (ctx, 1);
2608
2609 state_add_transition (ctx, start, NULL, a->start);
2611 state_add_transition (ctx, a->end, NULL, a->start);
2612 state_add_transition (ctx, a->end, NULL, end);
2613
2614 a->end->accepting = 0;
2615 end->accepting = 1;
2616
2617 new_nfa = nfa_fragment_create (start, end);
2618 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2620
2621 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2622}
static struct REGEX_INTERNAL_State * nfa_state_create(struct REGEX_INTERNAL_Context *ctx, int accepting)
Creates a new NFA state.

References REGEX_INTERNAL_State::accepting, automaton_fragment_clear(), ctx, end, REGEX_INTERNAL_Automaton::end, GNUNET_CONTAINER_DLL_insert_tail, GNUNET_CONTAINER_DLL_remove, GNUNET_ERROR_TYPE_ERROR, GNUNET_log, nfa_add_states(), nfa_fragment_create(), nfa_state_create(), start, REGEX_INTERNAL_Automaton::start, state_add_transition(), REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.

Referenced by REGEX_INTERNAL_construct_nfa().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ nfa_add_plus_op()

static void nfa_add_plus_op ( struct REGEX_INTERNAL_Context ctx)
static

Pops an NFA fragment (a) from the stack and adds a new fragment (a+)

Parameters
ctxcontext

Definition at line 2631 of file regex_internal.c.

2632{
2633 struct REGEX_INTERNAL_Automaton *a;
2634
2635 a = ctx->stack_tail;
2636
2637 if (NULL == a)
2638 {
2639 GNUNET_log (
2641 "nfa_add_plus_op failed, because there was no element on the stack");
2642 return;
2643 }
2644
2645 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2646
2647 state_add_transition (ctx, a->end, NULL, a->start);
2648
2649 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, a);
2650}

References ctx, REGEX_INTERNAL_Automaton::end, GNUNET_CONTAINER_DLL_insert_tail, GNUNET_CONTAINER_DLL_remove, GNUNET_ERROR_TYPE_ERROR, GNUNET_log, REGEX_INTERNAL_Automaton::start, and state_add_transition().

Referenced by REGEX_INTERNAL_construct_nfa().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ nfa_add_question_op()

static void nfa_add_question_op ( struct REGEX_INTERNAL_Context ctx)
static

Pops an NFA fragment (a) from the stack and adds a new fragment (a?)

Parameters
ctxcontext

Definition at line 2659 of file regex_internal.c.

2660{
2661 struct REGEX_INTERNAL_Automaton *a;
2662 struct REGEX_INTERNAL_Automaton *new_nfa;
2664 struct REGEX_INTERNAL_State *end;
2665
2666 a = ctx->stack_tail;
2667 if (NULL == a)
2668 {
2669 GNUNET_log (
2671 "nfa_add_question_op failed, because there was no element on the stack");
2672 return;
2673 }
2674
2675 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2676
2677 start = nfa_state_create (ctx, 0);
2678 end = nfa_state_create (ctx, 1);
2679
2680 state_add_transition (ctx, start, NULL, a->start);
2682 state_add_transition (ctx, a->end, NULL, end);
2683
2684 a->end->accepting = 0;
2685
2686 new_nfa = nfa_fragment_create (start, end);
2687 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2688 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2690}

References REGEX_INTERNAL_State::accepting, automaton_fragment_clear(), ctx, end, REGEX_INTERNAL_Automaton::end, GNUNET_CONTAINER_DLL_insert_tail, GNUNET_CONTAINER_DLL_remove, GNUNET_ERROR_TYPE_ERROR, GNUNET_log, nfa_add_states(), nfa_fragment_create(), nfa_state_create(), start, REGEX_INTERNAL_Automaton::start, state_add_transition(), REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.

Referenced by REGEX_INTERNAL_construct_nfa().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ nfa_add_alternation()

static void nfa_add_alternation ( struct REGEX_INTERNAL_Context ctx)
static

Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment that alternates between a and b (a|b)

Parameters
ctxcontext

Definition at line 2700 of file regex_internal.c.

2701{
2702 struct REGEX_INTERNAL_Automaton *a;
2703 struct REGEX_INTERNAL_Automaton *b;
2704 struct REGEX_INTERNAL_Automaton *new_nfa;
2706 struct REGEX_INTERNAL_State *end;
2707
2708 b = ctx->stack_tail;
2709 GNUNET_assert (NULL != b);
2710 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
2711 a = ctx->stack_tail;
2712 GNUNET_assert (NULL != a);
2713 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2714
2715 start = nfa_state_create (ctx, 0);
2716 end = nfa_state_create (ctx, 1);
2717 state_add_transition (ctx, start, NULL, a->start);
2718 state_add_transition (ctx, start, NULL, b->start);
2719
2720 state_add_transition (ctx, a->end, NULL, end);
2721 state_add_transition (ctx, b->end, NULL, end);
2722
2723 a->end->accepting = 0;
2724 b->end->accepting = 0;
2725 end->accepting = 1;
2726
2727 new_nfa = nfa_fragment_create (start, end);
2728 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2729 nfa_add_states (new_nfa, b->states_head, b->states_tail);
2732
2733 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2734}

References REGEX_INTERNAL_State::accepting, automaton_fragment_clear(), ctx, end, REGEX_INTERNAL_Automaton::end, GNUNET_assert, GNUNET_CONTAINER_DLL_insert_tail, GNUNET_CONTAINER_DLL_remove, nfa_add_states(), nfa_fragment_create(), nfa_state_create(), start, REGEX_INTERNAL_Automaton::start, state_add_transition(), REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.

Referenced by REGEX_INTERNAL_construct_nfa().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ nfa_add_label()

static void nfa_add_label ( struct REGEX_INTERNAL_Context ctx,
const char *  label 
)
static

Adds a new nfa fragment to the stack.

Parameters
ctxcontext
labellabel for nfa transition

Definition at line 2744 of file regex_internal.c.

2745{
2746 struct REGEX_INTERNAL_Automaton *n;
2748 struct REGEX_INTERNAL_State *end;
2749
2750 GNUNET_assert (NULL != ctx);
2751
2752 start = nfa_state_create (ctx, 0);
2753 end = nfa_state_create (ctx, 1);
2754 state_add_transition (ctx, start, label, end);
2756 GNUNET_assert (NULL != n);
2757 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, n);
2758}

References ctx, end, GNUNET_assert, GNUNET_CONTAINER_DLL_insert_tail, nfa_fragment_create(), nfa_state_create(), start, and state_add_transition().

Referenced by REGEX_INTERNAL_construct_nfa().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ REGEX_INTERNAL_context_init()

static void REGEX_INTERNAL_context_init ( struct REGEX_INTERNAL_Context ctx)
static

Initialize a new context.

Parameters
ctxcontext

Definition at line 2767 of file regex_internal.c.

2768{
2769 if (NULL == ctx)
2770 {
2771 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Context was NULL!");
2772 return;
2773 }
2774 ctx->state_id = 0;
2775 ctx->transition_id = 0;
2776 ctx->stack_head = NULL;
2777 ctx->stack_tail = NULL;
2778}

References ctx, GNUNET_ERROR_TYPE_ERROR, and GNUNET_log.

Referenced by REGEX_INTERNAL_construct_dfa(), and REGEX_INTERNAL_construct_nfa().

Here is the caller graph for this function:

◆ REGEX_INTERNAL_construct_nfa()

struct REGEX_INTERNAL_Automaton * REGEX_INTERNAL_construct_nfa ( const char *  regex,
const size_t  len 
)

Construct an NFA by parsing the regex string of length 'len'.

Parameters
regexregular expression string
lenlength of the string
Returns
NFA, needs to be freed using REGEX_INTERNAL_destroy_automaton

Definition at line 2790 of file regex_internal.c.

2791{
2793 struct REGEX_INTERNAL_Automaton *nfa;
2794 const char *regexp;
2795 char curlabel[2];
2796 char *error_msg;
2797 unsigned int count;
2798 unsigned int altcount;
2799 unsigned int atomcount;
2800 unsigned int poff;
2801 unsigned int psize;
2802
2803 struct
2804 {
2805 int altcount;
2806 int atomcount;
2807 } *p;
2808
2809 if ((NULL == regex) || (0 == strlen (regex)) || (0 == len))
2810 {
2812 "Could not parse regex. Empty regex string provided.\n");
2813
2814 return NULL;
2815 }
2817
2818 regexp = regex;
2819 curlabel[1] = '\0';
2820 p = NULL;
2821 error_msg = NULL;
2822 altcount = 0;
2823 atomcount = 0;
2824 poff = 0;
2825 psize = 0;
2826
2827 for (count = 0; count < len && *regexp; count++, regexp++)
2828 {
2829 switch (*regexp)
2830 {
2831 case '(':
2832 if (atomcount > 1)
2833 {
2834 --atomcount;
2836 }
2837 if (poff == psize)
2838 GNUNET_array_grow (p, psize, psize * 2 + 4); /* FIXME why *2 +4? */
2839 p[poff].altcount = altcount;
2840 p[poff].atomcount = atomcount;
2841 poff++;
2842 altcount = 0;
2843 atomcount = 0;
2844 break;
2845
2846 case '|':
2847 if (0 == atomcount)
2848 {
2849 error_msg = "Cannot append '|' to nothing";
2850 goto error;
2851 }
2852 while (--atomcount > 0)
2854 altcount++;
2855 break;
2856
2857 case ')':
2858 if (0 == poff)
2859 {
2860 error_msg = "Missing opening '('";
2861 goto error;
2862 }
2863 if (0 == atomcount)
2864 {
2865 /* Ignore this: "()" */
2866 poff--;
2867 altcount = p[poff].altcount;
2868 atomcount = p[poff].atomcount;
2869 break;
2870 }
2871 while (--atomcount > 0)
2873 for (; altcount > 0; altcount--)
2875 poff--;
2876 altcount = p[poff].altcount;
2877 atomcount = p[poff].atomcount;
2878 atomcount++;
2879 break;
2880
2881 case '*':
2882 if (atomcount == 0)
2883 {
2884 error_msg = "Cannot append '*' to nothing";
2885 goto error;
2886 }
2888 break;
2889
2890 case '+':
2891 if (atomcount == 0)
2892 {
2893 error_msg = "Cannot append '+' to nothing";
2894 goto error;
2895 }
2897 break;
2898
2899 case '?':
2900 if (atomcount == 0)
2901 {
2902 error_msg = "Cannot append '?' to nothing";
2903 goto error;
2904 }
2906 break;
2907
2908 default:
2909 if (atomcount > 1)
2910 {
2911 --atomcount;
2913 }
2914 curlabel[0] = *regexp;
2915 nfa_add_label (&ctx, curlabel);
2916 atomcount++;
2917 break;
2918 }
2919 }
2920 if (0 != poff)
2921 {
2922 error_msg = "Unbalanced parenthesis";
2923 goto error;
2924 }
2925 while (--atomcount > 0)
2927 for (; altcount > 0; altcount--)
2929
2930 GNUNET_array_grow (p, psize, 0);
2931
2932 nfa = ctx.stack_tail;
2933 GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa);
2934
2935 if (NULL != ctx.stack_head)
2936 {
2937 error_msg = "Creating the NFA failed. NFA stack was not empty!";
2938 goto error;
2939 }
2940
2941 /* Remember the regex that was used to generate this NFA */
2942 nfa->regex = GNUNET_strdup (regex);
2943
2944 /* create depth-first numbering of the states for pretty printing */
2946 NULL,
2947 NULL,
2948 NULL,
2950 NULL);
2951
2952 /* No multistriding added so far */
2954
2955 return nfa;
2956
2957error:
2958 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not parse regex: `%s'\n", regex);
2959 if (NULL != error_msg)
2960 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "%s\n", error_msg);
2961
2962 GNUNET_free (p);
2963
2964 while (NULL != (nfa = ctx.stack_head))
2965 {
2966 GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa);
2968 }
2969
2970 return NULL;
2971}
static struct GNUNET_OS_Process * p
Helper process we started.
Definition: gnunet-uri.c:38
static void nfa_add_concatenation(struct REGEX_INTERNAL_Context *ctx)
Pops two NFA fragments (a, b) from the stack and concatenates them (ab)
static void nfa_add_question_op(struct REGEX_INTERNAL_Context *ctx)
Pops an NFA fragment (a) from the stack and adds a new fragment (a?)
void REGEX_INTERNAL_automaton_destroy(struct REGEX_INTERNAL_Automaton *a)
Free the memory allocated by constructing the REGEX_INTERNAL_Automaton.
static void nfa_add_star_op(struct REGEX_INTERNAL_Context *ctx)
Pops a NFA fragment from the stack (a) and adds a new fragment (a*)
static void nfa_add_alternation(struct REGEX_INTERNAL_Context *ctx)
Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment that alternates between a an...
static void nfa_add_label(struct REGEX_INTERNAL_Context *ctx, const char *label)
Adds a new nfa fragment to the stack.
static void REGEX_INTERNAL_context_init(struct REGEX_INTERNAL_Context *ctx)
Initialize a new context.
static void nfa_add_plus_op(struct REGEX_INTERNAL_Context *ctx)
Pops an NFA fragment (a) from the stack and adds a new fragment (a+)
Context that contains an id counter for states and transitions as well as a DLL of automatons used as...

References ctx, GNUNET_array_grow, GNUNET_CONTAINER_DLL_remove, GNUNET_ERROR_TYPE_ERROR, GNUNET_free, GNUNET_log, GNUNET_NO, GNUNET_strdup, REGEX_INTERNAL_Automaton::is_multistrided, nfa_add_alternation(), nfa_add_concatenation(), nfa_add_label(), nfa_add_plus_op(), nfa_add_question_op(), nfa_add_star_op(), number_states(), p, REGEX_INTERNAL_Automaton::regex, REGEX_INTERNAL_automaton_destroy(), REGEX_INTERNAL_automaton_traverse(), and REGEX_INTERNAL_context_init().

Referenced by REGEX_INTERNAL_construct_dfa().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ construct_dfa_states()

static void construct_dfa_states ( struct REGEX_INTERNAL_Context ctx,
struct REGEX_INTERNAL_Automaton nfa,
struct REGEX_INTERNAL_Automaton dfa,
struct REGEX_INTERNAL_State dfa_state 
)
static

Create DFA states based on given 'nfa' and starting with 'dfa_state'.

Parameters
ctxcontext.
nfaNFA automaton.
dfaDFA automaton.
dfa_statecurrent dfa state, pass epsilon closure of first nfa state for starting.

Definition at line 2984 of file regex_internal.c.

2988{
2989 struct REGEX_INTERNAL_Transition *ctran;
2990 struct REGEX_INTERNAL_State *new_dfa_state;
2991 struct REGEX_INTERNAL_State *state_contains;
2992 struct REGEX_INTERNAL_State *state_iter;
2993 struct REGEX_INTERNAL_StateSet tmp;
2994 struct REGEX_INTERNAL_StateSet nfa_set;
2995
2996 for (ctran = dfa_state->transitions_head; NULL != ctran; ctran = ctran->next)
2997 {
2998 if ((NULL == ctran->label) || (NULL != ctran->to_state) )
2999 continue;
3000
3001 nfa_closure_set_create (&tmp, nfa, &dfa_state->nfa_set, ctran->label);
3002 nfa_closure_set_create (&nfa_set, nfa, &tmp, NULL);
3003 state_set_clear (&tmp);
3004
3005 state_contains = NULL;
3006 for (state_iter = dfa->states_head; NULL != state_iter;
3007 state_iter = state_iter->next)
3008 {
3009 if (0 == state_set_compare (&state_iter->nfa_set, &nfa_set))
3010 {
3011 state_contains = state_iter;
3012 break;
3013 }
3014 }
3015 if (NULL == state_contains)
3016 {
3017 new_dfa_state = dfa_state_create (ctx, &nfa_set);
3018 automaton_add_state (dfa, new_dfa_state);
3019 ctran->to_state = new_dfa_state;
3020 construct_dfa_states (ctx, nfa, dfa, new_dfa_state);
3021 }
3022 else
3023 {
3024 ctran->to_state = state_contains;
3025 state_set_clear (&nfa_set);
3026 }
3027 }
3028}
static struct REGEX_INTERNAL_State * dfa_state_create(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_StateSet *nfa_states)
Creates a new DFA state based on a set of NFA states.
static void construct_dfa_states(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *nfa, struct REGEX_INTERNAL_Automaton *dfa, struct REGEX_INTERNAL_State *dfa_state)
Create DFA states based on given 'nfa' and starting with 'dfa_state'.
static int state_set_compare(struct REGEX_INTERNAL_StateSet *sset1, struct REGEX_INTERNAL_StateSet *sset2)
Compare to state sets by comparing the id's of the states that are contained in each set.
static void nfa_closure_set_create(struct REGEX_INTERNAL_StateSet *ret, struct REGEX_INTERNAL_Automaton *nfa, struct REGEX_INTERNAL_StateSet *states, const char *label)
Calculates the closure set for the given set of states.

References automaton_add_state(), construct_dfa_states(), ctx, dfa_state_create(), REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_Transition::next, REGEX_INTERNAL_State::next, nfa_closure_set_create(), REGEX_INTERNAL_State::nfa_set, state_set_clear(), state_set_compare(), REGEX_INTERNAL_Automaton::states_head, REGEX_INTERNAL_Transition::to_state, and REGEX_INTERNAL_State::transitions_head.

Referenced by construct_dfa_states(), and REGEX_INTERNAL_construct_dfa().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ REGEX_INTERNAL_construct_dfa()

struct REGEX_INTERNAL_Automaton * REGEX_INTERNAL_construct_dfa ( const char *  regex,
const size_t  len,
unsigned int  max_path_len 
)

Construct DFA for the given 'regex' of length 'len'.

Path compression means, that for example a DFA o -> a -> b -> c -> o will be compressed to o -> abc -> o. Note that this parameter influences the non-determinism of states of the resulting NFA in the DHT (number of outgoing edges with the same label). For example for an application that stores IPv4 addresses as bitstrings it could make sense to limit the path compression to 4 or 8.

Parameters
regexregular expression string.
lenlength of the regular expression.
max_path_lenlimit the path compression length to the given value. If set to 1, no path compression is applied. Set to 0 for maximal possible path compression (generally not desirable).
Returns
DFA, needs to be freed using REGEX_INTERNAL_automaton_destroy().

Definition at line 3032 of file regex_internal.c.

3035{
3037 struct REGEX_INTERNAL_Automaton *dfa;
3038 struct REGEX_INTERNAL_Automaton *nfa;
3039 struct REGEX_INTERNAL_StateSet nfa_start_eps_cls;
3040 struct REGEX_INTERNAL_StateSet singleton_set;
3041
3043
3044 /* Create NFA */
3045 nfa = REGEX_INTERNAL_construct_nfa (regex, len);
3046
3047 if (NULL == nfa)
3048 {
3050 "Could not create DFA, because NFA creation failed\n");
3051 return NULL;
3052 }
3053
3054 dfa = GNUNET_new (struct REGEX_INTERNAL_Automaton);
3055 dfa->type = DFA;
3056 dfa->regex = GNUNET_strdup (regex);
3057
3058 /* Create DFA start state from epsilon closure */
3059 memset (&singleton_set, 0, sizeof(struct REGEX_INTERNAL_StateSet));
3060 state_set_append (&singleton_set, nfa->start);
3061 nfa_closure_set_create (&nfa_start_eps_cls, nfa, &singleton_set, NULL);
3062 state_set_clear (&singleton_set);
3063 dfa->start = dfa_state_create (&ctx, &nfa_start_eps_cls);
3064 automaton_add_state (dfa, dfa->start);
3065
3066 construct_dfa_states (&ctx, nfa, dfa, dfa->start);
3068
3069 /* Minimize DFA */
3070 if (GNUNET_OK != dfa_minimize (&ctx, dfa))
3071 {
3073 return NULL;
3074 }
3075
3076 /* Create proofs and hashes for all states */
3078 {
3080 return NULL;
3081 }
3082
3083 /* Compress linear DFA paths */
3084 if (1 != max_path_len)
3085 dfa_compress_paths (&ctx, dfa, max_path_len);
3086
3087 return dfa;
3088}
static int dfa_minimize(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a)
Minimize the given DFA 'a' by removing all unreachable states, removing all dead states and merging a...
static int automaton_create_proofs(struct REGEX_INTERNAL_Automaton *a)
Create proofs for all states in the given automaton.
struct REGEX_INTERNAL_Automaton * REGEX_INTERNAL_construct_nfa(const char *regex, const size_t len)
Construct an NFA by parsing the regex string of length 'len'.
static void dfa_compress_paths(struct REGEX_INTERNAL_Context *regex_ctx, struct REGEX_INTERNAL_Automaton *dfa, unsigned int max_len)
Compress paths in the given 'dfa'.

References automaton_add_state(), automaton_create_proofs(), construct_dfa_states(), ctx, DFA, dfa_compress_paths(), dfa_minimize(), dfa_state_create(), GNUNET_ERROR_TYPE_ERROR, GNUNET_log, GNUNET_new, GNUNET_OK, GNUNET_strdup, nfa_closure_set_create(), REGEX_INTERNAL_Automaton::regex, REGEX_INTERNAL_automaton_destroy(), REGEX_INTERNAL_construct_nfa(), REGEX_INTERNAL_context_init(), REGEX_INTERNAL_Automaton::start, state_set_append(), state_set_clear(), and REGEX_INTERNAL_Automaton::type.

Referenced by main(), and REGEX_INTERNAL_announce().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ REGEX_INTERNAL_automaton_destroy()

void REGEX_INTERNAL_automaton_destroy ( struct REGEX_INTERNAL_Automaton a)

Free the memory allocated by constructing the REGEX_INTERNAL_Automaton.

data structure.

Parameters
aautomaton to be destroyed.

Definition at line 3092 of file regex_internal.c.

3093{
3094 struct REGEX_INTERNAL_State *s;
3095 struct REGEX_INTERNAL_State *next_state;
3096
3097 if (NULL == a)
3098 return;
3099
3100 GNUNET_free (a->regex);
3102
3103 for (s = a->states_head; NULL != s; s = next_state)
3104 {
3105 next_state = s->next;
3108 }
3109
3110 GNUNET_free (a);
3111}

References automaton_destroy_state(), REGEX_INTERNAL_Automaton::canonical_regex, GNUNET_CONTAINER_DLL_remove, GNUNET_free, REGEX_INTERNAL_State::next, REGEX_INTERNAL_Automaton::regex, REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.

Referenced by main(), REGEX_INTERNAL_announce_cancel(), REGEX_INTERNAL_construct_dfa(), and REGEX_INTERNAL_construct_nfa().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ evaluate_dfa()

static int evaluate_dfa ( struct REGEX_INTERNAL_Automaton a,
const char *  string 
)
static

Evaluates the given string using the given DFA automaton.

Parameters
aautomaton, type must be DFA
stringstring that should be evaluated
Returns
0 if string matches, non-0 otherwise

Definition at line 3123 of file regex_internal.c.

3124{
3125 const char *strp;
3126 struct REGEX_INTERNAL_State *s;
3127 unsigned int step_len;
3128
3129 if (DFA != a->type)
3130 {
3132 "Tried to evaluate DFA, but NFA automaton given");
3133 return -1;
3134 }
3135
3136 s = a->start;
3137
3138 /* If the string is empty but the starting state is accepting, we accept. */
3139 if (((NULL == string) || (0 == strlen (string))) && s->accepting)
3140 return 0;
3141
3142 for (strp = string; NULL != strp && *strp; strp += step_len)
3143 {
3144 step_len = dfa_move (&s, strp);
3145
3146 if (NULL == s)
3147 break;
3148 }
3149
3150 if ((NULL != s) && s->accepting)
3151 return 0;
3152
3153 return 1;
3154}
static unsigned int dfa_move(struct REGEX_INTERNAL_State **s, const char *str)
Move from the given state 's' to the next state on transition 'str'.

References REGEX_INTERNAL_State::accepting, DFA, dfa_move(), GNUNET_ERROR_TYPE_ERROR, GNUNET_log, REGEX_INTERNAL_Automaton::start, and REGEX_INTERNAL_Automaton::type.

Referenced by REGEX_INTERNAL_eval().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ evaluate_nfa()

static int evaluate_nfa ( struct REGEX_INTERNAL_Automaton a,
const char *  string 
)
static

Evaluates the given string using the given NFA automaton.

Parameters
aautomaton, type must be NFA
stringstring that should be evaluated
Returns
0 if string matches, non-0 otherwise

Definition at line 3165 of file regex_internal.c.

3166{
3167 const char *strp;
3168 char str[2];
3169 struct REGEX_INTERNAL_State *s;
3170 struct REGEX_INTERNAL_StateSet sset;
3171 struct REGEX_INTERNAL_StateSet new_sset;
3172 struct REGEX_INTERNAL_StateSet singleton_set;
3173 unsigned int i;
3174 int result;
3175
3176 if (NFA != a->type)
3177 {
3179 "Tried to evaluate NFA, but DFA automaton given");
3180 return -1;
3181 }
3182
3183 /* If the string is empty but the starting state is accepting, we accept. */
3184 if (((NULL == string) || (0 == strlen (string))) && a->start->accepting)
3185 return 0;
3186
3187 result = 1;
3188 memset (&singleton_set, 0, sizeof(struct REGEX_INTERNAL_StateSet));
3189 state_set_append (&singleton_set, a->start);
3190 nfa_closure_set_create (&sset, a, &singleton_set, NULL);
3191 state_set_clear (&singleton_set);
3192
3193 str[1] = '\0';
3194 for (strp = string; NULL != strp && *strp; strp++)
3195 {
3196 str[0] = *strp;
3197 nfa_closure_set_create (&new_sset, a, &sset, str);
3198 state_set_clear (&sset);
3199 nfa_closure_set_create (&sset, a, &new_sset, 0);
3200 state_set_clear (&new_sset);
3201 }
3202
3203 for (i = 0; i < sset.off; i++)
3204 {
3205 s = sset.states[i];
3206 if ((NULL != s) && (s->accepting))
3207 {
3208 result = 0;
3209 break;
3210 }
3211 }
3212
3213 state_set_clear (&sset);
3214 return result;
3215}

References REGEX_INTERNAL_State::accepting, GNUNET_ERROR_TYPE_ERROR, GNUNET_log, NFA, nfa_closure_set_create(), REGEX_INTERNAL_StateSet::off, result, REGEX_INTERNAL_Automaton::start, state_set_append(), state_set_clear(), REGEX_INTERNAL_StateSet::states, and REGEX_INTERNAL_Automaton::type.

Referenced by REGEX_INTERNAL_eval().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ REGEX_INTERNAL_eval()

int REGEX_INTERNAL_eval ( struct REGEX_INTERNAL_Automaton a,
const char *  string 
)

Evaluates the given 'string' against the given compiled regex.

Parameters
aautomaton.
stringstring to check.
Returns
0 if string matches, non 0 otherwise.

Definition at line 3219 of file regex_internal.c.

3220{
3221 int result;
3222
3223 switch (a->type)
3224 {
3225 case DFA:
3226 result = evaluate_dfa (a, string);
3227 break;
3228
3229 case NFA:
3230 result = evaluate_nfa (a, string);
3231 break;
3232
3233 default:
3235 "Evaluating regex failed, automaton has no type!\n");
3237 break;
3238 }
3239
3240 return result;
3241}
static int evaluate_dfa(struct REGEX_INTERNAL_Automaton *a, const char *string)
Evaluates the given string using the given DFA automaton.
static int evaluate_nfa(struct REGEX_INTERNAL_Automaton *a, const char *string)
Evaluates the given string using the given NFA automaton.

References DFA, evaluate_dfa(), evaluate_nfa(), GNUNET_ERROR_TYPE_ERROR, GNUNET_log, GNUNET_SYSERR, NFA, result, and REGEX_INTERNAL_Automaton::type.

Here is the call graph for this function:

◆ REGEX_INTERNAL_get_canonical_regex()

const char * REGEX_INTERNAL_get_canonical_regex ( struct REGEX_INTERNAL_Automaton a)

Get the canonical regex of the given automaton.

When constructing the automaton a proof is computed for each state, consisting of the regular expression leading to this state. A complete regex for the automaton can be computed by combining these proofs. As of now this function is only useful for testing.

Parameters
aautomaton for which the canonical regex should be returned.
Returns
canonical regex string.

Definition at line 3245 of file regex_internal.c.

3246{
3247 if (NULL == a)
3248 return NULL;
3249
3250 return a->canonical_regex;
3251}

References REGEX_INTERNAL_Automaton::canonical_regex.

◆ REGEX_INTERNAL_get_transition_count()

unsigned int REGEX_INTERNAL_get_transition_count ( struct REGEX_INTERNAL_Automaton a)

Get the number of transitions that are contained in the given automaton.

Parameters
aautomaton for which the number of transitions should be returned.
Returns
number of transitions in the given automaton.

Definition at line 3262 of file regex_internal.c.

3263{
3264 unsigned int t_count;
3265 struct REGEX_INTERNAL_State *s;
3266
3267 if (NULL == a)
3268 return 0;
3269
3270 t_count = 0;
3271 for (s = a->states_head; NULL != s; s = s->next)
3272 t_count += s->transition_count;
3273
3274 return t_count;
3275}

References REGEX_INTERNAL_State::next, REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_State::transition_count.

◆ REGEX_INTERNAL_get_first_key()

size_t REGEX_INTERNAL_get_first_key ( const char *  input_string,
size_t  string_len,
struct GNUNET_HashCode key 
)

Get the first key for the given input_string.

This hashes the first x bits of the input_string.

Parameters
input_stringstring.
string_lenlength of the input_string.
keypointer to where to write the hash code.
Returns
number of bits of input_string that have been consumed to construct the key

Definition at line 3289 of file regex_internal.c.

3292{
3293 size_t size;
3294
3295 size = string_len < GNUNET_REGEX_INITIAL_BYTES ? string_len
3297 if (NULL == input_string)
3298 {
3299 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Given input string was NULL!\n");
3300 return 0;
3301 }
3302 GNUNET_CRYPTO_hash (input_string, size, key);
3303
3304 return size;
3305}
struct GNUNET_HashCode key
The key used in the DHT.
static unsigned int size
Size of the "table".
Definition: peer.c:68

References GNUNET_CRYPTO_hash(), GNUNET_ERROR_TYPE_ERROR, GNUNET_log, GNUNET_REGEX_INITIAL_BYTES, key, and size.

Referenced by REGEX_INTERNAL_search().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ iterate_initial_edge()

static void iterate_initial_edge ( unsigned int  min_len,
unsigned int  max_len,
char *  consumed_string,
struct REGEX_INTERNAL_State state,
REGEX_INTERNAL_KeyIterator  iterator,
void *  iterator_cls 
)
static

Recursive function that calls the iterator for each synthetic start state.

Parameters
min_lenminimum length of the path in the graph.
max_lenmaximum length of the path in the graph.
consumed_stringstring consumed by traversing the graph till this state.
statecurrent state of the automaton.
iteratoriterator function called for each edge.
iterator_clsclosure for the iterator function.

Definition at line 3319 of file regex_internal.c.

3325{
3326 char *temp;
3328 unsigned int num_edges = state->transition_count;
3329 struct REGEX_BLOCK_Edge edges[num_edges];
3330 struct REGEX_BLOCK_Edge edge[1];
3331 struct GNUNET_HashCode hash;
3332 struct GNUNET_HashCode hash_new;
3333 unsigned int cur_len;
3334
3335 if (NULL != consumed_string)
3336 cur_len = strlen (consumed_string);
3337 else
3338 cur_len = 0;
3339
3340 if (((cur_len >= min_len) || (GNUNET_YES == state->accepting)) &&
3341 (cur_len > 0) && (NULL != consumed_string))
3342 {
3343 if (cur_len <= max_len)
3344 {
3345 if ((NULL != state->proof) &&
3346 (0 != strcmp (consumed_string, state->proof)))
3347 {
3348 (void) state_get_edges (state, edges);
3349 GNUNET_CRYPTO_hash (consumed_string, strlen (consumed_string), &hash);
3351 "Start state for string `%s' is %s\n",
3352 consumed_string,
3353 GNUNET_h2s (&hash));
3354 iterator (iterator_cls,
3355 &hash,
3356 consumed_string,
3357 state->accepting,
3358 num_edges,
3359 edges);
3360 }
3361
3362 if ((GNUNET_YES == state->accepting) && (cur_len > 1) &&
3363 (state->transition_count < 1) && (cur_len < max_len))
3364 {
3365 /* Special case for regex consisting of just a string that is shorter than
3366 * max_len */
3367 edge[0].label = &consumed_string[cur_len - 1];
3368 edge[0].destination = state->hash;
3369 temp = GNUNET_strdup (consumed_string);
3370 temp[cur_len - 1] = '\0';
3371 GNUNET_CRYPTO_hash (temp, cur_len - 1, &hash_new);
3373 "Start state for short string `%s' is %s\n",
3374 temp,
3375 GNUNET_h2s (&hash_new));
3376 iterator (iterator_cls, &hash_new, temp, GNUNET_NO, 1, edge);
3377 GNUNET_free (temp);
3378 }
3379 }
3380 else /* cur_len > max_len */
3381 {
3382 /* Case where the concatenated labels are longer than max_len, then split. */
3383 edge[0].label = &consumed_string[max_len];
3384 edge[0].destination = state->hash;
3385 temp = GNUNET_strdup (consumed_string);
3386 temp[max_len] = '\0';
3387 GNUNET_CRYPTO_hash (temp, max_len, &hash);
3389 "Start state at split edge `%s'-`%s` is %s\n",
3390 temp,
3391 edge[0].label,
3392 GNUNET_h2s (&hash_new));
3393 iterator (iterator_cls, &hash, temp, GNUNET_NO, 1, edge);
3394 GNUNET_free (temp);
3395 }
3396 }
3397
3398 if (cur_len < max_len)
3399 {
3400 for (t = state->transitions_head; NULL != t; t = t->next)
3401 {
3402 if (NULL != strchr (t->label, (int) '.'))
3403 {
3404 /* Wildcards not allowed during starting states */
3405 GNUNET_break (0);
3406 continue;
3407 }
3408 if (NULL != consumed_string)
3409 GNUNET_asprintf (&temp, "%s%s", consumed_string, t->label);
3410 else
3411 GNUNET_asprintf (&temp, "%s", t->label);
3412 iterate_initial_edge (min_len,
3413 max_len,
3414 temp,
3415 t->to_state,
3416 iterator,
3417 iterator_cls);
3418 GNUNET_free (temp);
3419 }
3420 }
3421}
const char * GNUNET_h2s(const struct GNUNET_HashCode *hc)
Convert a hash value to a string (for printing debug messages).
@ GNUNET_ERROR_TYPE_DEBUG
static unsigned int state_get_edges(struct REGEX_INTERNAL_State *s, struct REGEX_BLOCK_Edge *edges)
Get all edges leaving state s.
static void iterate_initial_edge(unsigned int min_len, unsigned int max_len, char *consumed_string, struct REGEX_INTERNAL_State *state, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
Recursive function that calls the iterator for each synthetic start state.
A 512-bit hashcode.
Edge representation.

References REGEX_BLOCK_Edge::destination, GNUNET_asprintf(), GNUNET_break, GNUNET_CRYPTO_hash(), GNUNET_ERROR_TYPE_DEBUG, GNUNET_free, GNUNET_h2s(), GNUNET_log, GNUNET_NO, GNUNET_strdup, GNUNET_YES, iterate_initial_edge(), REGEX_BLOCK_Edge::label, GNUNET_SCHEDULER_Task::next, state, state_get_edges(), and t.

Referenced by iterate_initial_edge(), and REGEX_INTERNAL_iterate_all_edges().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ REGEX_INTERNAL_iterate_all_edges()

void REGEX_INTERNAL_iterate_all_edges ( struct REGEX_INTERNAL_Automaton a,
REGEX_INTERNAL_KeyIterator  iterator,
void *  iterator_cls 
)

Iterate over all edges starting from start state of automaton 'a'.

Calling iterator for each edge.

Parameters
aautomaton.
iteratoriterator called for each edge.
iterator_clsclosure.

Definition at line 3433 of file regex_internal.c.

3436{
3437 struct REGEX_INTERNAL_State *s;
3438
3439 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating over starting edges\n");
3442 NULL,
3443 a->start,
3444 iterator,
3445 iterator_cls);
3446 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating over DFA edges\n");
3447 for (s = a->states_head; NULL != s; s = s->next)
3448 {
3449 struct REGEX_BLOCK_Edge edges[s->transition_count];
3450 unsigned int num_edges;
3451
3452 num_edges = state_get_edges (s, edges);
3453 if (((NULL != s->proof) && (0 < strlen (s->proof))) || s->accepting)
3454 {
3456 "Creating DFA edges at `%s' under key %s\n",
3457 s->proof,
3458 GNUNET_h2s (&s->hash));
3459 iterator (iterator_cls,
3460 &s->hash,
3461 s->proof,
3462 s->accepting,
3463 num_edges,
3464 edges);
3465 }
3466 s->marked = GNUNET_NO;
3467 }
3468}
struct GNUNET_HashCode hash
Hash of the state.

References GNUNET_ERROR_TYPE_DEBUG, GNUNET_h2s(), GNUNET_log, GNUNET_NO, GNUNET_REGEX_INITIAL_BYTES, iterate_initial_edge(), REGEX_INTERNAL_State::next, REGEX_INTERNAL_Automaton::start, state_get_edges(), and REGEX_INTERNAL_Automaton::states_head.

Referenced by main(), and REGEX_INTERNAL_iterate_reachable_edges().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ store_all_states()

static void store_all_states ( void *  cls,
const struct GNUNET_HashCode key,
const char *  proof,
int  accepting,
unsigned int  num_edges,
const struct REGEX_BLOCK_Edge edges 
)
static

Iterator over all edges of a dfa.

Stores all of them in a HashMap for later reachability marking.

Parameters
clsClosure (HashMap)
keyhash for current state.
proofproof for current state
acceptingGNUNET_YES if this is an accepting state, GNUNET_NO if not.
num_edgesnumber of edges leaving current state.
edgesedges leaving current state.

Definition at line 3509 of file regex_internal.c.

3515{
3516 struct GNUNET_CONTAINER_MultiHashMap *hm = cls;
3517 struct temporal_state_store *tmp;
3518 size_t edges_size;
3519
3520 tmp = GNUNET_new (struct temporal_state_store);
3521 tmp->reachable = GNUNET_NO;
3522 tmp->proof = GNUNET_strdup (proof);
3523 tmp->accepting = accepting;
3524 tmp->num_edges = num_edges;
3525 edges_size = sizeof(struct REGEX_BLOCK_Edge) * num_edges;
3526 tmp->edges = GNUNET_malloc (edges_size);
3527 GNUNET_memcpy (tmp->edges, edges, edges_size);
3530 hm,
3531 key,
3532 tmp,
3534}
enum GNUNET_GenericReturnValue GNUNET_CONTAINER_multihashmap_put(struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key, void *value, enum GNUNET_CONTAINER_MultiHashMapOption opt)
Store a key-value pair in the map.
@ GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST
, ' bother checking if a value already exists (faster than GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE...
Internal representation of the hash map.
Struct to hold all the relevant state information in the HashMap.
struct REGEX_BLOCK_Edge * edges

References temporal_state_store::accepting, temporal_state_store::edges, GNUNET_assert, GNUNET_CONTAINER_multihashmap_put(), GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST, GNUNET_malloc, GNUNET_memcpy, GNUNET_new, GNUNET_NO, GNUNET_strdup, GNUNET_YES, key, temporal_state_store::num_edges, proof, temporal_state_store::proof, and temporal_state_store::reachable.

Referenced by REGEX_INTERNAL_iterate_reachable_edges().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ mark_as_reachable()

static void mark_as_reachable ( struct temporal_state_store state,
struct GNUNET_CONTAINER_MultiHashMap hm 
)
static

Mark state as reachable and call recursively on all its edges.

If already marked as reachable, do nothing.

Parameters
stateState to mark as reachable.
hmHashMap which stores all the states indexed by key.

Definition at line 3546 of file regex_internal.c.

3548{
3550 unsigned int i;
3551
3552 if (GNUNET_YES == state->reachable)
3553 /* visited */
3554 return;
3555
3556 state->reachable = GNUNET_YES;
3557 for (i = 0; i < state->num_edges; i++)
3558 {
3559 child =
3560 GNUNET_CONTAINER_multihashmap_get (hm, &state->edges[i].destination);
3561 if (NULL == child)
3562 {
3563 GNUNET_break (0);
3564 continue;
3565 }
3567 }
3568}
static pid_t child
void * GNUNET_CONTAINER_multihashmap_get(const struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key)
Given a key find a value in the map matching the key.
static void mark_as_reachable(struct temporal_state_store *state, struct GNUNET_CONTAINER_MultiHashMap *hm)
Mark state as reachable and call recursively on all its edges.

References child, GNUNET_break, GNUNET_CONTAINER_multihashmap_get(), GNUNET_YES, mark_as_reachable(), and state.

Referenced by mark_as_reachable(), and reachability_iterator().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ reachability_iterator()

static int reachability_iterator ( void *  cls,
const struct GNUNET_HashCode key,
void *  value 
)
static

Iterator over hash map entries to mark the ones that are reachable.

Parameters
clsclosure
keycurrent key code
valuevalue in the hash map
Returns
GNUNET_YES if we should continue to iterate, GNUNET_NO if not.

Definition at line 3581 of file regex_internal.c.

3584{
3585 struct GNUNET_CONTAINER_MultiHashMap *hm = cls;
3587
3588 if (GNUNET_YES == state->reachable)
3589 /* already visited and marked */
3590 return GNUNET_YES;
3591
3592 if ((GNUNET_REGEX_INITIAL_BYTES > strlen (state->proof)) &&
3593 (GNUNET_NO == state->accepting) )
3594 /* not directly reachable */
3595 return GNUNET_YES;
3596
3598 return GNUNET_YES;
3599}
static char * value
Value of the record to add/remove.

References GNUNET_NO, GNUNET_REGEX_INITIAL_BYTES, GNUNET_YES, mark_as_reachable(), state, and value.

Referenced by REGEX_INTERNAL_iterate_reachable_edges().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ iterate_reachables()

static int iterate_reachables ( void *  cls,
const struct GNUNET_HashCode key,
void *  value 
)
static

Iterator over hash map entries.

Calling the callback on the ones marked as reachables.

Parameters
clsclosure
keycurrent key code
valuevalue in the hash map
Returns
GNUNET_YES if we should continue to iterate, GNUNET_NO if not.

Definition at line 3613 of file regex_internal.c.

3614{
3615 struct client_iterator *ci = cls;
3617
3618 if (GNUNET_YES == state->reachable)
3619 {
3620 ci->iterator (ci->iterator_cls,
3621 key,
3622 state->proof,
3623 state->accepting,
3624 state->num_edges,
3625 state->edges);
3626 }
3627 GNUNET_free (state->edges);
3628 GNUNET_free (state->proof);
3630 return GNUNET_YES;
3631}
Store regex iterator and cls in one place to pass to the hashmap iterator.
REGEX_INTERNAL_KeyIterator iterator

References GNUNET_free, GNUNET_YES, client_iterator::iterator, client_iterator::iterator_cls, key, state, and value.

Referenced by REGEX_INTERNAL_iterate_reachable_edges().

Here is the caller graph for this function:

◆ REGEX_INTERNAL_iterate_reachable_edges()

void REGEX_INTERNAL_iterate_reachable_edges ( struct REGEX_INTERNAL_Automaton a,
REGEX_INTERNAL_KeyIterator  iterator,
void *  iterator_cls 
)

Iterate over all edges of automaton 'a' that are reachable from a state with a proof of at least GNUNET_REGEX_INITIAL_BYTES characters.

Call the iterator for each such edge.

Parameters
aautomaton.
iteratoriterator called for each reachable edge.
iterator_clsclosure.

Definition at line 3635 of file regex_internal.c.

3638{
3640 struct client_iterator ci;
3641
3643 ci.iterator = iterator;
3644 ci.iterator_cls = iterator_cls;
3645
3649
3651}
int GNUNET_CONTAINER_multihashmap_iterate(struct GNUNET_CONTAINER_MultiHashMap *map, GNUNET_CONTAINER_MultiHashMapIteratorCallback it, void *it_cls)
Iterate over all entries in the map.
void GNUNET_CONTAINER_multihashmap_destroy(struct GNUNET_CONTAINER_MultiHashMap *map)
Destroy a hash map.
struct GNUNET_CONTAINER_MultiHashMap * GNUNET_CONTAINER_multihashmap_create(unsigned int len, int do_not_copy_keys)
Create a multi hash map.
void REGEX_INTERNAL_iterate_all_edges(struct REGEX_INTERNAL_Automaton *a, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
Iterate over all edges starting from start state of automaton 'a'.
static int iterate_reachables(void *cls, const struct GNUNET_HashCode *key, void *value)
Iterator over hash map entries.
static void store_all_states(void *cls, const struct GNUNET_HashCode *key, const char *proof, int accepting, unsigned int num_edges, const struct REGEX_BLOCK_Edge *edges)
Iterator over all edges of a dfa.
static int reachability_iterator(void *cls, const struct GNUNET_HashCode *key, void *value)
Iterator over hash map entries to mark the ones that are reachable.

References GNUNET_CONTAINER_multihashmap_create(), GNUNET_CONTAINER_multihashmap_destroy(), GNUNET_CONTAINER_multihashmap_iterate(), GNUNET_NO, iterate_reachables(), client_iterator::iterator, client_iterator::iterator_cls, reachability_iterator(), REGEX_INTERNAL_iterate_all_edges(), REGEX_INTERNAL_Automaton::state_count, and store_all_states().

Referenced by main(), and REGEX_INTERNAL_reannounce().

Here is the call graph for this function:
Here is the caller graph for this function: