GNUnet  0.19.5
regex_internal.c File Reference

library to create Deterministic Finite Automatons (DFAs) from regular expressions (regexes). More...

#include "platform.h"
#include "gnunet_util_lib.h"
#include "gnunet_regex_service.h"
#include "regex_internal_lib.h"
#include "regex_internal.h"
Include dependency graph for regex_internal.c:

Go to the source code of this file.

Data Structures

struct  REGEX_INTERNAL_StateSet_MDLL
 Set of states using MDLL API. More...
 
struct  StringBuffer
 String container for faster string operations. More...
 
struct  REGEX_INTERNAL_Strided_Context
 Context for adding strided transitions to a DFA. More...
 
struct  temporal_state_store
 Struct to hold all the relevant state information in the HashMap. More...
 
struct  client_iterator
 Store regex iterator and cls in one place to pass to the hashmap iterator. More...
 

Macros

#define REGEX_DEBUG_DFA   GNUNET_NO
 Set this to GNUNET_YES to enable state naming. More...
 
#define PRIS(a)
 

Functions

static void state_set_append (struct REGEX_INTERNAL_StateSet *set, struct REGEX_INTERNAL_State *state)
 Append state to the given StateSet. More...
 
static int nullstrcmp (const char *str1, const char *str2)
 Compare two strings for equality. More...
 
static void state_add_transition (struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_State *from_state, const char *label, struct REGEX_INTERNAL_State *to_state)
 Adds a transition from one state to another on label. More...
 
static void state_remove_transition (struct REGEX_INTERNAL_State *state, struct REGEX_INTERNAL_Transition *transition)
 Remove a 'transition' from 'state'. More...
 
static int state_compare (const void *a, const void *b)
 Compare two states. More...
 
static unsigned int state_get_edges (struct REGEX_INTERNAL_State *s, struct REGEX_BLOCK_Edge *edges)
 Get all edges leaving state s. More...
 
static int state_set_compare (struct REGEX_INTERNAL_StateSet *sset1, struct REGEX_INTERNAL_StateSet *sset2)
 Compare to state sets by comparing the id's of the states that are contained in each set. More...
 
static void state_set_clear (struct REGEX_INTERNAL_StateSet *set)
 Clears the given StateSet 'set'. More...
 
static void automaton_fragment_clear (struct REGEX_INTERNAL_Automaton *a)
 Clears an automaton fragment. More...
 
static void automaton_destroy_state (struct REGEX_INTERNAL_State *s)
 Frees the memory used by State s. More...
 
static void automaton_remove_state (struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s)
 Remove a state from the given automaton 'a'. More...
 
static void automaton_merge_states (struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s1, struct REGEX_INTERNAL_State *s2)
 Merge two states into one. More...
 
static void automaton_add_state (struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s)
 Add a state to the automaton 'a', always use this function to alter the states DLL of the automaton. More...
 
static void automaton_state_traverse (struct REGEX_INTERNAL_State *s, int *marks, unsigned int *count, REGEX_INTERNAL_traverse_check check, void *check_cls, REGEX_INTERNAL_traverse_action action, void *action_cls)
 Depth-first traversal (DFS) of all states that are reachable from state 's'. More...
 
void REGEX_INTERNAL_automaton_traverse (const struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *start, REGEX_INTERNAL_traverse_check check, void *check_cls, REGEX_INTERNAL_traverse_action action, void *action_cls)
 Traverses the given automaton using depth-first-search (DFS) from it's start state, visiting all reachable states and calling 'action' on each one of them. More...
 
static int sb_nullstrcmp (const struct StringBuffer *s1, const struct StringBuffer *s2)
 Compare two strings for equality. More...
 
static int sb_strcmp (const struct StringBuffer *s1, const struct StringBuffer *s2)
 Compare two strings for equality. More...
 
static void sb_realloc (struct StringBuffer *ret, size_t nlen)
 Reallocate the buffer of 'ret' to fit 'nlen' characters; move the existing string to the beginning of the new buffer. More...
 
static void sb_append (struct StringBuffer *ret, const struct StringBuffer *sarg)
 Append a string. More...
 
static void sb_append_cstr (struct StringBuffer *ret, const char *cstr)
 Append a C string. More...
 
static void sb_wrap (struct StringBuffer *ret, const char *format, size_t extra_chars)
 Wrap a string buffer, that is, set ret to the format string which contains an "%s" which is to be replaced with the original content of 'ret'. More...
 
static void sb_printf1 (struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg)
 Format a string buffer. More...
 
static void sb_printf2 (struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg1, const struct StringBuffer *sarg2)
 Format a string buffer. More...
 
static void sb_printf3 (struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg1, const struct StringBuffer *sarg2, const struct StringBuffer *sarg3)
 Format a string buffer. More...
 
static void sb_free (struct StringBuffer *sb)
 Free resources of the given string buffer. More...
 
static void sb_strdup (struct StringBuffer *out, const struct StringBuffer *in)
 Copy the given string buffer from 'in' to 'out'. More...
 
static void sb_strdup_cstr (struct StringBuffer *out, const char *cstr)
 Copy the given string buffer from 'in' to 'out'. More...
 
static int needs_parentheses (const struct StringBuffer *str)
 Check if the given string str needs parentheses around it when using it to generate a regex. More...
 
static void remove_parentheses (struct StringBuffer *str)
 Remove parentheses surrounding string str. More...
 
static int has_epsilon (const struct StringBuffer *str)
 Check if the string 'str' starts with an epsilon (empty string). More...
 
static void remove_epsilon (const struct StringBuffer *str, struct StringBuffer *ret)
 Remove an epsilon from the string str. More...
 
static int sb_strncmp (const struct StringBuffer *str1, const struct StringBuffer *str2, size_t n)
 Compare n bytes of 'str1' and 'str2'. More...
 
static int sb_strncmp_cstr (const struct StringBuffer *str1, const char *str2, size_t n)
 Compare n bytes of 'str1' and 'str2'. More...
 
static void sb_init (struct StringBuffer *sb, size_t n)
 Initialize string buffer for storing strings of up to n characters. More...
 
static int sb_strkcmp (const struct StringBuffer *str1, const struct StringBuffer *str2, size_t k)
 Compare 'str1', starting from position 'k', with whole 'str2'. More...
 
static void number_states (void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
 Helper function used as 'action' in 'REGEX_INTERNAL_automaton_traverse' function to create the depth-first numbering of the states. More...
 
static void automaton_create_proofs_simplify (const struct StringBuffer *R_last_ij, const struct StringBuffer *R_last_ik, const struct StringBuffer *R_last_kk, const struct StringBuffer *R_last_kj, struct StringBuffer *R_cur_ij, struct StringBuffer *R_cur_l, struct StringBuffer *R_cur_r)
 Construct the regular expression given the inductive step, $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}, and simplify the resulting expression saved in R_cur_ij. More...
 
static int automaton_create_proofs (struct REGEX_INTERNAL_Automaton *a)
 Create proofs for all states in the given automaton. More...
 
static struct REGEX_INTERNAL_Statedfa_state_create (struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_StateSet *nfa_states)
 Creates a new DFA state based on a set of NFA states. More...
 
static unsigned int dfa_move (struct REGEX_INTERNAL_State **s, const char *str)
 Move from the given state 's' to the next state on transition 'str'. More...
 
static void mark_states (void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
 Set the given state 'marked' to GNUNET_YES. More...
 
static void dfa_remove_unreachable_states (struct REGEX_INTERNAL_Automaton *a)
 Remove all unreachable states from DFA 'a'. More...
 
static void dfa_remove_dead_states (struct REGEX_INTERNAL_Automaton *a)
 Remove all dead states from the DFA 'a'. More...
 
static int dfa_merge_nondistinguishable_states (struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a)
 Merge all non distinguishable states in the DFA 'a'. More...
 
static int dfa_minimize (struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a)
 Minimize the given DFA 'a' by removing all unreachable states, removing all dead states and merging all non distinguishable states. More...
 
static void dfa_add_multi_strides_helper (void *cls, const unsigned int depth, char *label, struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *s)
 Recursive helper function to add strides to a DFA. More...
 
static void dfa_add_multi_strides (void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
 Function called for each state in the DFA. More...
 
void REGEX_INTERNAL_dfa_add_multi_strides (struct REGEX_INTERNAL_Context *regex_ctx, struct REGEX_INTERNAL_Automaton *dfa, const unsigned int stride_len)
 Adds multi-strided transitions to the given 'dfa'. More...
 
void dfa_compress_paths_helper (struct REGEX_INTERNAL_Automaton *dfa, struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *cur, char *label, unsigned int max_len, struct REGEX_INTERNAL_Transition **transitions_head, struct REGEX_INTERNAL_Transition **transitions_tail)
 Recursive Helper function for DFA path compression. More...
 
static void dfa_compress_paths (struct REGEX_INTERNAL_Context *regex_ctx, struct REGEX_INTERNAL_Automaton *dfa, unsigned int max_len)
 Compress paths in the given 'dfa'. More...
 
static struct REGEX_INTERNAL_Automatonnfa_fragment_create (struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *end)
 Creates a new NFA fragment. More...
 
static void nfa_add_states (struct REGEX_INTERNAL_Automaton *n, struct REGEX_INTERNAL_State *states_head, struct REGEX_INTERNAL_State *states_tail)
 Adds a list of states to the given automaton 'n'. More...
 
static struct REGEX_INTERNAL_Statenfa_state_create (struct REGEX_INTERNAL_Context *ctx, int accepting)
 Creates a new NFA state. More...
 
static void nfa_closure_set_create (struct REGEX_INTERNAL_StateSet *ret, struct REGEX_INTERNAL_Automaton *nfa, struct REGEX_INTERNAL_StateSet *states, const char *label)
 Calculates the closure set for the given set of states. More...
 
static void nfa_add_concatenation (struct REGEX_INTERNAL_Context *ctx)
 Pops two NFA fragments (a, b) from the stack and concatenates them (ab) More...
 
static void nfa_add_star_op (struct REGEX_INTERNAL_Context *ctx)
 Pops a NFA fragment from the stack (a) and adds a new fragment (a*) More...
 
static void nfa_add_plus_op (struct REGEX_INTERNAL_Context *ctx)
 Pops an NFA fragment (a) from the stack and adds a new fragment (a+) More...
 
static void nfa_add_question_op (struct REGEX_INTERNAL_Context *ctx)
 Pops an NFA fragment (a) from the stack and adds a new fragment (a?) More...
 
static void nfa_add_alternation (struct REGEX_INTERNAL_Context *ctx)
 Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment that alternates between a and b (a|b) More...
 
static void nfa_add_label (struct REGEX_INTERNAL_Context *ctx, const char *label)
 Adds a new nfa fragment to the stack. More...
 
static void REGEX_INTERNAL_context_init (struct REGEX_INTERNAL_Context *ctx)
 Initialize a new context. More...
 
struct REGEX_INTERNAL_AutomatonREGEX_INTERNAL_construct_nfa (const char *regex, const size_t len)
 Construct an NFA by parsing the regex string of length 'len'. More...
 
static void construct_dfa_states (struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *nfa, struct REGEX_INTERNAL_Automaton *dfa, struct REGEX_INTERNAL_State *dfa_state)
 Create DFA states based on given 'nfa' and starting with 'dfa_state'. More...
 
struct REGEX_INTERNAL_AutomatonREGEX_INTERNAL_construct_dfa (const char *regex, const size_t len, unsigned int max_path_len)
 Construct DFA for the given 'regex' of length 'len'. More...
 
void REGEX_INTERNAL_automaton_destroy (struct REGEX_INTERNAL_Automaton *a)
 Free the memory allocated by constructing the REGEX_INTERNAL_Automaton. More...
 
static int evaluate_dfa (struct REGEX_INTERNAL_Automaton *a, const char *string)
 Evaluates the given string using the given DFA automaton. More...
 
static int evaluate_nfa (struct REGEX_INTERNAL_Automaton *a, const char *string)
 Evaluates the given string using the given NFA automaton. More...
 
int REGEX_INTERNAL_eval (struct REGEX_INTERNAL_Automaton *a, const char *string)
 Evaluates the given 'string' against the given compiled regex. More...
 
const char * REGEX_INTERNAL_get_canonical_regex (struct REGEX_INTERNAL_Automaton *a)
 Get the canonical regex of the given automaton. More...
 
unsigned int REGEX_INTERNAL_get_transition_count (struct REGEX_INTERNAL_Automaton *a)
 Get the number of transitions that are contained in the given automaton. More...
 
size_t REGEX_INTERNAL_get_first_key (const char *input_string, size_t string_len, struct GNUNET_HashCode *key)
 Get the first key for the given input_string. More...
 
static void iterate_initial_edge (unsigned int min_len, unsigned int max_len, char *consumed_string, struct REGEX_INTERNAL_State *state, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
 Recursive function that calls the iterator for each synthetic start state. More...
 
void REGEX_INTERNAL_iterate_all_edges (struct REGEX_INTERNAL_Automaton *a, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
 Iterate over all edges starting from start state of automaton 'a'. More...
 
static void store_all_states (void *cls, const struct GNUNET_HashCode *key, const char *proof, int accepting, unsigned int num_edges, const struct REGEX_BLOCK_Edge *edges)
 Iterator over all edges of a dfa. More...
 
static void mark_as_reachable (struct temporal_state_store *state, struct GNUNET_CONTAINER_MultiHashMap *hm)
 Mark state as reachable and call recursively on all its edges. More...
 
static int reachability_iterator (void *cls, const struct GNUNET_HashCode *key, void *value)
 Iterator over hash map entries to mark the ones that are reachable. More...
 
static int iterate_reachables (void *cls, const struct GNUNET_HashCode *key, void *value)
 Iterator over hash map entries. More...
 
void REGEX_INTERNAL_iterate_reachable_edges (struct REGEX_INTERNAL_Automaton *a, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
 Iterate over all edges of automaton 'a' that are reachable from a state with a proof of at least GNUNET_REGEX_INITIAL_BYTES characters. More...
 

Detailed Description

library to create Deterministic Finite Automatons (DFAs) from regular expressions (regexes).

Author
Maximilian Szengel

Definition in file regex_internal.c.

Macro Definition Documentation

◆ REGEX_DEBUG_DFA

#define REGEX_DEBUG_DFA   GNUNET_NO

Set this to GNUNET_YES to enable state naming.

Used to debug NFA->DFA creation. Disabled by default for better performance.

Definition at line 37 of file regex_internal.c.

◆ PRIS

#define PRIS (   a)
Value:
((GNUNET_YES == a.null_flag) ? 6 : (int) a.slen), \
((GNUNET_YES == a.null_flag) ? "(null)" : a.sbuf)
@ GNUNET_YES

Definition at line 1138 of file regex_internal.c.

Function Documentation

◆ state_set_append()

static void state_set_append ( struct REGEX_INTERNAL_StateSet set,
struct REGEX_INTERNAL_State state 
)
static

Append state to the given StateSet.

Parameters
setset to be modified
statestate to be appended

Definition at line 68 of file regex_internal.c.

70 {
71  if (set->off == set->size)
72  GNUNET_array_grow (set->states, set->size, set->size * 2 + 4);
73  set->states[set->off++] = state;
74 }
enum State state
current state of profiling
#define GNUNET_array_grow(arr, size, tsize)
Grow a well-typed (!) array.
unsigned int size
Length of the 'states' array.
struct REGEX_INTERNAL_State ** states
Array of states.
unsigned int off
Number of entries in use in the 'states' array.

References GNUNET_array_grow, REGEX_INTERNAL_StateSet::off, REGEX_INTERNAL_StateSet::size, state, and REGEX_INTERNAL_StateSet::states.

Referenced by evaluate_nfa(), nfa_closure_set_create(), and REGEX_INTERNAL_construct_dfa().

Here is the caller graph for this function:

◆ nullstrcmp()

static int nullstrcmp ( const char *  str1,
const char *  str2 
)
static

Compare two strings for equality.

If either is NULL they are not equal.

Parameters
str1first string for comparison.
str2second string for comparison.
Returns
0 if the strings are the same or both NULL, 1 or -1 if not.

Definition at line 86 of file regex_internal.c.

87 {
88  if ((NULL == str1) != (NULL == str2))
89  return -1;
90  if ((NULL == str1) && (NULL == str2))
91  return 0;
92 
93  return strcmp (str1, str2);
94 }

Referenced by nfa_closure_set_create(), and state_add_transition().

Here is the caller graph for this function:

◆ state_add_transition()

static void state_add_transition ( struct REGEX_INTERNAL_Context ctx,
struct REGEX_INTERNAL_State from_state,
const char *  label,
struct REGEX_INTERNAL_State to_state 
)
static

Adds a transition from one state to another on label.

Does not add duplicate states.

Parameters
ctxcontext
from_statestarting state for the transition
labeltransition label
to_statestate to where the transition should point to

Definition at line 107 of file regex_internal.c.

111 {
113  struct REGEX_INTERNAL_Transition *oth;
114 
115  if (NULL == from_state)
116  {
117  GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not create Transition.\n");
118  return;
119  }
120 
121  /* Do not add duplicate state transitions */
122  for (t = from_state->transitions_head; NULL != t; t = t->next)
123  {
124  if ((t->to_state == to_state) && (0 == nullstrcmp (t->label, label)) &&
125  (t->from_state == from_state) )
126  return;
127  }
128 
129  /* sort transitions by label */
130  for (oth = from_state->transitions_head; NULL != oth; oth = oth->next)
131  {
132  if (0 < nullstrcmp (oth->label, label))
133  break;
134  }
135 
137  if (NULL != ctx)
138  t->id = ctx->transition_id++;
139  if (NULL != label)
140  t->label = GNUNET_strdup (label);
141  else
142  t->label = NULL;
143  t->to_state = to_state;
144  t->from_state = from_state;
145 
146  /* Add outgoing transition to 'from_state' */
150  oth,
151  t);
152 }
static struct GNUNET_DNSSTUB_Context * ctx
Context for DNS resolution.
static struct GNUNET_SCHEDULER_Task * t
Main task.
#define GNUNET_CONTAINER_DLL_insert_before(head, tail, other, element)
Insert an element into a DLL before the given other element.
#define GNUNET_log(kind,...)
@ GNUNET_ERROR_TYPE_ERROR
#define GNUNET_strdup(a)
Wrapper around GNUNET_xstrdup_.
#define GNUNET_new(type)
Allocate a struct or union of the given type.
static int nullstrcmp(const char *str1, const char *str2)
Compare two strings for equality.
struct GNUNET_SCHEDULER_Task * next
This is a linked list.
Definition: scheduler.c:140
unsigned int transition_count
Number of transitions from this state to other states.
struct REGEX_INTERNAL_Transition * transitions_tail
DLL of transitions.
struct REGEX_INTERNAL_Transition * transitions_head
DLL of transitions.
Transition between two states.
struct REGEX_INTERNAL_State * to_state
State to which this transition leads.
struct REGEX_INTERNAL_Transition * next
This is a linked list.
struct REGEX_INTERNAL_State * from_state
State from which this transition origins.
char * label
Label for this transition.

References ctx, REGEX_INTERNAL_Transition::from_state, GNUNET_CONTAINER_DLL_insert_before, GNUNET_ERROR_TYPE_ERROR, GNUNET_log, GNUNET_new, GNUNET_strdup, REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_Transition::next, GNUNET_SCHEDULER_Task::next, nullstrcmp(), t, REGEX_INTERNAL_Transition::to_state, REGEX_INTERNAL_State::transition_count, REGEX_INTERNAL_State::transitions_head, and REGEX_INTERNAL_State::transitions_tail.

Referenced by automaton_merge_states(), dfa_compress_paths(), dfa_state_create(), nfa_add_alternation(), nfa_add_concatenation(), nfa_add_label(), nfa_add_plus_op(), nfa_add_question_op(), nfa_add_star_op(), and REGEX_INTERNAL_dfa_add_multi_strides().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ state_remove_transition()

static void state_remove_transition ( struct REGEX_INTERNAL_State state,
struct REGEX_INTERNAL_Transition transition 
)
static

Remove a 'transition' from 'state'.

Parameters
statestate from which the to-be-removed transition originates.
transitiontransition that should be removed from state 'state'.

Definition at line 162 of file regex_internal.c.

164 {
165  if ((NULL == state) || (NULL == transition))
166  return;
167 
168  if (transition->from_state != state)
169  return;
170 
171  GNUNET_free (transition->label);
172 
173  state->transition_count--;
174  GNUNET_CONTAINER_DLL_remove (state->transitions_head,
175  state->transitions_tail,
176  transition);
177 
178  GNUNET_free (transition);
179 }
#define GNUNET_CONTAINER_DLL_remove(head, tail, element)
Remove an element from a DLL.
#define GNUNET_free(ptr)
Wrapper around free.

References REGEX_INTERNAL_Transition::from_state, GNUNET_CONTAINER_DLL_remove, GNUNET_free, REGEX_INTERNAL_Transition::label, and state.

Referenced by automaton_destroy_state(), automaton_merge_states(), and automaton_remove_state().

Here is the caller graph for this function:

◆ state_compare()

static int state_compare ( const void *  a,
const void *  b 
)
static

Compare two states.

Used for sorting.

Parameters
afirst state
bsecond state
Returns
an integer less than, equal to, or greater than zero if the first argument is considered to be respectively less than, equal to, or greater than the second.

Definition at line 193 of file regex_internal.c.

194 {
195  struct REGEX_INTERNAL_State **s1 = (struct REGEX_INTERNAL_State **) a;
196  struct REGEX_INTERNAL_State **s2 = (struct REGEX_INTERNAL_State **) b;
197 
198  return (*s1)->id - (*s2)->id;
199 }
unsigned int id
Unique state id.

References REGEX_INTERNAL_State::id.

Referenced by nfa_closure_set_create(), and state_set_compare().

Here is the caller graph for this function:

◆ state_get_edges()

static unsigned int state_get_edges ( struct REGEX_INTERNAL_State s,
struct REGEX_BLOCK_Edge edges 
)
static

Get all edges leaving state s.

Parameters
sstate.
edgesall edges leaving s, expected to be allocated and have enough space for s->transitions_count elements.
Returns
number of edges.

Definition at line 212 of file regex_internal.c.

213 {
215  unsigned int count;
216 
217  if (NULL == s)
218  return 0;
219 
220  count = 0;
221 
222  for (t = s->transitions_head; NULL != t; t = t->next)
223  {
224  if (NULL != t->to_state)
225  {
226  edges[count].label = t->label;
227  edges[count].destination = t->to_state->hash;
228  count++;
229  }
230  }
231  return count;
232 }
const char * label
Label of the edge.
struct GNUNET_HashCode destination
Destination of the edge.

References REGEX_BLOCK_Edge::destination, REGEX_BLOCK_Edge::label, GNUNET_SCHEDULER_Task::next, t, and REGEX_INTERNAL_State::transitions_head.

Referenced by iterate_initial_edge(), and REGEX_INTERNAL_iterate_all_edges().

Here is the caller graph for this function:

◆ state_set_compare()

static int state_set_compare ( struct REGEX_INTERNAL_StateSet sset1,
struct REGEX_INTERNAL_StateSet sset2 
)
static

Compare to state sets by comparing the id's of the states that are contained in each set.

Both sets are expected to be sorted by id!

Parameters
sset1first state set
sset2second state set
Returns
0 if the sets are equal, otherwise non-zero

Definition at line 244 of file regex_internal.c.

246 {
247  int result;
248  unsigned int i;
249 
250  if ((NULL == sset1) || (NULL == sset2))
251  return 1;
252 
253  result = sset1->off - sset2->off;
254  if (result < 0)
255  return -1;
256  if (result > 0)
257  return 1;
258  for (i = 0; i < sset1->off; i++)
259  if (0 != (result = state_compare (&sset1->states[i], &sset2->states[i])))
260  break;
261  return result;
262 }
static int result
Global testing status.
static int state_compare(const void *a, const void *b)
Compare two states.

References REGEX_INTERNAL_StateSet::off, result, state_compare(), and REGEX_INTERNAL_StateSet::states.

Referenced by construct_dfa_states().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ state_set_clear()

static void state_set_clear ( struct REGEX_INTERNAL_StateSet set)
static

Clears the given StateSet 'set'.

Parameters
setset to be cleared

Definition at line 271 of file regex_internal.c.

272 {
273  GNUNET_array_grow (set->states, set->size, 0);
274  set->off = 0;
275 }

References GNUNET_array_grow, REGEX_INTERNAL_StateSet::off, REGEX_INTERNAL_StateSet::size, and REGEX_INTERNAL_StateSet::states.

Referenced by automaton_destroy_state(), construct_dfa_states(), evaluate_nfa(), and REGEX_INTERNAL_construct_dfa().

Here is the caller graph for this function:

◆ automaton_fragment_clear()

static void automaton_fragment_clear ( struct REGEX_INTERNAL_Automaton a)
static

Clears an automaton fragment.

Does not destroy the states inside the automaton.

Parameters
aautomaton to be cleared

Definition at line 285 of file regex_internal.c.

286 {
287  if (NULL == a)
288  return;
289 
290  a->start = NULL;
291  a->end = NULL;
292  a->states_head = NULL;
293  a->states_tail = NULL;
294  a->state_count = 0;
295  GNUNET_free (a);
296 }
struct REGEX_INTERNAL_State * start
First state of the automaton.
struct REGEX_INTERNAL_State * states_tail
DLL of states.
unsigned int state_count
Number of states in the automaton.
struct REGEX_INTERNAL_State * states_head
DLL of states.
struct REGEX_INTERNAL_State * end
End state of the partial NFA.

References REGEX_INTERNAL_Automaton::end, GNUNET_free, REGEX_INTERNAL_Automaton::start, REGEX_INTERNAL_Automaton::state_count, REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.

Referenced by nfa_add_alternation(), nfa_add_concatenation(), nfa_add_question_op(), and nfa_add_star_op().

Here is the caller graph for this function:

◆ automaton_destroy_state()

static void automaton_destroy_state ( struct REGEX_INTERNAL_State s)
static

Frees the memory used by State s.

Parameters
sstate that should be destroyed

Definition at line 305 of file regex_internal.c.

306 {
308  struct REGEX_INTERNAL_Transition *next_t;
309 
310  if (NULL == s)
311  return;
312 
313  GNUNET_free (s->name);
314  GNUNET_free (s->proof);
315  state_set_clear (&s->nfa_set);
316  for (t = s->transitions_head; NULL != t; t = next_t)
317  {
318  next_t = t->next;
320  }
321 
322  GNUNET_free (s);
323 }
static void state_set_clear(struct REGEX_INTERNAL_StateSet *set)
Clears the given StateSet 'set'.
static void state_remove_transition(struct REGEX_INTERNAL_State *state, struct REGEX_INTERNAL_Transition *transition)
Remove a 'transition' from 'state'.
char * proof
Proof for this state.
struct REGEX_INTERNAL_StateSet nfa_set
Set of states on which this state is based on.
char * name
Human readable name of the state.

References GNUNET_free, REGEX_INTERNAL_State::name, GNUNET_SCHEDULER_Task::next, REGEX_INTERNAL_State::nfa_set, REGEX_INTERNAL_State::proof, state_remove_transition(), state_set_clear(), t, and REGEX_INTERNAL_State::transitions_head.

Referenced by automaton_merge_states(), automaton_remove_state(), and REGEX_INTERNAL_automaton_destroy().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ automaton_remove_state()

static void automaton_remove_state ( struct REGEX_INTERNAL_Automaton a,
struct REGEX_INTERNAL_State s 
)
static

Remove a state from the given automaton 'a'.

Always use this function when altering the states of an automaton. Will also remove all transitions leading to this state, before destroying it.

Parameters
aautomaton
sstate to remove

Definition at line 335 of file regex_internal.c.

337 {
338  struct REGEX_INTERNAL_State *s_check;
339  struct REGEX_INTERNAL_Transition *t_check;
340  struct REGEX_INTERNAL_Transition *t_check_next;
341 
342  if ((NULL == a) || (NULL == s))
343  return;
344 
345  /* remove all transitions leading to this state */
346  for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
347  {
348  for (t_check = s_check->transitions_head; NULL != t_check;
349  t_check = t_check_next)
350  {
351  t_check_next = t_check->next;
352  if (t_check->to_state == s)
353  state_remove_transition (s_check, t_check);
354  }
355  }
356 
357  /* remove state */
359  a->state_count--;
360 
362 }
static void automaton_destroy_state(struct REGEX_INTERNAL_State *s)
Frees the memory used by State s.
struct REGEX_INTERNAL_State * next
This is a linked list to keep states in an automaton.

References automaton_destroy_state(), GNUNET_CONTAINER_DLL_remove, REGEX_INTERNAL_Transition::next, REGEX_INTERNAL_State::next, REGEX_INTERNAL_Automaton::state_count, state_remove_transition(), REGEX_INTERNAL_Automaton::states_head, REGEX_INTERNAL_Automaton::states_tail, REGEX_INTERNAL_Transition::to_state, and REGEX_INTERNAL_State::transitions_head.

Referenced by dfa_compress_paths(), dfa_remove_dead_states(), and dfa_remove_unreachable_states().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ automaton_merge_states()

static void automaton_merge_states ( struct REGEX_INTERNAL_Context ctx,
struct REGEX_INTERNAL_Automaton a,
struct REGEX_INTERNAL_State s1,
struct REGEX_INTERNAL_State s2 
)
static

Merge two states into one.

Will merge 's1' and 's2' into 's1' and destroy 's2'. 's1' will contain all (non-duplicate) outgoing transitions of 's2'.

Parameters
ctxcontext
aautomaton
s1first state
s2second state, will be destroyed

Definition at line 375 of file regex_internal.c.

379 {
380  struct REGEX_INTERNAL_State *s_check;
381  struct REGEX_INTERNAL_Transition *t_check;
383  struct REGEX_INTERNAL_Transition *t_next;
384  int is_dup;
385 
386  if (s1 == s2)
387  return;
388 
389  /* 1. Make all transitions pointing to s2 point to s1, unless this transition
390  * does not already exists, if it already exists remove transition. */
391  for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
392  {
393  for (t_check = s_check->transitions_head; NULL != t_check; t_check = t_next)
394  {
395  t_next = t_check->next;
396 
397  if (s2 == t_check->to_state)
398  {
399  is_dup = GNUNET_NO;
400  for (t = t_check->from_state->transitions_head; NULL != t; t = t->next)
401  {
402  if ((t->to_state == s1) && (0 == strcmp (t_check->label, t->label)) )
403  is_dup = GNUNET_YES;
404  }
405  if (GNUNET_NO == is_dup)
406  t_check->to_state = s1;
407  else
408  state_remove_transition (t_check->from_state, t_check);
409  }
410  }
411  }
412 
413  /* 2. Add all transitions from s2 to sX to s1 */
414  for (t_check = s2->transitions_head; NULL != t_check; t_check = t_check->next)
415  {
416  if (t_check->to_state != s1)
417  state_add_transition (ctx, s1, t_check->label, t_check->to_state);
418  }
419 
420  /* 3. Rename s1 to {s1,s2} */
421 #if REGEX_DEBUG_DFA
422  char *new_name;
423 
424  new_name = s1->name;
425  GNUNET_asprintf (&s1->name, "{%s,%s}", new_name, s2->name);
426  GNUNET_free (new_name);
427 #endif
428 
429  /* remove state */
431  a->state_count--;
433 }
@ GNUNET_NO
int int GNUNET_asprintf(char **buf, const char *format,...) __attribute__((format(printf
Like asprintf, just portable.
static void state_add_transition(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_State *from_state, const char *label, struct REGEX_INTERNAL_State *to_state)
Adds a transition from one state to another on label.

References automaton_destroy_state(), ctx, REGEX_INTERNAL_Transition::from_state, GNUNET_asprintf(), GNUNET_CONTAINER_DLL_remove, GNUNET_free, GNUNET_NO, GNUNET_YES, REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_State::name, REGEX_INTERNAL_Transition::next, REGEX_INTERNAL_State::next, GNUNET_SCHEDULER_Task::next, state_add_transition(), REGEX_INTERNAL_Automaton::state_count, state_remove_transition(), REGEX_INTERNAL_Automaton::states_head, REGEX_INTERNAL_Automaton::states_tail, t, REGEX_INTERNAL_Transition::to_state, and REGEX_INTERNAL_State::transitions_head.

Referenced by dfa_merge_nondistinguishable_states().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ automaton_add_state()

static void automaton_add_state ( struct REGEX_INTERNAL_Automaton a,
struct REGEX_INTERNAL_State s 
)
static

Add a state to the automaton 'a', always use this function to alter the states DLL of the automaton.

Parameters
aautomaton to add the state to
sstate that should be added

Definition at line 444 of file regex_internal.c.

446 {
448  a->state_count++;
449 }
#define GNUNET_CONTAINER_DLL_insert(head, tail, element)
Insert an element at the head of a DLL.

References GNUNET_CONTAINER_DLL_insert, REGEX_INTERNAL_Automaton::state_count, REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.

Referenced by construct_dfa_states(), nfa_fragment_create(), and REGEX_INTERNAL_construct_dfa().

Here is the caller graph for this function:

◆ automaton_state_traverse()

static void automaton_state_traverse ( struct REGEX_INTERNAL_State s,
int *  marks,
unsigned int *  count,
REGEX_INTERNAL_traverse_check  check,
void *  check_cls,
REGEX_INTERNAL_traverse_action  action,
void *  action_cls 
)
static

Depth-first traversal (DFS) of all states that are reachable from state 's'.

Performs 'action' on each visited state.

Parameters
sstart state.
marksan array of size a->state_count to remember which state was already visited.
countcurrent count of the state.
checkfunction that is checked before advancing on each transition in the DFS.
check_clsclosure for check.
actionaction to be performed on each state.
action_clsclosure for action.

Definition at line 467 of file regex_internal.c.

474 {
476 
477  if (GNUNET_YES == marks[s->traversal_id])
478  return;
479 
480  marks[s->traversal_id] = GNUNET_YES;
481 
482  if (NULL != action)
483  action (action_cls, *count, s);
484 
485  (*count)++;
486 
487  for (t = s->transitions_head; NULL != t; t = t->next)
488  {
489  if ((NULL == check) ||
490  ((NULL != check) && (GNUNET_YES == check (check_cls, s, t)) ))
491  {
492  automaton_state_traverse (t->to_state,
493  marks,
494  count,
495  check,
496  check_cls,
497  action,
498  action_cls);
499  }
500  }
501 }
static void automaton_state_traverse(struct REGEX_INTERNAL_State *s, int *marks, unsigned int *count, REGEX_INTERNAL_traverse_check check, void *check_cls, REGEX_INTERNAL_traverse_action action, void *action_cls)
Depth-first traversal (DFS) of all states that are reachable from state 's'.
unsigned int traversal_id
Unique state id that is used for traversing the automaton.

References consensus-simulation::action, GNUNET_YES, GNUNET_SCHEDULER_Task::next, t, REGEX_INTERNAL_State::transitions_head, and REGEX_INTERNAL_State::traversal_id.

Referenced by REGEX_INTERNAL_automaton_traverse().

Here is the caller graph for this function:

◆ REGEX_INTERNAL_automaton_traverse()

void REGEX_INTERNAL_automaton_traverse ( const struct REGEX_INTERNAL_Automaton a,
struct REGEX_INTERNAL_State start,
REGEX_INTERNAL_traverse_check  check,
void *  check_cls,
REGEX_INTERNAL_traverse_action  action,
void *  action_cls 
)

Traverses the given automaton using depth-first-search (DFS) from it's start state, visiting all reachable states and calling 'action' on each one of them.

Parameters
aautomaton to be traversed.
startstart state, pass a->start or NULL to traverse the whole automaton.
checkfunction that is checked before advancing on each transition in the DFS.
check_clsclosure for check.
actionaction to be performed on each state.
action_clsclosure for action

Definition at line 505 of file regex_internal.c.

511 {
512  unsigned int count;
513  struct REGEX_INTERNAL_State *s;
514 
515  if ((NULL == a) || (0 == a->state_count))
516  return;
517 
518  int marks[a->state_count];
519 
520  for (count = 0, s = a->states_head; NULL != s && count < a->state_count;
521  s = s->next, count++)
522  {
523  s->traversal_id = count;
524  marks[s->traversal_id] = GNUNET_NO;
525  }
526 
527  count = 0;
528 
529  if (NULL == start)
530  s = a->start;
531  else
532  s = start;
533 
535  marks,
536  &count,
537  check,
538  check_cls,
539  action,
540  action_cls);
541 }
static int start
Set if we are to start default services (including ARM).
Definition: gnunet-arm.c:39

References consensus-simulation::action, automaton_state_traverse(), GNUNET_NO, REGEX_INTERNAL_State::next, start, REGEX_INTERNAL_Automaton::start, REGEX_INTERNAL_Automaton::state_count, REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_State::traversal_id.

Referenced by automaton_create_proofs(), dfa_remove_unreachable_states(), REGEX_INTERNAL_construct_nfa(), REGEX_INTERNAL_dfa_add_multi_strides(), and REGEX_TEST_automaton_save_graph().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ sb_nullstrcmp()

static int sb_nullstrcmp ( const struct StringBuffer s1,
const struct StringBuffer s2 
)
static

Compare two strings for equality.

If either is NULL they are not equal.

Parameters
s1first string for comparison.
s2second string for comparison.
Returns
0 if the strings are the same or both NULL, 1 or -1 if not.

Definition at line 597 of file regex_internal.c.

598 {
599  if ((GNUNET_YES == s1->null_flag) && (GNUNET_YES == s2->null_flag))
600  return 0;
601  if ((GNUNET_YES == s1->null_flag) || (GNUNET_YES == s2->null_flag))
602  return -1;
603  if (s1->slen != s2->slen)
604  return -1;
605  if (0 == s1->slen)
606  return 0;
607  return memcmp (s1->sbuf, s2->sbuf, s1->slen);
608 }
int16_t null_flag
Buffer currently represents "NULL" (not the empty string!)
char * sbuf
Buffer holding the string (may start in the middle!); NOT 0-terminated!
size_t slen
Length of the string in the buffer.

References GNUNET_YES, StringBuffer::null_flag, StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs_simplify().

Here is the caller graph for this function:

◆ sb_strcmp()

static int sb_strcmp ( const struct StringBuffer s1,
const struct StringBuffer s2 
)
static

Compare two strings for equality.

Parameters
s1first string for comparison.
s2second string for comparison.
Returns
0 if the strings are the same, 1 or -1 if not.

Definition at line 620 of file regex_internal.c.

621 {
622  if (s1->slen != s2->slen)
623  return -1;
624  if (0 == s1->slen)
625  return 0;
626  return memcmp (s1->sbuf, s2->sbuf, s1->slen);
627 }

References StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs_simplify().

Here is the caller graph for this function:

◆ sb_realloc()

static void sb_realloc ( struct StringBuffer ret,
size_t  nlen 
)
static

Reallocate the buffer of 'ret' to fit 'nlen' characters; move the existing string to the beginning of the new buffer.

Parameters
retcurrent buffer, to be updated
nlentarget length for the buffer, must be at least ret->slen

Definition at line 638 of file regex_internal.c.

639 {
640  char *old;
641 
642  GNUNET_assert (nlen >= ret->slen);
643  old = ret->abuf;
644  ret->abuf = GNUNET_malloc (nlen);
645  ret->blen = nlen;
646  GNUNET_memcpy (ret->abuf, ret->sbuf, ret->slen);
647  ret->sbuf = ret->abuf;
648  GNUNET_free (old);
649 }
static int ret
Return value of the commandline.
Definition: gnunet-abd.c:81
#define GNUNET_memcpy(dst, src, n)
Call memcpy() but check for n being 0 first.
#define GNUNET_assert(cond)
Use this for fatal errors that cannot be handled.
#define GNUNET_malloc(size)
Wrapper around malloc.

References GNUNET_assert, GNUNET_free, GNUNET_malloc, GNUNET_memcpy, and ret.

Referenced by sb_append(), sb_append_cstr(), sb_printf1(), sb_printf2(), and sb_printf3().

Here is the caller graph for this function:

◆ sb_append()

static void sb_append ( struct StringBuffer ret,
const struct StringBuffer sarg 
)
static

Append a string.

Parameters
retwhere to write the result
sargstring to append

Definition at line 659 of file regex_internal.c.

660 {
661  if (GNUNET_YES == ret->null_flag)
662  ret->slen = 0;
663  ret->null_flag = GNUNET_NO;
664  if (ret->blen < sarg->slen + ret->slen)
665  sb_realloc (ret, ret->blen + sarg->slen + 128);
666  GNUNET_memcpy (&ret->sbuf[ret->slen], sarg->sbuf, sarg->slen);
667  ret->slen += sarg->slen;
668 }
static void sb_realloc(struct StringBuffer *ret, size_t nlen)
Reallocate the buffer of 'ret' to fit 'nlen' characters; move the existing string to the beginning of...

References GNUNET_memcpy, GNUNET_NO, GNUNET_YES, ret, sb_realloc(), StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ sb_append_cstr()

static void sb_append_cstr ( struct StringBuffer ret,
const char *  cstr 
)
static

Append a C string.

Parameters
retwhere to write the result
cstrstring to append

Definition at line 678 of file regex_internal.c.

679 {
680  size_t cstr_len = strlen (cstr);
681 
682  if (GNUNET_YES == ret->null_flag)
683  ret->slen = 0;
684  ret->null_flag = GNUNET_NO;
685  if (ret->blen < cstr_len + ret->slen)
686  sb_realloc (ret, ret->blen + cstr_len + 128);
687  GNUNET_memcpy (&ret->sbuf[ret->slen], cstr, cstr_len);
688  ret->slen += cstr_len;
689 }

References GNUNET_memcpy, GNUNET_NO, GNUNET_YES, ret, and sb_realloc().

Referenced by automaton_create_proofs().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ sb_wrap()

static void sb_wrap ( struct StringBuffer ret,
const char *  format,
size_t  extra_chars 
)
static

Wrap a string buffer, that is, set ret to the format string which contains an "%s" which is to be replaced with the original content of 'ret'.

Note that optimizing this function is not really worth it, it is rarely called.

Parameters
retwhere to write the result and take the input for %.*s from
formatformat string, fprintf-style, with exactly one "%.*s"
extra_charshow long will the result be, in addition to 'sarg' length

Definition at line 703 of file regex_internal.c.

704 {
705  char *temp;
706 
707  if (GNUNET_YES == ret->null_flag)
708  ret->slen = 0;
709  ret->null_flag = GNUNET_NO;
710  temp = GNUNET_malloc (ret->slen + extra_chars + 1);
711  GNUNET_snprintf (temp,
712  ret->slen + extra_chars + 1,
713  format,
714  (int) ret->slen,
715  ret->sbuf);
716  GNUNET_free (ret->abuf);
717  ret->abuf = temp;
718  ret->sbuf = temp;
719  ret->blen = ret->slen + extra_chars + 1;
720  ret->slen = ret->slen + extra_chars;
721 }
int GNUNET_snprintf(char *buf, size_t size, const char *format,...) __attribute__((format(printf
Like snprintf, just aborts if the buffer is of insufficient size.

References GNUNET_free, GNUNET_malloc, GNUNET_NO, GNUNET_snprintf(), GNUNET_YES, and ret.

Referenced by automaton_create_proofs().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ sb_printf1()

static void sb_printf1 ( struct StringBuffer ret,
const char *  format,
size_t  extra_chars,
const struct StringBuffer sarg 
)
static

Format a string buffer.

Note that optimizing this function is not really worth it, it is rarely called.

Parameters
retwhere to write the result
formatformat string, fprintf-style, with exactly one "%.*s"
extra_charshow long will the result be, in addition to 'sarg' length
sargstring to print into the format

Definition at line 734 of file regex_internal.c.

738 {
739  if (ret->blen < sarg->slen + extra_chars + 1)
740  sb_realloc (ret, sarg->slen + extra_chars + 1);
741  ret->null_flag = GNUNET_NO;
742  ret->sbuf = ret->abuf;
743  ret->slen = sarg->slen + extra_chars;
744  GNUNET_snprintf (ret->sbuf, ret->blen, format, (int) sarg->slen, sarg->sbuf);
745 }

References GNUNET_NO, GNUNET_snprintf(), ret, sb_realloc(), StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs_simplify().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ sb_printf2()

static void sb_printf2 ( struct StringBuffer ret,
const char *  format,
size_t  extra_chars,
const struct StringBuffer sarg1,
const struct StringBuffer sarg2 
)
static

Format a string buffer.

Parameters
retwhere to write the result
formatformat string, fprintf-style, with exactly two "%.*s"
extra_charshow long will the result be, in addition to 'sarg1/2' length
sarg1first string to print into the format
sarg2second string to print into the format

Definition at line 758 of file regex_internal.c.

763 {
764  if (ret->blen < sarg1->slen + sarg2->slen + extra_chars + 1)
765  sb_realloc (ret, sarg1->slen + sarg2->slen + extra_chars + 1);
766  ret->null_flag = GNUNET_NO;
767  ret->slen = sarg1->slen + sarg2->slen + extra_chars;
768  ret->sbuf = ret->abuf;
769  GNUNET_snprintf (ret->sbuf,
770  ret->blen,
771  format,
772  (int) sarg1->slen,
773  sarg1->sbuf,
774  (int) sarg2->slen,
775  sarg2->sbuf);
776 }

References GNUNET_NO, GNUNET_snprintf(), ret, sb_realloc(), StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs_simplify().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ sb_printf3()

static void sb_printf3 ( struct StringBuffer ret,
const char *  format,
size_t  extra_chars,
const struct StringBuffer sarg1,
const struct StringBuffer sarg2,
const struct StringBuffer sarg3 
)
static

Format a string buffer.

Note that optimizing this function is not really worth it, it is rarely called.

Parameters
retwhere to write the result
formatformat string, fprintf-style, with exactly three "%.*s"
extra_charshow long will the result be, in addition to 'sarg1/2/3' length
sarg1first string to print into the format
sarg2second string to print into the format
sarg3third string to print into the format

Definition at line 791 of file regex_internal.c.

797 {
798  if (ret->blen < sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1)
799  sb_realloc (ret, sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1);
800  ret->null_flag = GNUNET_NO;
801  ret->slen = sarg1->slen + sarg2->slen + sarg3->slen + extra_chars;
802  ret->sbuf = ret->abuf;
803  GNUNET_snprintf (ret->sbuf,
804  ret->blen,
805  format,
806  (int) sarg1->slen,
807  sarg1->sbuf,
808  (int) sarg2->slen,
809  sarg2->sbuf,
810  (int) sarg3->slen,
811  sarg3->sbuf);
812 }

References GNUNET_NO, GNUNET_snprintf(), ret, sb_realloc(), StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs_simplify().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ sb_free()

static void sb_free ( struct StringBuffer sb)
static

Free resources of the given string buffer.

Parameters
sbbuffer to free (actual pointer is not freed, as they should not be individually allocated)

Definition at line 822 of file regex_internal.c.

823 {
824  GNUNET_array_grow (sb->abuf, sb->blen, 0);
825  sb->slen = 0;
826  sb->sbuf = NULL;
827  sb->null_flag = GNUNET_YES;
828 }
unsigned int blen
Number of bytes allocated for sbuf.
char * abuf
Allocated buffer.

References StringBuffer::abuf, StringBuffer::blen, GNUNET_array_grow, GNUNET_YES, StringBuffer::null_flag, StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs(), and automaton_create_proofs_simplify().

Here is the caller graph for this function:

◆ sb_strdup()

static void sb_strdup ( struct StringBuffer out,
const struct StringBuffer in 
)
static

Copy the given string buffer from 'in' to 'out'.

Parameters
ininput string
outoutput string

Definition at line 838 of file regex_internal.c.

840 {
841  out->null_flag = in->null_flag;
842  if (GNUNET_YES == out->null_flag)
843  return;
844  if (out->blen < in->slen)
845  {
846  GNUNET_array_grow (out->abuf, out->blen, in->slen);
847  }
848  out->sbuf = out->abuf;
849  out->slen = in->slen;
850  GNUNET_memcpy (out->sbuf, in->sbuf, out->slen);
851 }

References StringBuffer::abuf, StringBuffer::blen, GNUNET_array_grow, GNUNET_memcpy, GNUNET_YES, StringBuffer::null_flag, StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs_simplify(), and remove_epsilon().

Here is the caller graph for this function:

◆ sb_strdup_cstr()

static void sb_strdup_cstr ( struct StringBuffer out,
const char *  cstr 
)
static

Copy the given string buffer from 'in' to 'out'.

Parameters
cstrinput string
outoutput string

Definition at line 861 of file regex_internal.c.

862 {
863  if (NULL == cstr)
864  {
865  out->null_flag = GNUNET_YES;
866  return;
867  }
868  out->null_flag = GNUNET_NO;
869  out->slen = strlen (cstr);
870  if (out->blen < out->slen)
871  {
872  GNUNET_array_grow (out->abuf, out->blen, out->slen);
873  }
874  out->sbuf = out->abuf;
875  GNUNET_memcpy (out->sbuf, cstr, out->slen);
876 }

References StringBuffer::abuf, StringBuffer::blen, GNUNET_array_grow, GNUNET_memcpy, GNUNET_NO, GNUNET_YES, StringBuffer::null_flag, StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs().

Here is the caller graph for this function:

◆ needs_parentheses()

static int needs_parentheses ( const struct StringBuffer str)
static

Check if the given string str needs parentheses around it when using it to generate a regex.

Parameters
strstring
Returns
GNUNET_YES if parentheses are needed, GNUNET_NO otherwise

Definition at line 888 of file regex_internal.c.

889 {
890  size_t slen;
891  const char *op;
892  const char *cl;
893  const char *pos;
894  const char *end;
895  unsigned int cnt;
896 
897  if ((GNUNET_YES == str->null_flag) || ((slen = str->slen) < 2))
898  return GNUNET_NO;
899  pos = str->sbuf;
900  if ('(' != pos[0])
901  return GNUNET_YES;
902  end = str->sbuf + slen;
903  cnt = 1;
904  pos++;
905  while (cnt > 0)
906  {
907  cl = memchr (pos, ')', end - pos);
908  if (NULL == cl)
909  {
910  GNUNET_break (0);
911  return GNUNET_YES;
912  }
913  /* while '(' before ')', count opening parens */
914  while ((NULL != (op = memchr (pos, '(', end - pos))) && (op < cl))
915  {
916  cnt++;
917  pos = op + 1;
918  }
919  /* got ')' first */
920  cnt--;
921  pos = cl + 1;
922  }
923  return (*pos == '\0') ? GNUNET_NO : GNUNET_YES;
924 }
static struct GNUNET_ARM_Operation * op
Current operation.
Definition: gnunet-arm.c:144
static int end
Set if we are to shutdown all services (including ARM).
Definition: gnunet-arm.c:34
#define GNUNET_break(cond)
Use this for internal assertion violations that are not fatal (can be handled) but should not occur.

References end, GNUNET_break, GNUNET_NO, GNUNET_YES, StringBuffer::null_flag, op, StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs(), and automaton_create_proofs_simplify().

Here is the caller graph for this function:

◆ remove_parentheses()

static void remove_parentheses ( struct StringBuffer str)
static

Remove parentheses surrounding string str.

Example: "(a)" becomes "a", "(a|b)|(a|c)" stays the same. You need to GNUNET_free() the returned string.

Parameters
strstring, modified to contain a
Returns
string without surrounding parentheses, string 'str' if no preceding epsilon could be found, NULL if 'str' was NULL

Definition at line 937 of file regex_internal.c.

938 {
939  size_t slen;
940  const char *pos;
941  const char *end;
942  const char *sbuf;
943  const char *op;
944  const char *cp;
945  unsigned int cnt;
946 
947  if (0)
948  return;
949  sbuf = str->sbuf;
950  if ((GNUNET_YES == str->null_flag) || (1 >= (slen = str->slen)) ||
951  ('(' != str->sbuf[0]) || (')' != str->sbuf[slen - 1]))
952  return;
953  cnt = 0;
954  pos = &sbuf[1];
955  end = &sbuf[slen - 1];
956  op = memchr (pos, '(', end - pos);
957  cp = memchr (pos, ')', end - pos);
958  while (NULL != cp)
959  {
960  while ((NULL != op) && (op < cp))
961  {
962  cnt++;
963  pos = op + 1;
964  op = memchr (pos, '(', end - pos);
965  }
966  while ((NULL != cp) && ((NULL == op) || (cp < op)))
967  {
968  if (0 == cnt)
969  return; /* can't strip parens */
970  cnt--;
971  pos = cp + 1;
972  cp = memchr (pos, ')', end - pos);
973  }
974  }
975  if (0 != cnt)
976  {
977  GNUNET_break (0);
978  return;
979  }
980  str->sbuf++;
981  str->slen -= 2;
982 }

References end, GNUNET_break, GNUNET_YES, StringBuffer::null_flag, op, StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs_simplify().

Here is the caller graph for this function:

◆ has_epsilon()

static int has_epsilon ( const struct StringBuffer str)
static

Check if the string 'str' starts with an epsilon (empty string).

Example: "(|a)" is starting with an epsilon.

Parameters
strstring to test
Returns
0 if str has no epsilon, 1 if str starts with '(|' and ends with ')'

Definition at line 994 of file regex_internal.c.

995 {
996  return (GNUNET_YES != str->null_flag) && (0 < str->slen) &&
997  ('(' == str->sbuf[0]) && ('|' == str->sbuf[1]) &&
998  (')' == str->sbuf[str->slen - 1]);
999 }

References GNUNET_YES, StringBuffer::null_flag, StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs_simplify().

Here is the caller graph for this function:

◆ remove_epsilon()

static void remove_epsilon ( const struct StringBuffer str,
struct StringBuffer ret 
)
static

Remove an epsilon from the string str.

Where epsilon is an empty string Example: str = "(|a|b|c)", result: "a|b|c" The returned string needs to be freed.

Parameters
stroriginal string
retwhere to return string without preceding epsilon, string 'str' if no preceding epsilon could be found, NULL if 'str' was NULL

Definition at line 1012 of file regex_internal.c.

1013 {
1014  if (GNUNET_YES == str->null_flag)
1015  {
1016  ret->null_flag = GNUNET_YES;
1017  return;
1018  }
1019  if ((str->slen > 1) && ('(' == str->sbuf[0]) && ('|' == str->sbuf[1]) &&
1020  (')' == str->sbuf[str->slen - 1]))
1021  {
1022  /* remove epsilon */
1023  if (ret->blen < str->slen - 3)
1024  {
1025  GNUNET_array_grow (ret->abuf, ret->blen, str->slen - 3);
1026  }
1027  ret->sbuf = ret->abuf;
1028  ret->slen = str->slen - 3;
1029  GNUNET_memcpy (ret->sbuf, &str->sbuf[2], ret->slen);
1030  return;
1031  }
1032  sb_strdup (ret, str);
1033 }
static void sb_strdup(struct StringBuffer *out, const struct StringBuffer *in)
Copy the given string buffer from 'in' to 'out'.

References GNUNET_array_grow, GNUNET_memcpy, GNUNET_YES, StringBuffer::null_flag, ret, sb_strdup(), StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs_simplify().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ sb_strncmp()

static int sb_strncmp ( const struct StringBuffer str1,
const struct StringBuffer str2,
size_t  n 
)
static

Compare n bytes of 'str1' and 'str2'.

Parameters
str1first string to compare
str2second string for comparison
nnumber of bytes to compare
Returns
-1 if any of the strings is NULL, 0 if equal, non 0 otherwise

Definition at line 1046 of file regex_internal.c.

1049 {
1050  size_t max;
1051 
1052  if ((str1->slen != str2->slen) && ((str1->slen < n) || (str2->slen < n)))
1053  return -1;
1054  max = GNUNET_MAX (str1->slen, str2->slen);
1055  if (max > n)
1056  max = n;
1057  return memcmp (str1->sbuf, str2->sbuf, max);
1058 }
#define GNUNET_MAX(a, b)
#define max(x, y)

References GNUNET_MAX, max, StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs_simplify().

Here is the caller graph for this function:

◆ sb_strncmp_cstr()

static int sb_strncmp_cstr ( const struct StringBuffer str1,
const char *  str2,
size_t  n 
)
static

Compare n bytes of 'str1' and 'str2'.

Parameters
str1first string to compare
str2second C string for comparison
nnumber of bytes to compare (and length of str2)
Returns
-1 if any of the strings is NULL, 0 if equal, non 0 otherwise

Definition at line 1071 of file regex_internal.c.

1072 {
1073  if (str1->slen < n)
1074  return -1;
1075  return memcmp (str1->sbuf, str2, n);
1076 }

References StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs_simplify().

Here is the caller graph for this function:

◆ sb_init()

static void sb_init ( struct StringBuffer sb,
size_t  n 
)
static

Initialize string buffer for storing strings of up to n characters.

Parameters
sbbuffer to initialize
ndesired target length

Definition at line 1087 of file regex_internal.c.

1088 {
1089  sb->null_flag = GNUNET_NO;
1090  sb->abuf = sb->sbuf = (0 == n) ? NULL : GNUNET_malloc (n);
1091  sb->blen = n;
1092  sb->slen = 0;
1093 }

References StringBuffer::abuf, StringBuffer::blen, GNUNET_malloc, GNUNET_NO, StringBuffer::null_flag, StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs(), and automaton_create_proofs_simplify().

Here is the caller graph for this function:

◆ sb_strkcmp()

static int sb_strkcmp ( const struct StringBuffer str1,
const struct StringBuffer str2,
size_t  k 
)
static

Compare 'str1', starting from position 'k', with whole 'str2'.

Parameters
str1first string to compare, starting from position 'k'
str2second string for comparison
kstarting position in 'str1'
Returns
-1 if any of the strings is NULL, 0 if equal, non 0 otherwise

Definition at line 1106 of file regex_internal.c.

1109 {
1110  if ((GNUNET_YES == str1->null_flag) || (GNUNET_YES == str2->null_flag) ||
1111  (k > str1->slen) || (str1->slen - k != str2->slen))
1112  return -1;
1113  return memcmp (&str1->sbuf[k], str2->sbuf, str2->slen);
1114 }

References GNUNET_YES, StringBuffer::null_flag, StringBuffer::sbuf, and StringBuffer::slen.

Referenced by automaton_create_proofs_simplify().

Here is the caller graph for this function:

◆ number_states()

static void number_states ( void *  cls,
const unsigned int  count,
struct REGEX_INTERNAL_State s 
)
static

Helper function used as 'action' in 'REGEX_INTERNAL_automaton_traverse' function to create the depth-first numbering of the states.

Parameters
clsstates array.
countcurrent state counter.
scurrent state.

Definition at line 1126 of file regex_internal.c.

1129 {
1130  struct REGEX_INTERNAL_State **states = cls;
1131 
1132  s->dfs_id = count;
1133  if (NULL != states)
1134  states[count] = s;
1135 }
unsigned int dfs_id
Linear state ID acquired by depth-first-search.

References REGEX_INTERNAL_State::dfs_id.

Referenced by automaton_create_proofs(), and REGEX_INTERNAL_construct_nfa().

Here is the caller graph for this function:

◆ automaton_create_proofs_simplify()

static void automaton_create_proofs_simplify ( const struct StringBuffer R_last_ij,
const struct StringBuffer R_last_ik,
const struct StringBuffer R_last_kk,
const struct StringBuffer R_last_kj,
struct StringBuffer R_cur_ij,
struct StringBuffer R_cur_l,
struct StringBuffer R_cur_r 
)
static

Construct the regular expression given the inductive step, $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}, and simplify the resulting expression saved in R_cur_ij.

Parameters
R_last_ijvalue of $R^{(k-1)_{ij}.
R_last_ikvalue of $R^{(k-1)_{ik}.
R_last_kkvalue of $R^{(k-1)_{kk}.
R_last_kjvalue of $R^{(k-1)_{kj}.
R_cur_ijresult for this inductive step is saved in R_cur_ij, R_cur_ij is expected to be NULL when called!
R_cur_loptimization – kept between iterations to avoid realloc
R_cur_roptimization – kept between iterations to avoid realloc

Definition at line 1158 of file regex_internal.c.

1165 {
1166  struct StringBuffer R_temp_ij;
1167  struct StringBuffer R_temp_ik;
1168  struct StringBuffer R_temp_kj;
1169  struct StringBuffer R_temp_kk;
1170  int eps_check;
1171  int ij_ik_cmp;
1172  int ij_kj_cmp;
1173  int ik_kk_cmp;
1174  int kk_kj_cmp;
1175  int clean_ik_kk_cmp;
1176  int clean_kk_kj_cmp;
1177  size_t length;
1178  size_t length_l;
1179  size_t length_r;
1180 
1181  /*
1182  * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1183  * R_last == R^{(k-1)}, R_cur == R^{(k)}
1184  * R_cur_ij = R_cur_l | R_cur_r
1185  * R_cur_l == R^{(k-1)}_{ij}
1186  * R_cur_r == R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1187  */if ((GNUNET_YES == R_last_ij->null_flag) &&
1188  ((GNUNET_YES == R_last_ik->null_flag) ||
1189  (GNUNET_YES == R_last_kj->null_flag)))
1190  {
1191  /* R^{(k)}_{ij} = N | N */
1192  R_cur_ij->null_flag = GNUNET_YES;
1193  R_cur_ij->synced = GNUNET_NO;
1194  return;
1195  }
1196 
1197  if ((GNUNET_YES == R_last_ik->null_flag) ||
1198  (GNUNET_YES == R_last_kj->null_flag))
1199  {
1200  /* R^{(k)}_{ij} = R^{(k-1)}_{ij} | N */
1201  if (GNUNET_YES == R_last_ij->synced)
1202  {
1203  R_cur_ij->synced = GNUNET_YES;
1204  R_cur_ij->null_flag = GNUNET_NO;
1205  return;
1206  }
1207  R_cur_ij->synced = GNUNET_YES;
1208  sb_strdup (R_cur_ij, R_last_ij);
1209  return;
1210  }
1211  R_cur_ij->synced = GNUNET_NO;
1212 
1213  /* $R^{(k)}_{ij} = N | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} OR
1214  * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} */
1215 
1216  R_cur_r->null_flag = GNUNET_YES;
1217  R_cur_r->slen = 0;
1218  R_cur_l->null_flag = GNUNET_YES;
1219  R_cur_l->slen = 0;
1220 
1221  /* cache results from strcmp, we might need these many times */
1222  ij_kj_cmp = sb_nullstrcmp (R_last_ij, R_last_kj);
1223  ij_ik_cmp = sb_nullstrcmp (R_last_ij, R_last_ik);
1224  ik_kk_cmp = sb_nullstrcmp (R_last_ik, R_last_kk);
1225  kk_kj_cmp = sb_nullstrcmp (R_last_kk, R_last_kj);
1226 
1227  /* Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well
1228  * as parentheses, so we can better compare the contents */
1229 
1230  memset (&R_temp_ij, 0, sizeof(struct StringBuffer));
1231  memset (&R_temp_ik, 0, sizeof(struct StringBuffer));
1232  memset (&R_temp_kk, 0, sizeof(struct StringBuffer));
1233  memset (&R_temp_kj, 0, sizeof(struct StringBuffer));
1234  remove_epsilon (R_last_ik, &R_temp_ik);
1235  remove_epsilon (R_last_kk, &R_temp_kk);
1236  remove_epsilon (R_last_kj, &R_temp_kj);
1237  remove_parentheses (&R_temp_ik);
1238  remove_parentheses (&R_temp_kk);
1239  remove_parentheses (&R_temp_kj);
1240  clean_ik_kk_cmp = sb_nullstrcmp (R_last_ik, &R_temp_kk);
1241  clean_kk_kj_cmp = sb_nullstrcmp (&R_temp_kk, R_last_kj);
1242 
1243  /* construct R_cur_l (and, if necessary R_cur_r) */
1244  if (GNUNET_YES != R_last_ij->null_flag)
1245  {
1246  /* Assign R_temp_ij to R_last_ij and remove epsilon as well
1247  * as parentheses, so we can better compare the contents */
1248  remove_epsilon (R_last_ij, &R_temp_ij);
1249  remove_parentheses (&R_temp_ij);
1250 
1251  if ((0 == sb_strcmp (&R_temp_ij, &R_temp_ik)) &&
1252  (0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) &&
1253  (0 == sb_strcmp (&R_temp_kk, &R_temp_kj)))
1254  {
1255  if (0 == R_temp_ij.slen)
1256  {
1257  R_cur_r->null_flag = GNUNET_NO;
1258  }
1259  else if ((0 == sb_strncmp_cstr (R_last_ij, "(|", 2)) ||
1260  ((0 == sb_strncmp_cstr (R_last_ik, "(|", 2)) &&
1261  (0 == sb_strncmp_cstr (R_last_kj, "(|", 2)) ))
1262  {
1263  /*
1264  * a|(e|a)a*(e|a) = a*
1265  * a|(e|a)(e|a)*(e|a) = a*
1266  * (e|a)|aa*a = a*
1267  * (e|a)|aa*(e|a) = a*
1268  * (e|a)|(e|a)a*a = a*
1269  * (e|a)|(e|a)a*(e|a) = a*
1270  * (e|a)|(e|a)(e|a)*(e|a) = a*
1271  */if (GNUNET_YES == needs_parentheses (&R_temp_ij))
1272  sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_ij);
1273  else
1274  sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_ij);
1275  }
1276  else
1277  {
1278  /*
1279  * a|aa*a = a+
1280  * a|(e|a)a*a = a+
1281  * a|aa*(e|a) = a+
1282  * a|(e|a)(e|a)*a = a+
1283  * a|a(e|a)*(e|a) = a+
1284  */if (GNUNET_YES == needs_parentheses (&R_temp_ij))
1285  sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_ij);
1286  else
1287  sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_ij);
1288  }
1289  }
1290  else if ((0 == ij_ik_cmp) && (0 == clean_kk_kj_cmp) &&
1291  (0 != clean_ik_kk_cmp))
1292  {
1293  /* a|ab*b = ab* */
1294  if (0 == R_last_kk->slen)
1295  sb_strdup (R_cur_r, R_last_ij);
1296  else if (GNUNET_YES == needs_parentheses (&R_temp_kk))
1297  sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
1298  else
1299  sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, R_last_kk);
1300  R_cur_l->null_flag = GNUNET_YES;
1301  }
1302  else if ((0 == ij_kj_cmp) && (0 == clean_ik_kk_cmp) &&
1303  (0 != clean_kk_kj_cmp))
1304  {
1305  /* a|bb*a = b*a */
1306  if (R_last_kk->slen < 1)
1307  {
1308  sb_strdup (R_cur_r, R_last_kj);
1309  }
1310  else if (GNUNET_YES == needs_parentheses (&R_temp_kk))
1311  sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
1312  else
1313  sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
1314 
1315  R_cur_l->null_flag = GNUNET_YES;
1316  }
1317  else if ((0 == ij_ik_cmp) && (0 == kk_kj_cmp) &&
1318  (! has_epsilon (R_last_ij)) && has_epsilon (R_last_kk))
1319  {
1320  /* a|a(e|b)*(e|b) = a|ab* = a|a|ab|abb|abbb|... = ab* */
1321  if (needs_parentheses (&R_temp_kk))
1322  sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
1323  else
1324  sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, &R_temp_kk);
1325  R_cur_l->null_flag = GNUNET_YES;
1326  }
1327  else if ((0 == ij_kj_cmp) && (0 == ik_kk_cmp) &&
1328  (! has_epsilon (R_last_ij)) && has_epsilon (R_last_kk))
1329  {
1330  /* a|(e|b)(e|b)*a = a|b*a = a|a|ba|bba|bbba|... = b*a */
1331  if (needs_parentheses (&R_temp_kk))
1332  sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_ij);
1333  else
1334  sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_ij);
1335  R_cur_l->null_flag = GNUNET_YES;
1336  }
1337  else
1338  {
1339  sb_strdup (R_cur_l, R_last_ij);
1340  remove_parentheses (R_cur_l);
1341  }
1342  }
1343  else
1344  {
1345  /* we have no left side */
1346  R_cur_l->null_flag = GNUNET_YES;
1347  }
1348 
1349  /* construct R_cur_r, if not already constructed */
1350  if (GNUNET_YES == R_cur_r->null_flag)
1351  {
1352  length = R_temp_kk.slen - R_last_ik->slen;
1353 
1354  /* a(ba)*bx = (ab)+x */
1355  if ((length > 0) && (GNUNET_YES != R_last_kk->null_flag) &&
1356  (0 < R_last_kk->slen) && (GNUNET_YES != R_last_kj->null_flag) &&
1357  (0 < R_last_kj->slen) && (GNUNET_YES != R_last_ik->null_flag) &&
1358  (0 < R_last_ik->slen) &&
1359  (0 == sb_strkcmp (&R_temp_kk, R_last_ik, length)) &&
1360  (0 == sb_strncmp (&R_temp_kk, R_last_kj, length)))
1361  {
1362  struct StringBuffer temp_a;
1363  struct StringBuffer temp_b;
1364 
1365  sb_init (&temp_a, length);
1366  sb_init (&temp_b, R_last_kj->slen - length);
1367 
1368  length_l = length;
1369  temp_a.sbuf = temp_a.abuf;
1370  GNUNET_memcpy (temp_a.sbuf, R_last_kj->sbuf, length_l);
1371  temp_a.slen = length_l;
1372 
1373  length_r = R_last_kj->slen - length;
1374  temp_b.sbuf = temp_b.abuf;
1375  GNUNET_memcpy (temp_b.sbuf, &R_last_kj->sbuf[length], length_r);
1376  temp_b.slen = length_r;
1377 
1378  /* e|(ab)+ = (ab)* */
1379  if ((GNUNET_YES != R_cur_l->null_flag) && (0 == R_cur_l->slen) &&
1380  (0 == temp_b.slen))
1381  {
1382  sb_printf2 (R_cur_r, "(%.*s%.*s)*", 3, R_last_ik, &temp_a);
1383  sb_free (R_cur_l);
1384  R_cur_l->null_flag = GNUNET_YES;
1385  }
1386  else
1387  {
1388  sb_printf3 (R_cur_r, "(%.*s%.*s)+%.*s", 3, R_last_ik, &temp_a, &temp_b);
1389  }
1390  sb_free (&temp_a);
1391  sb_free (&temp_b);
1392  }
1393  else if ((0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) &&
1394  (0 == sb_strcmp (&R_temp_kk, &R_temp_kj)) )
1395  {
1396  /*
1397  * (e|a)a*(e|a) = a*
1398  * (e|a)(e|a)*(e|a) = a*
1399  */
1400  if (has_epsilon (R_last_ik) && has_epsilon (R_last_kj))
1401  {
1402  if (needs_parentheses (&R_temp_kk))
1403  sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_kk);
1404  else
1405  sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_kk);
1406  }
1407  /* aa*a = a+a */
1408  else if ((0 == clean_ik_kk_cmp) && (0 == clean_kk_kj_cmp) &&
1409  (! has_epsilon (R_last_ik)))
1410  {
1411  if (needs_parentheses (&R_temp_kk))
1412  sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, &R_temp_kk);
1413  else
1414  sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, &R_temp_kk);
1415  }
1416  /*
1417  * (e|a)a*a = a+
1418  * aa*(e|a) = a+
1419  * a(e|a)*(e|a) = a+
1420  * (e|a)a*a = a+
1421  */else
1422  {
1423  eps_check = (has_epsilon (R_last_ik) + has_epsilon (R_last_kk)
1424  + has_epsilon (R_last_kj));
1425 
1426  if (1 == eps_check)
1427  {
1428  if (needs_parentheses (&R_temp_kk))
1429  sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_kk);
1430  else
1431  sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_kk);
1432  }
1433  }
1434  }
1435  /*
1436  * aa*b = a+b
1437  * (e|a)(e|a)*b = a*b
1438  */
1439  else if (0 == sb_strcmp (&R_temp_ik, &R_temp_kk))
1440  {
1441  if (has_epsilon (R_last_ik))
1442  {
1443  if (needs_parentheses (&R_temp_kk))
1444  sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
1445  else
1446  sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
1447  }
1448  else
1449  {
1450  if (needs_parentheses (&R_temp_kk))
1451  sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, R_last_kj);
1452  else
1453  sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, R_last_kj);
1454  }
1455  }
1456  /*
1457  * ba*a = ba+
1458  * b(e|a)*(e|a) = ba*
1459  */
1460  else if (0 == sb_strcmp (&R_temp_kk, &R_temp_kj))
1461  {
1462  if (has_epsilon (R_last_kj))
1463  {
1464  if (needs_parentheses (&R_temp_kk))
1465  sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ik, &R_temp_kk);
1466  else
1467  sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ik, &R_temp_kk);
1468  }
1469  else
1470  {
1471  if (needs_parentheses (&R_temp_kk))
1472  sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, R_last_ik, &R_temp_kk);
1473  else
1474  sb_printf2 (R_cur_r, "%.*s+%.*s", 1, R_last_ik, &R_temp_kk);
1475  }
1476  }
1477  else
1478  {
1479  if (0 < R_temp_kk.slen)
1480  {
1481  if (needs_parentheses (&R_temp_kk))
1482  {
1483  sb_printf3 (R_cur_r,
1484  "%.*s(%.*s)*%.*s",
1485  3,
1486  R_last_ik,
1487  &R_temp_kk,
1488  R_last_kj);
1489  }
1490  else
1491  {
1492  sb_printf3 (R_cur_r,
1493  "%.*s%.*s*%.*s",
1494  1,
1495  R_last_ik,
1496  &R_temp_kk,
1497  R_last_kj);
1498  }
1499  }
1500  else
1501  {
1502  sb_printf2 (R_cur_r, "%.*s%.*s", 0, R_last_ik, R_last_kj);
1503  }
1504  }
1505  }
1506  sb_free (&R_temp_ij);
1507  sb_free (&R_temp_ik);
1508  sb_free (&R_temp_kk);
1509  sb_free (&R_temp_kj);
1510 
1511  if ((GNUNET_YES == R_cur_l->null_flag) && (GNUNET_YES == R_cur_r->null_flag))
1512  {
1513  R_cur_ij->null_flag = GNUNET_YES;
1514  return;
1515  }
1516 
1517  if ((GNUNET_YES != R_cur_l->null_flag) && (GNUNET_YES == R_cur_r->null_flag))
1518  {
1519  struct StringBuffer tmp;
1520 
1521  tmp = *R_cur_ij;
1522  *R_cur_ij = *R_cur_l;
1523  *R_cur_l = tmp;
1524  return;
1525  }
1526 
1527  if ((GNUNET_YES == R_cur_l->null_flag) && (GNUNET_YES != R_cur_r->null_flag))
1528  {
1529  struct StringBuffer tmp;
1530 
1531  tmp = *R_cur_ij;
1532  *R_cur_ij = *R_cur_r;
1533  *R_cur_r = tmp;
1534  return;
1535  }
1536 
1537  if (0 == sb_nullstrcmp (R_cur_l, R_cur_r))
1538  {
1539  struct StringBuffer tmp;
1540 
1541  tmp = *R_cur_ij;
1542  *R_cur_ij = *R_cur_l;
1543  *R_cur_l = tmp;
1544  return;
1545  }
1546  sb_printf2 (R_cur_ij, "(%.*s|%.*s)", 3, R_cur_l, R_cur_r);
1547 }
static void sb_printf3(struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg1, const struct StringBuffer *sarg2, const struct StringBuffer *sarg3)
Format a string buffer.
static void remove_parentheses(struct StringBuffer *str)
Remove parentheses surrounding string str.
static void sb_printf1(struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg)
Format a string buffer.
static int sb_strncmp_cstr(const struct StringBuffer *str1, const char *str2, size_t n)
Compare n bytes of 'str1' and 'str2'.
static void sb_free(struct StringBuffer *sb)
Free resources of the given string buffer.
static int sb_nullstrcmp(const struct StringBuffer *s1, const struct StringBuffer *s2)
Compare two strings for equality.
static void remove_epsilon(const struct StringBuffer *str, struct StringBuffer *ret)
Remove an epsilon from the string str.
static void sb_printf2(struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg1, const struct StringBuffer *sarg2)
Format a string buffer.
static int sb_strcmp(const struct StringBuffer *s1, const struct StringBuffer *s2)
Compare two strings for equality.
static int sb_strncmp(const struct StringBuffer *str1, const struct StringBuffer *str2, size_t n)
Compare n bytes of 'str1' and 'str2'.
static int needs_parentheses(const struct StringBuffer *str)
Check if the given string str needs parentheses around it when using it to generate a regex.
static int has_epsilon(const struct StringBuffer *str)
Check if the string 'str' starts with an epsilon (empty string).
static void sb_init(struct StringBuffer *sb, size_t n)
Initialize string buffer for storing strings of up to n characters.
static int sb_strkcmp(const struct StringBuffer *str1, const struct StringBuffer *str2, size_t k)
Compare 'str1', starting from position 'k', with whole 'str2'.
String container for faster string operations.
int16_t synced
If this entry is part of the last/current generation array, this flag is GNUNET_YES if the last and c...

References StringBuffer::abuf, GNUNET_memcpy, GNUNET_NO, GNUNET_YES, has_epsilon(), needs_parentheses(), StringBuffer::null_flag, remove_epsilon(), remove_parentheses(), sb_free(), sb_init(), sb_nullstrcmp(), sb_printf1(), sb_printf2(), sb_printf3(), sb_strcmp(), sb_strdup(), sb_strkcmp(), sb_strncmp(), sb_strncmp_cstr(), StringBuffer::sbuf, StringBuffer::slen, and StringBuffer::synced.

Referenced by automaton_create_proofs().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ automaton_create_proofs()

static int automaton_create_proofs ( struct REGEX_INTERNAL_Automaton a)
static

Create proofs for all states in the given automaton.

Implementation of the algorithm described in chapter 3.2.1 of "Automata Theory, Languages, and Computation 3rd Edition" by Hopcroft, Motwani and Ullman.

Each state in the automaton gets assigned 'proof' and 'hash' (hash of the proof) fields. The starting state will only have a valid proof/hash if it has any incoming transitions.

Parameters
aautomaton for which to assign proofs and hashes, must not be NULL

Definition at line 1562 of file regex_internal.c.

1563 {
1564  unsigned int n = a->state_count;
1565  struct REGEX_INTERNAL_State *states[n];
1566  struct StringBuffer *R_last;
1567  struct StringBuffer *R_cur;
1568  struct StringBuffer R_cur_r;
1569  struct StringBuffer R_cur_l;
1570  struct StringBuffer *R_swap;
1571  struct REGEX_INTERNAL_Transition *t;
1572  struct StringBuffer complete_regex;
1573  unsigned int i;
1574  unsigned int j;
1575  unsigned int k;
1576 
1577  R_last = GNUNET_malloc_large (sizeof(struct StringBuffer) * n * n);
1578  R_cur = GNUNET_malloc_large (sizeof(struct StringBuffer) * n * n);
1579  if ((NULL == R_last) || (NULL == R_cur))
1580  {
1582  GNUNET_free (R_cur);
1583  GNUNET_free (R_last);
1584  return GNUNET_SYSERR;
1585  }
1586 
1587  /* create depth-first numbering of the states, initializes 'state' */
1589  a->start,
1590  NULL,
1591  NULL,
1592  &number_states,
1593  states);
1594 
1595  for (i = 0; i < n; i++)
1596  GNUNET_assert (NULL != states[i]);
1597  for (i = 0; i < n; i++)
1598  for (j = 0; j < n; j++)
1599  R_last[i * n + j].null_flag = GNUNET_YES;
1600 
1601  /* Compute regular expressions of length "1" between each pair of states */
1602  for (i = 0; i < n; i++)
1603  {
1604  for (t = states[i]->transitions_head; NULL != t; t = t->next)
1605  {
1606  j = t->to_state->dfs_id;
1607  if (GNUNET_YES == R_last[i * n + j].null_flag)
1608  {
1609  sb_strdup_cstr (&R_last[i * n + j], t->label);
1610  }
1611  else
1612  {
1613  sb_append_cstr (&R_last[i * n + j], "|");
1614  sb_append_cstr (&R_last[i * n + j], t->label);
1615  }
1616  }
1617  /* add self-loop: i is reachable from i via epsilon-transition */
1618  if (GNUNET_YES == R_last[i * n + i].null_flag)
1619  {
1620  R_last[i * n + i].slen = 0;
1621  R_last[i * n + i].null_flag = GNUNET_NO;
1622  }
1623  else
1624  {
1625  sb_wrap (&R_last[i * n + i], "(|%.*s)", 3);
1626  }
1627  }
1628  for (i = 0; i < n; i++)
1629  for (j = 0; j < n; j++)
1630  if (needs_parentheses (&R_last[i * n + j]))
1631  sb_wrap (&R_last[i * n + j], "(%.*s)", 2);
1632  /* Compute regular expressions of length "k" between each pair of states per
1633  * induction */
1634  memset (&R_cur_l, 0, sizeof(struct StringBuffer));
1635  memset (&R_cur_r, 0, sizeof(struct StringBuffer));
1636  for (k = 0; k < n; k++)
1637  {
1638  for (i = 0; i < n; i++)
1639  {
1640  for (j = 0; j < n; j++)
1641  {
1642  /* Basis for the recursion:
1643  * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1644  * R_last == R^{(k-1)}, R_cur == R^{(k)}
1645  */
1646 
1647  /* Create R_cur[i][j] and simplify the expression */
1648  automaton_create_proofs_simplify (&R_last[i * n + j],
1649  &R_last[i * n + k],
1650  &R_last[k * n + k],
1651  &R_last[k * n + j],
1652  &R_cur[i * n + j],
1653  &R_cur_l,
1654  &R_cur_r);
1655  }
1656  }
1657  /* set R_last = R_cur */
1658  R_swap = R_last;
1659  R_last = R_cur;
1660  R_cur = R_swap;
1661  /* clear 'R_cur' for next iteration */
1662  for (i = 0; i < n; i++)
1663  for (j = 0; j < n; j++)
1664  R_cur[i * n + j].null_flag = GNUNET_YES;
1665  }
1666  sb_free (&R_cur_l);
1667  sb_free (&R_cur_r);
1668  /* assign proofs and hashes */
1669  for (i = 0; i < n; i++)
1670  {
1671  if (GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag)
1672  {
1673  states[i]->proof = GNUNET_strndup (R_last[a->start->dfs_id * n + i].sbuf,
1674  R_last[a->start->dfs_id * n + i].slen);
1675  GNUNET_CRYPTO_hash (states[i]->proof,
1676  strlen (states[i]->proof),
1677  &states[i]->hash);
1678  }
1679  }
1680 
1681  /* complete regex for whole DFA: union of all pairs (start state/accepting
1682  * state(s)). */
1683  sb_init (&complete_regex, 16 * n);
1684  for (i = 0; i < n; i++)
1685  {
1686  if (states[i]->accepting)
1687  {
1688  if ((0 == complete_regex.slen) &&
1689  (0 < R_last[a->start->dfs_id * n + i].slen))
1690  {
1691  sb_append (&complete_regex, &R_last[a->start->dfs_id * n + i]);
1692  }
1693  else if ((GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag) &&
1694  (0 < R_last[a->start->dfs_id * n + i].slen))
1695  {
1696  sb_append_cstr (&complete_regex, "|");
1697  sb_append (&complete_regex, &R_last[a->start->dfs_id * n + i]);
1698  }
1699  }
1700  }
1701  a->canonical_regex =
1702  GNUNET_strndup (complete_regex.sbuf, complete_regex.slen);
1703 
1704  /* cleanup */
1705  sb_free (&complete_regex);
1706  for (i = 0; i < n; i++)
1707  for (j = 0; j < n; j++)
1708  {
1709  sb_free (&R_cur[i * n + j]);
1710  sb_free (&R_last[i * n + j]);
1711  }
1712  GNUNET_free (R_cur);
1713  GNUNET_free (R_last);
1714  return GNUNET_OK;
1715 }
static uint64_t proof
Definition: gnunet-scrypt.c:49
void GNUNET_CRYPTO_hash(const void *block, size_t size, struct GNUNET_HashCode *ret)
Compute hash of a given block.
Definition: crypto_hash.c:41
@ GNUNET_OK
@ GNUNET_SYSERR
#define GNUNET_log_strerror(level, cmd)
Log an error message at log-level 'level' that indicates a failure of the command 'cmd' with the mess...
#define GNUNET_malloc_large(size)
Wrapper around malloc.
#define GNUNET_strndup(a, length)
Wrapper around GNUNET_xstrndup_.
static void sb_strdup_cstr(struct StringBuffer *out, const char *cstr)
Copy the given string buffer from 'in' to 'out'.
static void sb_append(struct StringBuffer *ret, const struct StringBuffer *sarg)
Append a string.
void REGEX_INTERNAL_automaton_traverse(const struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *start, REGEX_INTERNAL_traverse_check check, void *check_cls, REGEX_INTERNAL_traverse_action action, void *action_cls)
Traverses the given automaton using depth-first-search (DFS) from it's start state,...
static void sb_append_cstr(struct StringBuffer *ret, const char *cstr)
Append a C string.
static void sb_wrap(struct StringBuffer *ret, const char *format, size_t extra_chars)
Wrap a string buffer, that is, set ret to the format string which contains an "%s" which is to be rep...
static void automaton_create_proofs_simplify(const struct StringBuffer *R_last_ij, const struct StringBuffer *R_last_ik, const struct StringBuffer *R_last_kk, const struct StringBuffer *R_last_kj, struct StringBuffer *R_cur_ij, struct StringBuffer *R_cur_l, struct StringBuffer *R_cur_r)
Construct the regular expression given the inductive step, $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}...
static void number_states(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Helper function used as 'action' in 'REGEX_INTERNAL_automaton_traverse' function to create the depth-...
char * canonical_regex
Canonical regex (result of RX->NFA->DFA->RX)

References automaton_create_proofs_simplify(), REGEX_INTERNAL_Automaton::canonical_regex, REGEX_INTERNAL_State::dfs_id, GNUNET_assert, GNUNET_CRYPTO_hash(), GNUNET_ERROR_TYPE_ERROR, GNUNET_free, GNUNET_log_strerror, GNUNET_malloc_large, GNUNET_NO, GNUNET_OK, GNUNET_strndup, GNUNET_SYSERR, GNUNET_YES, needs_parentheses(), GNUNET_SCHEDULER_Task::next, StringBuffer::null_flag, number_states(), REGEX_INTERNAL_State::proof, proof, REGEX_INTERNAL_automaton_traverse(), sb_append(), sb_append_cstr(), sb_free(), sb_init(), sb_strdup_cstr(), sb_wrap(), StringBuffer::sbuf, StringBuffer::slen, REGEX_INTERNAL_Automaton::start, REGEX_INTERNAL_Automaton::state_count, and t.

Referenced by REGEX_INTERNAL_construct_dfa().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ dfa_state_create()

static struct REGEX_INTERNAL_State* dfa_state_create ( struct REGEX_INTERNAL_Context ctx,
struct REGEX_INTERNAL_StateSet nfa_states 
)
static

Creates a new DFA state based on a set of NFA states.

Needs to be freed using automaton_destroy_state.

Parameters
ctxcontext
nfa_statesset of NFA states on which the DFA should be based on
Returns
new DFA state

Definition at line 1728 of file regex_internal.c.

1730 {
1731  struct REGEX_INTERNAL_State *s;
1732  char *pos;
1733  size_t len;
1734  struct REGEX_INTERNAL_State *cstate;
1735  struct REGEX_INTERNAL_Transition *ctran;
1736  unsigned int i;
1737 
1738  s = GNUNET_new (struct REGEX_INTERNAL_State);
1739  s->id = ctx->state_id++;
1740  s->index = -1;
1741  s->lowlink = -1;
1742 
1743  if (NULL == nfa_states)
1744  {
1745  GNUNET_asprintf (&s->name, "s%i", s->id);
1746  return s;
1747  }
1748 
1749  s->nfa_set = *nfa_states;
1750 
1751  if (nfa_states->off < 1)
1752  return s;
1753 
1754  /* Create a name based on 'nfa_states' */
1755  len = nfa_states->off * 14 + 4;
1756  s->name = GNUNET_malloc (len);
1757  strcat (s->name, "{");
1758  pos = s->name + 1;
1759 
1760  for (i = 0; i < nfa_states->off; i++)
1761  {
1762  cstate = nfa_states->states[i];
1763  GNUNET_snprintf (pos, pos - s->name + len, "%i,", cstate->id);
1764  pos += strlen (pos);
1765 
1766  /* Add a transition for each distinct label to NULL state */
1767  for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next)
1768  if (NULL != ctran->label)
1769  state_add_transition (ctx, s, ctran->label, NULL);
1770 
1771  /* If the nfa_states contain an accepting state, the new dfa state is also
1772  * accepting. */
1773  if (cstate->accepting)
1774  s->accepting = 1;
1775  }
1776  pos[-1] = '}';
1777  s->name = GNUNET_realloc (s->name, strlen (s->name) + 1);
1778 
1779  memset (nfa_states, 0, sizeof(struct REGEX_INTERNAL_StateSet));
1780  return s;
1781 }
uint16_t len
length of data (which is always a uint32_t, but presumably this can be used to specify that fewer byt...
#define GNUNET_realloc(ptr, size)
Wrapper around realloc.
int accepting
If this is an accepting state or not.
int lowlink
Used for SCC detection.
int index
Used for SCC detection.

References REGEX_INTERNAL_State::accepting, ctx, GNUNET_asprintf(), GNUNET_malloc, GNUNET_new, GNUNET_realloc, GNUNET_snprintf(), REGEX_INTERNAL_State::id, REGEX_INTERNAL_State::index, REGEX_INTERNAL_Transition::label, len, REGEX_INTERNAL_State::lowlink, REGEX_INTERNAL_State::name, REGEX_INTERNAL_Transition::next, REGEX_INTERNAL_State::nfa_set, REGEX_INTERNAL_StateSet::off, state_add_transition(), REGEX_INTERNAL_StateSet::states, and REGEX_INTERNAL_State::transitions_head.

Referenced by construct_dfa_states(), and REGEX_INTERNAL_construct_dfa().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ dfa_move()

static unsigned int dfa_move ( struct REGEX_INTERNAL_State **  s,
const char *  str 
)
static

Move from the given state 's' to the next state on transition 'str'.

Consumes as much of the given 'str' as possible (useful for strided DFAs). On return 's' will point to the next state, and the length of the substring used for this transition will be returned. If no transition possible 0 is returned and 's' points to NULL.

Parameters
sstarting state, will point to the next state or NULL (if no transition possible)
stredge label to follow (will match longest common prefix)
Returns
length of the substring consumed from 'str'

Definition at line 1798 of file regex_internal.c.

1799 {
1800  struct REGEX_INTERNAL_Transition *t;
1801  struct REGEX_INTERNAL_State *new_s;
1802  unsigned int len;
1803  unsigned int max_len;
1804 
1805  if (NULL == s)
1806  return 0;
1807 
1808  new_s = NULL;
1809  max_len = 0;
1810  for (t = (*s)->transitions_head; NULL != t; t = t->next)
1811  {
1812  len = strlen (t->label);
1813 
1814  if (0 == strncmp (t->label, str, len))
1815  {
1816  if (len >= max_len)
1817  {
1818  max_len = len;
1819  new_s = t->to_state;
1820  }
1821  }
1822  }
1823 
1824  *s = new_s;
1825  return max_len;
1826 }

References len, GNUNET_SCHEDULER_Task::next, and t.

Referenced by evaluate_dfa().

Here is the caller graph for this function:

◆ mark_states()

static void mark_states ( void *  cls,
const unsigned int  count,
struct REGEX_INTERNAL_State s 
)
static

Set the given state 'marked' to GNUNET_YES.

Used by the dfa_remove_unreachable_states() function to detect unreachable states in the automaton.

Parameters
clsclosure, not used.
countcount, not used.
sstate where the marked attribute will be set to GNUNET_YES.

Definition at line 1839 of file regex_internal.c.

1842 {
1843  s->marked = GNUNET_YES;
1844 }
int marked
Marking of the state.

References GNUNET_YES, and REGEX_INTERNAL_State::marked.

Referenced by dfa_remove_unreachable_states().

Here is the caller graph for this function:

◆ dfa_remove_unreachable_states()

static void dfa_remove_unreachable_states ( struct REGEX_INTERNAL_Automaton a)
static

Remove all unreachable states from DFA 'a'.

Unreachable states are those states that are not reachable from the starting state.

Parameters
aDFA automaton

Definition at line 1854 of file regex_internal.c.

1855 {
1856  struct REGEX_INTERNAL_State *s;
1857  struct REGEX_INTERNAL_State *s_next;
1858 
1859  /* 1. unmark all states */
1860  for (s = a->states_head; NULL != s; s = s->next)
1861  s->marked = GNUNET_NO;
1862 
1863  /* 2. traverse dfa from start state and mark all visited states */
1865  a->start,
1866  NULL,
1867  NULL,
1868  &mark_states,
1869  NULL);
1870 
1871  /* 3. delete all states that were not visited */
1872  for (s = a->states_head; NULL != s; s = s_next)
1873  {
1874  s_next = s->next;
1875  if (GNUNET_NO == s->marked)
1876  automaton_remove_state (a, s);
1877  }
1878 }
static void mark_states(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Set the given state 'marked' to GNUNET_YES.
static void automaton_remove_state(struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s)
Remove a state from the given automaton 'a'.

References automaton_remove_state(), GNUNET_NO, mark_states(), REGEX_INTERNAL_State::marked, REGEX_INTERNAL_State::next, REGEX_INTERNAL_automaton_traverse(), REGEX_INTERNAL_Automaton::start, and REGEX_INTERNAL_Automaton::states_head.

Referenced by dfa_minimize().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ dfa_remove_dead_states()

static void dfa_remove_dead_states ( struct REGEX_INTERNAL_Automaton a)
static

Remove all dead states from the DFA 'a'.

Dead states are those states that do not transition to any other state but themselves.

Parameters
aDFA automaton

Definition at line 1888 of file regex_internal.c.

1889 {
1890  struct REGEX_INTERNAL_State *s;
1891  struct REGEX_INTERNAL_State *s_next;
1892  struct REGEX_INTERNAL_Transition *t;
1893  int dead;
1894 
1895  GNUNET_assert (DFA == a->type);
1896 
1897  for (s = a->states_head; NULL != s; s = s_next)
1898  {
1899  s_next = s->next;
1900 
1901  if (s->accepting)
1902  continue;
1903 
1904  dead = 1;
1905  for (t = s->transitions_head; NULL != t; t = t->next)
1906  {
1907  if ((NULL != t->to_state) && (t->to_state != s) )
1908  {
1909  dead = 0;
1910  break;
1911  }
1912  }
1913 
1914  if (0 == dead)
1915  continue;
1916 
1917  /* state s is dead, remove it */
1918  automaton_remove_state (a, s);
1919  }
1920 }
@ DFA
enum REGEX_INTERNAL_AutomatonType type
Type of the automaton.

References REGEX_INTERNAL_State::accepting, automaton_remove_state(), DFA, GNUNET_assert, REGEX_INTERNAL_State::next, GNUNET_SCHEDULER_Task::next, REGEX_INTERNAL_Automaton::states_head, t, REGEX_INTERNAL_State::transitions_head, and REGEX_INTERNAL_Automaton::type.

Referenced by dfa_minimize().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ dfa_merge_nondistinguishable_states()

static int dfa_merge_nondistinguishable_states ( struct REGEX_INTERNAL_Context ctx,
struct REGEX_INTERNAL_Automaton a 
)
static

Merge all non distinguishable states in the DFA 'a'.

Parameters
ctxcontext
aDFA automaton
Returns
GNUNET_OK on success

Definition at line 1931 of file regex_internal.c.

1933 {
1934  uint32_t *table;
1935  struct REGEX_INTERNAL_State *s1;
1936  struct REGEX_INTERNAL_State *s2;
1937  struct REGEX_INTERNAL_Transition *t1;
1938  struct REGEX_INTERNAL_Transition *t2;
1939  struct REGEX_INTERNAL_State *s1_next;
1940  struct REGEX_INTERNAL_State *s2_next;
1941  int change;
1942  unsigned int num_equal_edges;
1943  unsigned int i;
1944  unsigned int state_cnt;
1945  unsigned long long idx;
1946  unsigned long long idx1;
1947 
1948  if ((NULL == a) || (0 == a->state_count))
1949  {
1951  "Could not merge nondistinguishable states, automaton was NULL.\n");
1952  return GNUNET_SYSERR;
1953  }
1954 
1955  state_cnt = a->state_count;
1957  (sizeof(uint32_t) * state_cnt * state_cnt / 32) + sizeof(uint32_t));
1958  if (NULL == table)
1959  {
1961  return GNUNET_SYSERR;
1962  }
1963 
1964  for (i = 0, s1 = a->states_head; NULL != s1; s1 = s1->next)
1965  s1->marked = i++;
1966 
1967  /* Mark all pairs of accepting/!accepting states */
1968  for (s1 = a->states_head; NULL != s1; s1 = s1->next)
1969  for (s2 = a->states_head; NULL != s2; s2 = s2->next)
1970  if ((s1->accepting && ! s2->accepting) ||
1971  (! s1->accepting && s2->accepting))
1972  {
1973  idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
1974  table[idx / 32] |= (1U << (idx % 32));
1975  }
1976 
1977  /* Find all equal states */
1978  change = 1;
1979  while (0 != change)
1980  {
1981  change = 0;
1982  for (s1 = a->states_head; NULL != s1; s1 = s1->next)
1983  {
1984  for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2->next)
1985  {
1986  idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
1987  if (0 != (table[idx / 32] & (1U << (idx % 32))))
1988  continue;
1989  num_equal_edges = 0;
1990  for (t1 = s1->transitions_head; NULL != t1; t1 = t1->next)
1991  {
1992  for (t2 = s2->transitions_head; NULL != t2; t2 = t2->next)
1993  {
1994  if (0 == strcmp (t1->label, t2->label))
1995  {
1996  num_equal_edges++;
1997  /* same edge, but targets definitively different, so we're different
1998  as well */
1999  if (t1->to_state->marked > t2->to_state->marked)
2000  idx1 = (unsigned long long) t1->to_state->marked * state_cnt
2001  + t2->to_state->marked;
2002  else
2003  idx1 = (unsigned long long) t2->to_state->marked * state_cnt
2004  + t1->to_state->marked;
2005  if (0 != (table[idx1 / 32] & (1U << (idx1 % 32))))
2006  {
2007  table[idx / 32] |= (1U << (idx % 32));
2008  change = 1; /* changed a marker, need to run again */
2009  }
2010  }
2011  }
2012  }
2013  if ((num_equal_edges != s1->transition_count) ||
2014  (num_equal_edges != s2->transition_count))
2015  {
2016  /* Make sure ALL edges of possible equal states are the same */
2017  table[idx / 32] |= (1U << (idx % 32));
2018  change = 1; /* changed a marker, need to run again */
2019  }
2020  }
2021  }
2022  }
2023 
2024  /* Merge states that are equal */
2025  for (s1 = a->states_head; NULL != s1; s1 = s1_next)
2026  {
2027  s1_next = s1->next;
2028  for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2_next)
2029  {
2030  s2_next = s2->next;
2031  idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
2032  if (0 == (table[idx / 32] & (1U << (idx % 32))))
2033  automaton_merge_states (ctx, a, s1, s2);
2034  }
2035  }
2036 
2037  GNUNET_free (table);
2038  return GNUNET_OK;
2039 }
static struct PeerEntry ** table
Table with our interned peer IDs.
Definition: peer.c:56
static void automaton_merge_states(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s1, struct REGEX_INTERNAL_State *s2)
Merge two states into one.

References REGEX_INTERNAL_State::accepting, automaton_merge_states(), ctx, GNUNET_ERROR_TYPE_ERROR, GNUNET_free, GNUNET_log, GNUNET_log_strerror, GNUNET_malloc_large, GNUNET_OK, GNUNET_SYSERR, REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_State::marked, REGEX_INTERNAL_Transition::next, REGEX_INTERNAL_State::next, REGEX_INTERNAL_Automaton::state_count, REGEX_INTERNAL_Automaton::states_head, table, REGEX_INTERNAL_Transition::to_state, REGEX_INTERNAL_State::transition_count, and REGEX_INTERNAL_State::transitions_head.

Referenced by dfa_minimize().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ dfa_minimize()

static int dfa_minimize ( struct REGEX_INTERNAL_Context ctx,
struct REGEX_INTERNAL_Automaton a 
)
static

Minimize the given DFA 'a' by removing all unreachable states, removing all dead states and merging all non distinguishable states.

Parameters
ctxcontext
aDFA automaton
Returns
GNUNET_OK on success

Definition at line 2051 of file regex_internal.c.

2053 {
2054  if (NULL == a)
2055  return GNUNET_SYSERR;
2056 
2057  GNUNET_assert (DFA == a->type);
2058 
2059  /* 1. remove unreachable states */
2061 
2062  /* 2. remove dead states */
2064 
2065  /* 3. Merge nondistinguishable states */
2067  return GNUNET_SYSERR;
2068  return GNUNET_OK;
2069 }
static int dfa_merge_nondistinguishable_states(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a)
Merge all non distinguishable states in the DFA 'a'.
static void dfa_remove_dead_states(struct REGEX_INTERNAL_Automaton *a)
Remove all dead states from the DFA 'a'.
static void dfa_remove_unreachable_states(struct REGEX_INTERNAL_Automaton *a)
Remove all unreachable states from DFA 'a'.

References ctx, DFA, dfa_merge_nondistinguishable_states(), dfa_remove_dead_states(), dfa_remove_unreachable_states(), GNUNET_assert, GNUNET_OK, GNUNET_SYSERR, and REGEX_INTERNAL_Automaton::type.

Referenced by REGEX_INTERNAL_construct_dfa().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ dfa_add_multi_strides_helper()

static void dfa_add_multi_strides_helper ( void *  cls,
const unsigned int  depth,
char *  label,
struct REGEX_INTERNAL_State start,
struct REGEX_INTERNAL_State s 
)
static

Recursive helper function to add strides to a DFA.

Parameters
clscontext, contains stride length and strided transitions DLL.
depthcurrent depth of the depth-first traversal of the graph.
labelcurrent label, string that contains all labels on the path from 'start' to 's'.
startstart state for the depth-first traversal of the graph.
scurrent state in the depth-first traversal

Definition at line 2106 of file regex_internal.c.

2111 {
2112  struct REGEX_INTERNAL_Strided_Context *ctx = cls;
2113  struct REGEX_INTERNAL_Transition *t;
2114  char *new_label;
2115 
2116  if (depth == ctx->stride)
2117  {
2119  t->label = GNUNET_strdup (label);
2120  t->to_state = s;
2121  t->from_state = start;
2122  GNUNET_CONTAINER_DLL_insert (ctx->transitions_head,
2123  ctx->transitions_tail,
2124  t);
2125  }
2126  else
2127  {
2128  for (t = s->transitions_head; NULL != t; t = t->next)
2129  {
2130  /* Do not consider self-loops, because it end's up in too many
2131  * transitions */
2132  if (t->to_state == t->from_state)
2133  continue;
2134 
2135  if (NULL != label)
2136  {
2137  GNUNET_asprintf (&new_label, "%s%s", label, t->label);
2138  }
2139  else
2140  new_label = GNUNET_strdup (t->label);
2141 
2143  (depth + 1),
2144  new_label,
2145  start,
2146  t->to_state);
2147  }
2148  }
2149  GNUNET_free (label);
2150 }
static void dfa_add_multi_strides_helper(void *cls, const unsigned int depth, char *label, struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *s)
Recursive helper function to add strides to a DFA.
Context for adding strided transitions to a DFA.

References ctx, GNUNET_asprintf(), GNUNET_CONTAINER_DLL_insert, GNUNET_free, GNUNET_new, GNUNET_strdup, REGEX_INTERNAL_Transition::label, GNUNET_SCHEDULER_Task::next, start, t, and REGEX_INTERNAL_State::transitions_head.

Referenced by dfa_add_multi_strides().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ dfa_add_multi_strides()

static void dfa_add_multi_strides ( void *  cls,
const unsigned int  count,
struct REGEX_INTERNAL_State s 
)
static

Function called for each state in the DFA.

Starts a traversal of depth set in context starting from state 's'.

Parameters
clscontext.
countnot used.
scurrent state.

Definition at line 2162 of file regex_internal.c.

2165 {
2166  dfa_add_multi_strides_helper (cls, 0, NULL, s, s);
2167 }

References dfa_add_multi_strides_helper().

Referenced by REGEX_INTERNAL_dfa_add_multi_strides().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ REGEX_INTERNAL_dfa_add_multi_strides()

void REGEX_INTERNAL_dfa_add_multi_strides ( struct REGEX_INTERNAL_Context regex_ctx,
struct REGEX_INTERNAL_Automaton dfa,
const unsigned int  stride_len 
)

Adds multi-strided transitions to the given 'dfa'.

Parameters
regex_ctxregex context needed to add transitions to the automaton.
dfaDFA to which the multi strided transitions should be added.
stride_lenlength of the strides.

Definition at line 2178 of file regex_internal.c.

2181 {
2182  struct REGEX_INTERNAL_Strided_Context ctx = { stride_len, NULL, NULL };
2183  struct REGEX_INTERNAL_Transition *t;
2184  struct REGEX_INTERNAL_Transition *t_next;
2185 
2186  if ((1 > stride_len) || (GNUNET_YES == dfa->is_multistrided))
2187  return;
2188 
2189  /* Compute the new transitions of given stride_len */
2191  dfa->start,
2192  NULL,
2193  NULL,
2195  &ctx);
2196 
2197  /* Add all the new transitions to the automaton. */
2198  for (t = ctx.transitions_head; NULL != t; t = t_next)
2199  {
2200  t_next = t->next;
2201  state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
2202  GNUNET_CONTAINER_DLL_remove (ctx.transitions_head, ctx.transitions_tail, t);
2203  GNUNET_free (t->label);
2204  GNUNET_free (t);
2205  }
2206 
2207  /* Mark this automaton as multistrided */
2208  dfa->is_multistrided = GNUNET_YES;
2209 }
static void dfa_add_multi_strides(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Function called for each state in the DFA.
int is_multistrided
GNUNET_YES, if multi strides have been added to the Automaton.

References ctx, dfa_add_multi_strides(), GNUNET_CONTAINER_DLL_remove, GNUNET_free, GNUNET_YES, REGEX_INTERNAL_Automaton::is_multistrided, GNUNET_SCHEDULER_Task::next, REGEX_INTERNAL_automaton_traverse(), REGEX_INTERNAL_Automaton::start, state_add_transition(), and t.

Here is the call graph for this function:

◆ dfa_compress_paths_helper()

void dfa_compress_paths_helper ( struct REGEX_INTERNAL_Automaton dfa,
struct REGEX_INTERNAL_State start,
struct REGEX_INTERNAL_State cur,
char *  label,
unsigned int  max_len,
struct REGEX_INTERNAL_Transition **  transitions_head,
struct REGEX_INTERNAL_Transition **  transitions_tail 
)

Recursive Helper function for DFA path compression.

Does DFS on the DFA graph and adds new transitions to the given transitions DLL and marks states that should be removed by setting state->contained to GNUNET_YES.

Parameters
dfaDFA for which the paths should be compressed.
startstarting state for linear path search.
curcurrent state in the recursive DFS.
labelcurrent label (string of traversed labels).
max_lenmaximal path compression length.
transitions_headtransitions DLL.
transitions_tailtransitions DLL.

Definition at line 2226 of file regex_internal.c.

2233 {
2234  struct REGEX_INTERNAL_Transition *t;
2235  char *new_label;
2236 
2237 
2238  if ((NULL != label) &&
2239  (((cur->incoming_transition_count > 1) || (GNUNET_YES ==
2240  cur->accepting) ||
2241  (GNUNET_YES == cur->marked) ) ||
2242  ((start != dfa->start) && (max_len > 0) && (max_len == strlen (
2243  label))) ||
2244  ((start == dfa->start) && (GNUNET_REGEX_INITIAL_BYTES == strlen (
2245  label)))))
2246  {
2248  t->label = GNUNET_strdup (label);
2249  t->to_state = cur;
2250  t->from_state = start;
2251  GNUNET_CONTAINER_DLL_insert (*transitions_head, *transitions_tail, t);
2252 
2253  if (GNUNET_NO == cur->marked)
2254  {
2256  cur,
2257  cur,
2258  NULL,
2259  max_len,
2260  transitions_head,
2261  transitions_tail);
2262  }
2263  return;
2264  }
2265  else if (cur != start)
2266  cur->contained = GNUNET_YES;
2267 
2268  if ((GNUNET_YES == cur->marked) && (cur != start))
2269  return;
2270 
2271  cur->marked = GNUNET_YES;
2272 
2273 
2274  for (t = cur->transitions_head; NULL != t; t = t->next)
2275  {
2276  if (NULL != label)
2277  GNUNET_asprintf (&new_label, "%s%s", label, t->label);
2278  else
2279  new_label = GNUNET_strdup (t->label);
2280 
2281  if (t->to_state != cur)
2282  {
2284  start,
2285  t->to_state,
2286  new_label,
2287  max_len,
2288  transitions_head,
2289  transitions_tail);
2290  }
2291  GNUNET_free (new_label);
2292  }
2293 }
#define GNUNET_REGEX_INITIAL_BYTES
Constant for how many bytes the initial string regex should have.
void dfa_compress_paths_helper(struct REGEX_INTERNAL_Automaton *dfa, struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *cur, char *label, unsigned int max_len, struct REGEX_INTERNAL_Transition **transitions_head, struct REGEX_INTERNAL_Transition **transitions_tail)
Recursive Helper function for DFA path compression.
unsigned int incoming_transition_count
Number of incoming transitions.
int contained
Marking the state as contained.

References REGEX_INTERNAL_State::accepting, REGEX_INTERNAL_State::contained, GNUNET_asprintf(), GNUNET_CONTAINER_DLL_insert, GNUNET_free, GNUNET_new, GNUNET_NO, GNUNET_REGEX_INITIAL_BYTES, GNUNET_strdup, GNUNET_YES, REGEX_INTERNAL_State::incoming_transition_count, REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_State::marked, GNUNET_SCHEDULER_Task::next, start, REGEX_INTERNAL_Automaton::start, t, and REGEX_INTERNAL_State::transitions_head.

Referenced by dfa_compress_paths().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ dfa_compress_paths()

static void dfa_compress_paths ( struct REGEX_INTERNAL_Context regex_ctx,
struct REGEX_INTERNAL_Automaton dfa,
unsigned int  max_len 
)
static

Compress paths in the given 'dfa'.

Linear paths like 0->1->2->3 will be compressed to 0->3 by combining transitions.

Parameters
regex_ctxcontext for adding new transitions.
dfaDFA representation, will directly modify the given DFA.
max_lenmaximal length of the compressed paths.

Definition at line 2305 of file regex_internal.c.

2308 {
2309  struct REGEX_INTERNAL_State *s;
2310  struct REGEX_INTERNAL_State *s_next;
2311  struct REGEX_INTERNAL_Transition *t;
2312  struct REGEX_INTERNAL_Transition *t_next;
2313  struct REGEX_INTERNAL_Transition *transitions_head = NULL;
2314  struct REGEX_INTERNAL_Transition *transitions_tail = NULL;
2315 
2316  if (NULL == dfa)
2317  return;
2318 
2319  /* Count the incoming transitions on each state. */
2320  for (s = dfa->states_head; NULL != s; s = s->next)
2321  {
2322  for (t = s->transitions_head; NULL != t; t = t->next)
2323  {
2324  if (NULL != t->to_state)
2325  t->to_state->incoming_transition_count++;
2326  }
2327  }
2328 
2329  /* Unmark all states. */
2330  for (s = dfa->states_head; NULL != s; s = s->next)
2331  {
2332  s->marked = GNUNET_NO;
2333  s->contained = GNUNET_NO;
2334  }
2335 
2336  /* Add strides and mark states that can be deleted. */
2338  dfa->start,
2339  dfa->start,
2340  NULL,
2341  max_len,
2342  &transitions_head,
2343  &transitions_tail);
2344 
2345  /* Add all the new transitions to the automaton. */
2346  for (t = transitions_head; NULL != t; t = t_next)
2347  {
2348  t_next = t->next;
2349  state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
2350  GNUNET_CONTAINER_DLL_remove (transitions_head, transitions_tail, t);
2351  GNUNET_free (t->label);
2352  GNUNET_free (t);
2353  }
2354 
2355  /* Remove marked states (including their incoming and outgoing transitions). */
2356  for (s = dfa->states_head; NULL != s; s = s_next)
2357  {
2358  s_next = s->next;
2359  if (GNUNET_YES == s->contained)
2360  automaton_remove_state (dfa, s);
2361  }
2362 }

References automaton_remove_state(), REGEX_INTERNAL_State::contained, dfa_compress_paths_helper(), GNUNET_CONTAINER_DLL_remove, GNUNET_free, GNUNET_NO, GNUNET_YES, REGEX_INTERNAL_State::marked, REGEX_INTERNAL_State::next, GNUNET_SCHEDULER_Task::next, REGEX_INTERNAL_Automaton::start, state_add_transition(), REGEX_INTERNAL_Automaton::states_head, t, and REGEX_INTERNAL_State::transitions_head.

Referenced by REGEX_INTERNAL_construct_dfa().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ nfa_fragment_create()

static struct REGEX_INTERNAL_Automaton* nfa_fragment_create ( struct REGEX_INTERNAL_State start,
struct REGEX_INTERNAL_State end 
)
static

Creates a new NFA fragment.

Needs to be cleared using automaton_fragment_clear.

Parameters
startstarting state
endend state
Returns
new NFA fragment

Definition at line 2375 of file regex_internal.c.

2377 {
2378  struct REGEX_INTERNAL_Automaton *n;
2379 
2380  n = GNUNET_new (struct REGEX_INTERNAL_Automaton);
2381 
2382  n->type = NFA;
2383  n->start = NULL;
2384  n->end = NULL;
2385  n->state_count = 0;
2386 
2387  if ((NULL == start) || (NULL == end))
2388  return n;
2389 
2390  automaton_add_state (n, end);
2392 
2393  n->state_count = 2;
2394 
2395  n->start = start;
2396  n->end = end;
2397 
2398  return n;
2399 }
static void automaton_add_state(struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s)
Add a state to the automaton 'a', always use this function to alter the states DLL of the automaton.
@ NFA
Automaton representation.

References automaton_add_state(), end, REGEX_INTERNAL_Automaton::end, GNUNET_new, NFA, start, REGEX_INTERNAL_Automaton::start, REGEX_INTERNAL_Automaton::state_count, and REGEX_INTERNAL_Automaton::type.

Referenced by nfa_add_alternation(), nfa_add_concatenation(), nfa_add_label(), nfa_add_question_op(), and nfa_add_star_op().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ nfa_add_states()

static void nfa_add_states ( struct REGEX_INTERNAL_Automaton n,
struct REGEX_INTERNAL_State states_head,
struct REGEX_INTERNAL_State states_tail 
)
static

Adds a list of states to the given automaton 'n'.

Parameters
nautomaton to which the states should be added
states_headhead of the DLL of states
states_tailtail of the DLL of states

Definition at line 2410 of file regex_internal.c.

2413 {
2414  struct REGEX_INTERNAL_State *s;
2415 
2416  if ((NULL == n) || (NULL == states_head))
2417  {
2418  GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not add states\n");
2419  return;
2420  }
2421 
2422  if (NULL == n->states_head)
2423  {
2424  n->states_head = states_head;
2425  n->states_tail = states_tail;
2426  return;
2427  }
2428 
2429  if (NULL != states_head)
2430  {
2431  n->states_tail->next = states_head;
2432  n->states_tail = states_tail;
2433  }
2434 
2435  for (s = states_head; NULL != s; s = s->next)
2436  n->state_count++;
2437 }

References GNUNET_ERROR_TYPE_ERROR, GNUNET_log, REGEX_INTERNAL_State::next, REGEX_INTERNAL_Automaton::state_count, REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.

Referenced by nfa_add_alternation(), nfa_add_concatenation(), nfa_add_question_op(), and nfa_add_star_op().

Here is the caller graph for this function:

◆ nfa_state_create()

static struct REGEX_INTERNAL_State* nfa_state_create ( struct REGEX_INTERNAL_Context ctx,
int  accepting 
)
static

Creates a new NFA state.

Needs to be freed using automaton_destroy_state.

Parameters
ctxcontext
acceptingis it an accepting state or not
Returns
new NFA state

Definition at line 2449 of file regex_internal.c.

2450 {
2451  struct REGEX_INTERNAL_State *s;
2452 
2453  s = GNUNET_new (struct REGEX_INTERNAL_State);
2454  s->id = ctx->state_id++;
2455  s->accepting = accepting;
2456  s->marked = GNUNET_NO;
2457  s->contained = 0;
2458  s->index = -1;
2459  s->lowlink = -1;
2460  s->scc_id = 0;
2461  s->name = NULL;
2462  GNUNET_asprintf (&s->name, "s%i", s->id);
2463 
2464  return s;
2465 }
unsigned int scc_id
Marking the state as part of an SCC (Strongly Connected Component).

References REGEX_INTERNAL_State::accepting, REGEX_INTERNAL_State::contained, ctx, GNUNET_asprintf(), GNUNET_new, GNUNET_NO, REGEX_INTERNAL_State::id, REGEX_INTERNAL_State::index, REGEX_INTERNAL_State::lowlink, REGEX_INTERNAL_State::marked, REGEX_INTERNAL_State::name, and REGEX_INTERNAL_State::scc_id.

Referenced by nfa_add_alternation(), nfa_add_label(), nfa_add_question_op(), and nfa_add_star_op().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ nfa_closure_set_create()

static void nfa_closure_set_create ( struct REGEX_INTERNAL_StateSet ret,
struct REGEX_INTERNAL_Automaton nfa,
struct REGEX_INTERNAL_StateSet states,
const char *  label 
)
static

Calculates the closure set for the given set of states.

Parameters
retset to sorted nfa closure on 'label' (epsilon closure if 'label' is NULL)
nfathe NFA containing 's'
stateslist of states on which to base the closure on
labeltransitioning label for which to base the closure on, pass NULL for epsilon transition

Definition at line 2478 of file regex_internal.c.

2482 {
2483  struct REGEX_INTERNAL_State *s;
2484  unsigned int i;
2485  struct REGEX_INTERNAL_StateSet_MDLL cls_stack;
2486  struct REGEX_INTERNAL_State *clsstate;
2487  struct REGEX_INTERNAL_State *currentstate;
2488  struct REGEX_INTERNAL_Transition *ctran;
2489 
2490  memset (ret, 0, sizeof(struct REGEX_INTERNAL_StateSet));
2491  if (NULL == states)
2492  return;
2493 
2494  for (i = 0; i < states->off; i++)
2495  {
2496  s = states->states[i];
2497 
2498  /* Add start state to closure only for epsilon closure */
2499  if (NULL == label)
2500  state_set_append (ret, s);
2501 
2502  /* initialize work stack */
2503  cls_stack.head = NULL;
2504  cls_stack.tail = NULL;
2505  GNUNET_CONTAINER_MDLL_insert (ST, cls_stack.head, cls_stack.tail, s);
2506  cls_stack.len = 1;
2507 
2508  while (NULL != (currentstate = cls_stack.tail))
2509  {
2511  cls_stack.head,
2512  cls_stack.tail,
2513  currentstate);
2514  cls_stack.len--;
2515  for (ctran = currentstate->transitions_head; NULL != ctran;
2516  ctran = ctran->next)
2517  {
2518  if (NULL == (clsstate = ctran->to_state))
2519  continue;
2520  if (0 != clsstate->contained)
2521  continue;
2522  if (0 != nullstrcmp (label, ctran->label))
2523  continue;
2524  state_set_append (ret, clsstate);
2526  cls_stack.head,
2527  cls_stack.tail,
2528  clsstate);
2529  cls_stack.len++;
2530  clsstate->contained = 1;
2531  }
2532  }
2533  }
2534  for (i = 0; i < ret->off; i++)
2535  ret->states[i]->contained = 0;
2536 
2537  if (ret->off > 1)
2538  qsort (ret->states,
2539  ret->off,
2540  sizeof(struct REGEX_INTERNAL_State *),
2541  &state_compare);
2542 }
#define GNUNET_CONTAINER_MDLL_remove(mdll, head, tail, element)
Remove an element from a MDLL.
#define GNUNET_CONTAINER_MDLL_insert_tail(mdll, head, tail, element)
Insert an element at the tail of a MDLL.
#define GNUNET_CONTAINER_MDLL_insert(mdll, head, tail, element)
Insert an element at the head of a MDLL.
static void state_set_append(struct REGEX_INTERNAL_StateSet *set, struct REGEX_INTERNAL_State *state)
Append state to the given StateSet.
Set of states using MDLL API.

References REGEX_INTERNAL_State::contained, GNUNET_CONTAINER_MDLL_insert, GNUNET_CONTAINER_MDLL_insert_tail, GNUNET_CONTAINER_MDLL_remove, REGEX_INTERNAL_StateSet_MDLL::head, REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_StateSet_MDLL::len, REGEX_INTERNAL_Transition::next, nullstrcmp(), REGEX_INTERNAL_StateSet::off, ret, state_compare(), state_set_append(), REGEX_INTERNAL_StateSet::states, REGEX_INTERNAL_StateSet_MDLL::tail, REGEX_INTERNAL_Transition::to_state, and REGEX_INTERNAL_State::transitions_head.

Referenced by construct_dfa_states(), evaluate_nfa(), and REGEX_INTERNAL_construct_dfa().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ nfa_add_concatenation()

static void nfa_add_concatenation ( struct REGEX_INTERNAL_Context ctx)
static

Pops two NFA fragments (a, b) from the stack and concatenates them (ab)

Parameters
ctxcontext

Definition at line 2551 of file regex_internal.c.

2552 {
2553  struct REGEX_INTERNAL_Automaton *a;
2554  struct REGEX_INTERNAL_Automaton *b;
2555  struct REGEX_INTERNAL_Automaton *new_nfa;
2556 
2557  b = ctx->stack_tail;
2558  GNUNET_assert (NULL != b);
2559  GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
2560  a = ctx->stack_tail;
2561  GNUNET_assert (NULL != a);
2562  GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2563 
2564  state_add_transition (ctx, a->end, NULL, b->start);
2565  a->end->accepting = 0;
2566  b->end->accepting = 1;
2567 
2568  new_nfa = nfa_fragment_create (NULL, NULL);
2569  nfa_add_states (new_nfa, a->states_head, a->states_tail);
2570  nfa_add_states (new_nfa, b->states_head, b->states_tail);
2571  new_nfa->start = a->start;
2572  new_nfa->end = b->end;
2573  new_nfa->state_count += a->state_count + b->state_count;
2576 
2577  GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2578 }
#define GNUNET_CONTAINER_DLL_insert_tail(head, tail, element)
Insert an element at the tail of a DLL.
static struct REGEX_INTERNAL_Automaton * nfa_fragment_create(struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *end)
Creates a new NFA fragment.
static void nfa_add_states(struct REGEX_INTERNAL_Automaton *n, struct REGEX_INTERNAL_State *states_head, struct REGEX_INTERNAL_State *states_tail)
Adds a list of states to the given automaton 'n'.
static void automaton_fragment_clear(struct REGEX_INTERNAL_Automaton *a)
Clears an automaton fragment.

References REGEX_INTERNAL_State::accepting, automaton_fragment_clear(), ctx, REGEX_INTERNAL_Automaton::end, GNUNET_assert, GNUNET_CONTAINER_DLL_insert_tail, GNUNET_CONTAINER_DLL_remove, nfa_add_states(), nfa_fragment_create(), REGEX_INTERNAL_Automaton::start, state_add_transition(), REGEX_INTERNAL_Automaton::state_count, REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.

Referenced by REGEX_INTERNAL_construct_nfa().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ nfa_add_star_op()

static void nfa_add_star_op ( struct REGEX_INTERNAL_Context ctx)
static

Pops a NFA fragment from the stack (a) and adds a new fragment (a*)

Parameters
ctxcontext

Definition at line 2587 of file regex_internal.c.

2588 {
2589  struct REGEX_INTERNAL_Automaton *a;
2590  struct REGEX_INTERNAL_Automaton *new_nfa;
2591  struct REGEX_INTERNAL_State *start;
2592  struct REGEX_INTERNAL_State *end;
2593 
2594  a = ctx->stack_tail;
2595 
2596  if (NULL == a)
2597  {
2598  GNUNET_log (
2600  "nfa_add_star_op failed, because there was no element on the stack");
2601  return;
2602  }
2603 
2604  GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2605 
2606  start = nfa_state_create (ctx, 0);
2607  end = nfa_state_create (ctx, 1);
2608 
2609  state_add_transition (ctx, start, NULL, a->start);
2610  state_add_transition (ctx, start, NULL, end);
2611  state_add_transition (ctx, a->end, NULL, a->start);
2612  state_add_transition (ctx, a->end, NULL, end);
2613 
2614  a->end->accepting = 0;
2615  end->accepting = 1;
2616 
2617  new_nfa = nfa_fragment_create (start, end);
2618  nfa_add_states (new_nfa, a->states_head, a->states_tail);
2620 
2621  GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2622 }
static struct REGEX_INTERNAL_State * nfa_state_create(struct REGEX_INTERNAL_Context *ctx, int accepting)
Creates a new NFA state.

References REGEX_INTERNAL_State::accepting, automaton_fragment_clear(), ctx, end, REGEX_INTERNAL_Automaton::end, GNUNET_CONTAINER_DLL_insert_tail, GNUNET_CONTAINER_DLL_remove, GNUNET_ERROR_TYPE_ERROR, GNUNET_log, nfa_add_states(), nfa_fragment_create(), nfa_state_create(), start, REGEX_INTERNAL_Automaton::start, state_add_transition(), REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.

Referenced by REGEX_INTERNAL_construct_nfa().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ nfa_add_plus_op()

static void nfa_add_plus_op ( struct REGEX_INTERNAL_Context ctx)
static

Pops an NFA fragment (a) from the stack and adds a new fragment (a+)

Parameters
ctxcontext

Definition at line 2631 of file regex_internal.c.

2632 {
2633  struct REGEX_INTERNAL_Automaton *a;
2634 
2635  a = ctx->stack_tail;
2636 
2637  if (NULL == a)
2638  {
2639  GNUNET_log (
2641  "nfa_add_plus_op failed, because there was no element on the stack");
2642  return;
2643  }
2644 
2645  GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2646 
2647  state_add_transition (ctx, a->end, NULL, a->start);
2648 
2649  GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, a);
2650 }

References ctx, REGEX_INTERNAL_Automaton::end, GNUNET_CONTAINER_DLL_insert_tail, GNUNET_CONTAINER_DLL_remove, GNUNET_ERROR_TYPE_ERROR, GNUNET_log, REGEX_INTERNAL_Automaton::start, and state_add_transition().

Referenced by REGEX_INTERNAL_construct_nfa().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ nfa_add_question_op()

static void nfa_add_question_op ( struct REGEX_INTERNAL_Context ctx)
static

Pops an NFA fragment (a) from the stack and adds a new fragment (a?)

Parameters
ctxcontext

Definition at line 2659 of file regex_internal.c.

2660 {
2661  struct REGEX_INTERNAL_Automaton *a;
2662  struct REGEX_INTERNAL_Automaton *new_nfa;
2663  struct REGEX_INTERNAL_State *start;
2664  struct REGEX_INTERNAL_State *end;
2665 
2666  a = ctx->stack_tail;
2667  if (NULL == a)
2668  {
2669  GNUNET_log (
2671  "nfa_add_question_op failed, because there was no element on the stack");
2672  return;
2673  }
2674 
2675  GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2676 
2677  start = nfa_state_create (ctx, 0);
2678  end = nfa_state_create (ctx, 1);
2679 
2680  state_add_transition (ctx, start, NULL, a->start);
2681  state_add_transition (ctx, start, NULL, end);
2682  state_add_transition (ctx, a->end, NULL, end);
2683 
2684  a->end->accepting = 0;
2685 
2686  new_nfa = nfa_fragment_create (start, end);
2687  nfa_add_states (new_nfa, a->states_head, a->states_tail);
2688  GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2690 }

References REGEX_INTERNAL_State::accepting, automaton_fragment_clear(), ctx, end, REGEX_INTERNAL_Automaton::end, GNUNET_CONTAINER_DLL_insert_tail, GNUNET_CONTAINER_DLL_remove, GNUNET_ERROR_TYPE_ERROR, GNUNET_log, nfa_add_states(), nfa_fragment_create(), nfa_state_create(), start, REGEX_INTERNAL_Automaton::start, state_add_transition(), REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.

Referenced by REGEX_INTERNAL_construct_nfa().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ nfa_add_alternation()

static void nfa_add_alternation ( struct REGEX_INTERNAL_Context ctx)
static

Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment that alternates between a and b (a|b)

Parameters
ctxcontext

Definition at line 2700 of file regex_internal.c.

2701 {
2702  struct REGEX_INTERNAL_Automaton *a;
2703  struct REGEX_INTERNAL_Automaton *b;
2704  struct REGEX_INTERNAL_Automaton *new_nfa;
2705  struct REGEX_INTERNAL_State *start;
2706  struct REGEX_INTERNAL_State *end;
2707 
2708  b = ctx->stack_tail;
2709  GNUNET_assert (NULL != b);
2710  GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
2711  a = ctx->stack_tail;
2712  GNUNET_assert (NULL != a);
2713  GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2714 
2715  start = nfa_state_create (ctx, 0);
2716  end = nfa_state_create (ctx, 1);
2717  state_add_transition (ctx, start, NULL, a->start);
2718  state_add_transition (ctx, start, NULL, b->start);
2719 
2720  state_add_transition (ctx, a->end, NULL, end);
2721  state_add_transition (ctx, b->end, NULL, end);
2722 
2723  a->end->accepting = 0;
2724  b->end->accepting = 0;
2725  end->accepting = 1;
2726 
2727  new_nfa = nfa_fragment_create (start, end);
2728  nfa_add_states (new_nfa, a->states_head, a->states_tail);
2729  nfa_add_states (new_nfa, b->states_head, b->states_tail);
2732 
2733  GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2734 }

References REGEX_INTERNAL_State::accepting, automaton_fragment_clear(), ctx, end, REGEX_INTERNAL_Automaton::end, GNUNET_assert, GNUNET_CONTAINER_DLL_insert_tail, GNUNET_CONTAINER_DLL_remove, nfa_add_states(), nfa_fragment_create(), nfa_state_create(), start, REGEX_INTERNAL_Automaton::start, state_add_transition(), REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.

Referenced by REGEX_INTERNAL_construct_nfa().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ nfa_add_label()

static void nfa_add_label ( struct REGEX_INTERNAL_Context ctx,
const char *  label 
)
static

Adds a new nfa fragment to the stack.

Parameters
ctxcontext
labellabel for nfa transition

Definition at line 2744 of file regex_internal.c.

2745 {
2746  struct REGEX_INTERNAL_Automaton *n;
2747  struct REGEX_INTERNAL_State *start;
2748  struct REGEX_INTERNAL_State *end;
2749 
2750  GNUNET_assert (NULL != ctx);
2751 
2752  start = nfa_state_create (ctx, 0);
2753  end = nfa_state_create (ctx, 1);
2754  state_add_transition (ctx, start, label, end);
2755  n = nfa_fragment_create (start, end);
2756  GNUNET_assert (NULL != n);
2757  GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, n);
2758 }

References ctx, end, GNUNET_assert, GNUNET_CONTAINER_DLL_insert_tail, nfa_fragment_create(), nfa_state_create(), start, and state_add_transition().

Referenced by REGEX_INTERNAL_construct_nfa().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ REGEX_INTERNAL_context_init()

static void REGEX_INTERNAL_context_init ( struct REGEX_INTERNAL_Context ctx)
static

Initialize a new context.

Parameters
ctxcontext

Definition at line 2767 of file regex_internal.c.

2768 {
2769  if (NULL == ctx)
2770  {
2771  GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Context was NULL!");
2772  return;
2773  }
2774  ctx->state_id = 0;
2775  ctx->transition_id = 0;
2776  ctx->stack_head = NULL;
2777  ctx->stack_tail = NULL;
2778 }

References ctx, GNUNET_ERROR_TYPE_ERROR, and GNUNET_log.

Referenced by REGEX_INTERNAL_construct_dfa(), and REGEX_INTERNAL_construct_nfa().

Here is the caller graph for this function:

◆ REGEX_INTERNAL_construct_nfa()

struct REGEX_INTERNAL_Automaton* REGEX_INTERNAL_construct_nfa ( const char *  regex,
const size_t  len 
)

Construct an NFA by parsing the regex string of length 'len'.

Parameters
regexregular expression string
lenlength of the string
Returns
NFA, needs to be freed using REGEX_INTERNAL_destroy_automaton

Definition at line 2790 of file regex_internal.c.

2791 {
2792  struct REGEX_INTERNAL_Context ctx;
2793  struct REGEX_INTERNAL_Automaton *nfa;
2794  const char *regexp;
2795  char curlabel[2];
2796  char *error_msg;
2797  unsigned int count;
2798  unsigned int altcount;
2799  unsigned int atomcount;
2800  unsigned int poff;
2801  unsigned int psize;
2802 
2803  struct
2804  {
2805  int altcount;
2806  int atomcount;
2807  } *p;
2808 
2809  if ((NULL == regex) || (0 == strlen (regex)) || (0 == len))
2810  {
2812  "Could not parse regex. Empty regex string provided.\n");
2813 
2814  return NULL;
2815  }
2817 
2818  regexp = regex;
2819  curlabel[1] = '\0';
2820  p = NULL;
2821  error_msg = NULL;
2822  altcount = 0;
2823  atomcount = 0;
2824  poff = 0;
2825  psize = 0;
2826 
2827  for (count = 0; count < len && *regexp; count++, regexp++)
2828  {
2829  switch (*regexp)
2830  {
2831  case '(':
2832  if (atomcount > 1)
2833  {
2834  --atomcount;
2836  }
2837  if (poff == psize)
2838  GNUNET_array_grow (p, psize, psize * 2 + 4); /* FIXME why *2 +4? */
2839  p[poff].altcount = altcount;
2840  p[poff].atomcount = atomcount;
2841  poff++;
2842  altcount = 0;
2843  atomcount = 0;
2844  break;
2845 
2846  case '|':
2847  if (0 == atomcount)
2848  {
2849  error_msg = "Cannot append '|' to nothing";
2850  goto error;
2851  }
2852  while (--atomcount > 0)
2854  altcount++;
2855  break;
2856 
2857  case ')':
2858  if (0 == poff)
2859  {
2860  error_msg = "Missing opening '('";
2861  goto error;
2862  }
2863  if (0 == atomcount)
2864  {
2865  /* Ignore this: "()" */
2866  poff--;
2867  altcount = p[poff].altcount;
2868  atomcount = p[poff].atomcount;
2869  break;
2870  }
2871  while (--atomcount > 0)
2873  for (; altcount > 0; altcount--)
2875  poff--;
2876  altcount = p[poff].altcount;
2877  atomcount = p[poff].atomcount;
2878  atomcount++;
2879  break;
2880 
2881  case '*':
2882  if (atomcount == 0)
2883  {
2884  error_msg = "Cannot append '*' to nothing";
2885  goto error;
2886  }
2887  nfa_add_star_op (&ctx);
2888  break;
2889 
2890  case '+':
2891  if (atomcount == 0)
2892  {
2893  error_msg = "Cannot append '+' to nothing";
2894  goto error;
2895  }
2896  nfa_add_plus_op (&ctx);
2897  break;
2898 
2899  case '?':
2900  if (atomcount == 0)
2901  {
2902  error_msg = "Cannot append '?' to nothing";
2903  goto error;
2904  }
2906  break;
2907 
2908  default:
2909  if (atomcount > 1)
2910  {
2911  --atomcount;
2913  }
2914  curlabel[0] = *regexp;
2915  nfa_add_label (&ctx, curlabel);
2916  atomcount++;
2917  break;
2918  }
2919  }
2920  if (0 != poff)
2921  {
2922  error_msg = "Unbalanced parenthesis";
2923  goto error;
2924  }
2925  while (--atomcount > 0)
2927  for (; altcount > 0; altcount--)
2929 
2930  GNUNET_array_grow (p, psize, 0);
2931 
2932  nfa = ctx.stack_tail;
2933  GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa);
2934 
2935  if (NULL != ctx.stack_head)
2936  {
2937  error_msg = "Creating the NFA failed. NFA stack was not empty!";
2938  goto error;
2939  }
2940 
2941  /* Remember the regex that was used to generate this NFA */
2942  nfa->regex = GNUNET_strdup (regex);
2943 
2944  /* create depth-first numbering of the states for pretty printing */
2946  NULL,
2947  NULL,
2948  NULL,
2949  &number_states,
2950  NULL);
2951 
2952  /* No multistriding added so far */
2953  nfa->is_multistrided = GNUNET_NO;
2954 
2955  return nfa;
2956 
2957 error:
2958  GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not parse regex: `%s'\n", regex);
2959  if (NULL != error_msg)
2960  GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "%s\n", error_msg);
2961 
2962  GNUNET_free (p);
2963 
2964  while (NULL != (nfa = ctx.stack_head))
2965  {
2966  GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa);
2968  }
2969 
2970  return NULL;
2971 }
static struct GNUNET_OS_Process * p
Helper process we started.
Definition: gnunet-uri.c:38
static void nfa_add_concatenation(struct REGEX_INTERNAL_Context *ctx)
Pops two NFA fragments (a, b) from the stack and concatenates them (ab)
static void nfa_add_question_op(struct REGEX_INTERNAL_Context *ctx)
Pops an NFA fragment (a) from the stack and adds a new fragment (a?)
void REGEX_INTERNAL_automaton_destroy(struct REGEX_INTERNAL_Automaton *a)
Free the memory allocated by constructing the REGEX_INTERNAL_Automaton.
static void nfa_add_star_op(struct REGEX_INTERNAL_Context *ctx)
Pops a NFA fragment from the stack (a) and adds a new fragment (a*)
static void nfa_add_alternation(struct REGEX_INTERNAL_Context *ctx)
Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment that alternates between a an...
static void nfa_add_label(struct REGEX_INTERNAL_Context *ctx, const char *label)
Adds a new nfa fragment to the stack.
static void REGEX_INTERNAL_context_init(struct REGEX_INTERNAL_Context *ctx)
Initialize a new context.
static void nfa_add_plus_op(struct REGEX_INTERNAL_Context *ctx)
Pops an NFA fragment (a) from the stack and adds a new fragment (a+)
Context that contains an id counter for states and transitions as well as a DLL of automatons used as...

References ctx, GNUNET_array_grow, GNUNET_CONTAINER_DLL_remove, GNUNET_ERROR_TYPE_ERROR, GNUNET_free, GNUNET_log, GNUNET_NO, GNUNET_strdup, REGEX_INTERNAL_Automaton::is_multistrided, len, nfa_add_alternation(), nfa_add_concatenation(), nfa_add_label(), nfa_add_plus_op(), nfa_add_question_op(), nfa_add_star_op(), number_states(), p, REGEX_INTERNAL_Automaton::regex, REGEX_INTERNAL_automaton_destroy(), REGEX_INTERNAL_automaton_traverse(), and REGEX_INTERNAL_context_init().

Referenced by REGEX_INTERNAL_construct_dfa().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ construct_dfa_states()

static void construct_dfa_states ( struct REGEX_INTERNAL_Context ctx,
struct REGEX_INTERNAL_Automaton nfa,
struct REGEX_INTERNAL_Automaton dfa,
struct REGEX_INTERNAL_State dfa_state 
)
static

Create DFA states based on given 'nfa' and starting with 'dfa_state'.

Parameters
ctxcontext.
nfaNFA automaton.
dfaDFA automaton.
dfa_statecurrent dfa state, pass epsilon closure of first nfa state for starting.

Definition at line 2984 of file regex_internal.c.

2988 {
2989  struct REGEX_INTERNAL_Transition *ctran;
2990  struct REGEX_INTERNAL_State *new_dfa_state;
2991  struct REGEX_INTERNAL_State *state_contains;
2992  struct REGEX_INTERNAL_State *state_iter;
2993  struct REGEX_INTERNAL_StateSet tmp;
2994  struct REGEX_INTERNAL_StateSet nfa_set;
2995 
2996  for (ctran = dfa_state->transitions_head; NULL != ctran; ctran = ctran->next)
2997  {
2998  if ((NULL == ctran->label) || (NULL != ctran->to_state) )
2999  continue;
3000 
3001  nfa_closure_set_create (&tmp, nfa, &dfa_state->nfa_set, ctran->label);
3002  nfa_closure_set_create (&nfa_set, nfa, &tmp, NULL);
3003  state_set_clear (&tmp);
3004 
3005  state_contains = NULL;
3006  for (state_iter = dfa->states_head; NULL != state_iter;
3007  state_iter = state_iter->next)
3008  {
3009  if (0 == state_set_compare (&state_iter->nfa_set, &nfa_set))
3010  {
3011  state_contains = state_iter;
3012  break;
3013  }
3014  }
3015  if (NULL == state_contains)
3016  {
3017  new_dfa_state = dfa_state_create (ctx, &nfa_set);
3018  automaton_add_state (dfa, new_dfa_state);
3019  ctran->to_state = new_dfa_state;
3020  construct_dfa_states (ctx, nfa, dfa, new_dfa_state);
3021  }
3022  else
3023  {
3024  ctran->to_state = state_contains;
3025  state_set_clear (&nfa_set);
3026  }
3027  }
3028 }
static struct REGEX_INTERNAL_State * dfa_state_create(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_StateSet *nfa_states)
Creates a new DFA state based on a set of NFA states.
static void construct_dfa_states(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *nfa, struct REGEX_INTERNAL_Automaton *dfa, struct REGEX_INTERNAL_State *dfa_state)
Create DFA states based on given 'nfa' and starting with 'dfa_state'.
static int state_set_compare(struct REGEX_INTERNAL_StateSet *sset1, struct REGEX_INTERNAL_StateSet *sset2)
Compare to state sets by comparing the id's of the states that are contained in each set.
static void nfa_closure_set_create(struct REGEX_INTERNAL_StateSet *ret, struct REGEX_INTERNAL_Automaton *nfa, struct REGEX_INTERNAL_StateSet *states, const char *label)
Calculates the closure set for the given set of states.

References automaton_add_state(), ctx, dfa_state_create(), REGEX_INTERNAL_Transition::label, REGEX_INTERNAL_Transition::next, REGEX_INTERNAL_State::next, nfa_closure_set_create(), REGEX_INTERNAL_State::nfa_set, state_set_clear(), state_set_compare(), REGEX_INTERNAL_Automaton::states_head, REGEX_INTERNAL_Transition::to_state, and REGEX_INTERNAL_State::transitions_head.

Referenced by REGEX_INTERNAL_construct_dfa().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ REGEX_INTERNAL_construct_dfa()

struct REGEX_INTERNAL_Automaton* REGEX_INTERNAL_construct_dfa ( const char *  regex,
const size_t  len,
unsigned int  max_path_len 
)

Construct DFA for the given 'regex' of length 'len'.

Path compression means, that for example a DFA o -> a -> b -> c -> o will be compressed to o -> abc -> o. Note that this parameter influences the non-determinism of states of the resulting NFA in the DHT (number of outgoing edges with the same label). For example for an application that stores IPv4 addresses as bitstrings it could make sense to limit the path compression to 4 or 8.

Parameters
regexregular expression string.
lenlength of the regular expression.
max_path_lenlimit the path compression length to the given value. If set to 1, no path compression is applied. Set to 0 for maximal possible path compression (generally not desirable).
Returns
DFA, needs to be freed using REGEX_INTERNAL_automaton_destroy().

Definition at line 3032 of file regex_internal.c.

3035 {
3036  struct REGEX_INTERNAL_Context ctx;
3037  struct REGEX_INTERNAL_Automaton *dfa;
3038  struct REGEX_INTERNAL_Automaton *nfa;
3039  struct REGEX_INTERNAL_StateSet nfa_start_eps_cls;
3040  struct REGEX_INTERNAL_StateSet singleton_set;
3041 
3043 
3044  /* Create NFA */
3045  nfa = REGEX_INTERNAL_construct_nfa (regex, len);
3046 
3047  if (NULL == nfa)
3048  {
3050  "Could not create DFA, because NFA creation failed\n");
3051  return NULL;
3052  }
3053 
3054  dfa = GNUNET_new (struct REGEX_INTERNAL_Automaton);
3055  dfa->type = DFA;
3056  dfa->regex = GNUNET_strdup (regex);
3057 
3058  /* Create DFA start state from epsilon closure */
3059  memset (&singleton_set, 0, sizeof(struct REGEX_INTERNAL_StateSet));
3060  state_set_append (&singleton_set, nfa->start);
3061  nfa_closure_set_create (&nfa_start_eps_cls, nfa, &singleton_set, NULL);
3062  state_set_clear (&singleton_set);
3063  dfa->start = dfa_state_create (&ctx, &nfa_start_eps_cls);
3064  automaton_add_state (dfa, dfa->start);
3065 
3066  construct_dfa_states (&ctx, nfa, dfa, dfa->start);
3068 
3069  /* Minimize DFA */
3070  if (GNUNET_OK != dfa_minimize (&ctx, dfa))
3071  {
3073  return NULL;
3074  }
3075 
3076  /* Create proofs and hashes for all states */
3077  if (GNUNET_OK != automaton_create_proofs (dfa))
3078  {
3080  return NULL;
3081  }
3082 
3083  /* Compress linear DFA paths */
3084  if (1 != max_path_len)
3085  dfa_compress_paths (&ctx, dfa, max_path_len);
3086 
3087  return dfa;
3088 }
static int dfa_minimize(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a)
Minimize the given DFA 'a' by removing all unreachable states, removing all dead states and merging a...
static int automaton_create_proofs(struct REGEX_INTERNAL_Automaton *a)
Create proofs for all states in the given automaton.
struct REGEX_INTERNAL_Automaton * REGEX_INTERNAL_construct_nfa(const char *regex, const size_t len)
Construct an NFA by parsing the regex string of length 'len'.
static void dfa_compress_paths(struct REGEX_INTERNAL_Context *regex_ctx, struct REGEX_INTERNAL_Automaton *dfa, unsigned int max_len)
Compress paths in the given 'dfa'.

References automaton_add_state(), automaton_create_proofs(), construct_dfa_states(), ctx, DFA, dfa_compress_paths(), dfa_minimize(), dfa_state_create(), GNUNET_ERROR_TYPE_ERROR, GNUNET_log, GNUNET_new, GNUNET_OK, GNUNET_strdup, len, nfa_closure_set_create(), REGEX_INTERNAL_Automaton::regex, REGEX_INTERNAL_automaton_destroy(), REGEX_INTERNAL_construct_nfa(), REGEX_INTERNAL_context_init(), REGEX_INTERNAL_Automaton::start, state_set_append(), state_set_clear(), and REGEX_INTERNAL_Automaton::type.

Referenced by announce_regex(), main(), and REGEX_INTERNAL_announce().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ REGEX_INTERNAL_automaton_destroy()

void REGEX_INTERNAL_automaton_destroy ( struct REGEX_INTERNAL_Automaton a)

Free the memory allocated by constructing the REGEX_INTERNAL_Automaton.

data structure.

Parameters
aautomaton to be destroyed.

Definition at line 3092 of file regex_internal.c.

3093 {
3094  struct REGEX_INTERNAL_State *s;
3095  struct REGEX_INTERNAL_State *next_state;
3096 
3097  if (NULL == a)
3098  return;
3099 
3100  GNUNET_free (a->regex);
3102 
3103  for (s = a->states_head; NULL != s; s = next_state)
3104  {
3105  next_state = s->next;
3108  }
3109 
3110  GNUNET_free (a);
3111 }

References automaton_destroy_state(), REGEX_INTERNAL_Automaton::canonical_regex, GNUNET_CONTAINER_DLL_remove, GNUNET_free, REGEX_INTERNAL_State::next, REGEX_INTERNAL_Automaton::regex, REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_Automaton::states_tail.

Referenced by announce_regex(), main(), REGEX_INTERNAL_announce_cancel(), REGEX_INTERNAL_construct_dfa(), and REGEX_INTERNAL_construct_nfa().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ evaluate_dfa()

static int evaluate_dfa ( struct REGEX_INTERNAL_Automaton a,
const char *  string 
)
static

Evaluates the given string using the given DFA automaton.

Parameters
aautomaton, type must be DFA
stringstring that should be evaluated
Returns
0 if string matches, non-0 otherwise

Definition at line 3123 of file regex_internal.c.

3124 {
3125  const char *strp;
3126  struct REGEX_INTERNAL_State *s;
3127  unsigned int step_len;
3128 
3129  if (DFA != a->type)
3130  {
3132  "Tried to evaluate DFA, but NFA automaton given");
3133  return -1;
3134  }
3135 
3136  s = a->start;
3137 
3138  /* If the string is empty but the starting state is accepting, we accept. */
3139  if (((NULL == string) || (0 == strlen (string))) && s->accepting)
3140  return 0;
3141 
3142  for (strp = string; NULL != strp && *strp; strp += step_len)
3143  {
3144  step_len = dfa_move (&s, strp);
3145 
3146  if (NULL == s)
3147  break;
3148  }
3149 
3150  if ((NULL != s) && s->accepting)
3151  return 0;
3152 
3153  return 1;
3154 }
static unsigned int dfa_move(struct REGEX_INTERNAL_State **s, const char *str)
Move from the given state 's' to the next state on transition 'str'.

References REGEX_INTERNAL_State::accepting, DFA, dfa_move(), GNUNET_ERROR_TYPE_ERROR, GNUNET_log, REGEX_INTERNAL_Automaton::start, and REGEX_INTERNAL_Automaton::type.

Referenced by REGEX_INTERNAL_eval().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ evaluate_nfa()

static int evaluate_nfa ( struct REGEX_INTERNAL_Automaton a,
const char *  string 
)
static

Evaluates the given string using the given NFA automaton.

Parameters
aautomaton, type must be NFA
stringstring that should be evaluated
Returns
0 if string matches, non-0 otherwise

Definition at line 3165 of file regex_internal.c.

3166 {
3167  const char *strp;
3168  char str[2];
3169  struct REGEX_INTERNAL_State *s;
3170  struct REGEX_INTERNAL_StateSet sset;
3171  struct REGEX_INTERNAL_StateSet new_sset;
3172  struct REGEX_INTERNAL_StateSet singleton_set;
3173  unsigned int i;
3174  int result;
3175 
3176  if (NFA != a->type)
3177  {
3179  "Tried to evaluate NFA, but DFA automaton given");
3180  return -1;
3181  }
3182 
3183  /* If the string is empty but the starting state is accepting, we accept. */
3184  if (((NULL == string) || (0 == strlen (string))) && a->start->accepting)
3185  return 0;
3186 
3187  result = 1;
3188  memset (&singleton_set, 0, sizeof(struct REGEX_INTERNAL_StateSet));
3189  state_set_append (&singleton_set, a->start);
3190  nfa_closure_set_create (&sset, a, &singleton_set, NULL);
3191  state_set_clear (&singleton_set);
3192 
3193  str[1] = '\0';
3194  for (strp = string; NULL != strp && *strp; strp++)
3195  {
3196  str[0] = *strp;
3197  nfa_closure_set_create (&new_sset, a, &sset, str);
3198  state_set_clear (&sset);
3199  nfa_closure_set_create (&sset, a, &new_sset, 0);
3200  state_set_clear (&new_sset);
3201  }
3202 
3203  for (i = 0; i < sset.off; i++)
3204  {
3205  s = sset.states[i];
3206  if ((NULL != s) && (s->accepting))
3207  {
3208  result = 0;
3209  break;
3210  }
3211  }
3212 
3213  state_set_clear (&sset);
3214  return result;
3215 }

References REGEX_INTERNAL_State::accepting, GNUNET_ERROR_TYPE_ERROR, GNUNET_log, NFA, nfa_closure_set_create(), REGEX_INTERNAL_StateSet::off, result, REGEX_INTERNAL_Automaton::start, state_set_append(), state_set_clear(), REGEX_INTERNAL_StateSet::states, and REGEX_INTERNAL_Automaton::type.

Referenced by REGEX_INTERNAL_eval().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ REGEX_INTERNAL_eval()

int REGEX_INTERNAL_eval ( struct REGEX_INTERNAL_Automaton a,
const char *  string 
)

Evaluates the given 'string' against the given compiled regex.

Parameters
aautomaton.
stringstring to check.
Returns
0 if string matches, non 0 otherwise.

Definition at line 3219 of file regex_internal.c.

3220 {
3221  int result;
3222 
3223  switch (a->type)
3224  {
3225  case DFA:
3226  result = evaluate_dfa (a, string);
3227  break;
3228 
3229  case NFA:
3230  result = evaluate_nfa (a, string);
3231  break;
3232 
3233  default:
3235  "Evaluating regex failed, automaton has no type!\n");
3237  break;
3238  }
3239 
3240  return result;
3241 }
static int evaluate_dfa(struct REGEX_INTERNAL_Automaton *a, const char *string)
Evaluates the given string using the given DFA automaton.
static int evaluate_nfa(struct REGEX_INTERNAL_Automaton *a, const char *string)
Evaluates the given string using the given NFA automaton.

References DFA, evaluate_dfa(), evaluate_nfa(), GNUNET_ERROR_TYPE_ERROR, GNUNET_log, GNUNET_SYSERR, NFA, result, and REGEX_INTERNAL_Automaton::type.

Here is the call graph for this function:

◆ REGEX_INTERNAL_get_canonical_regex()

const char* REGEX_INTERNAL_get_canonical_regex ( struct REGEX_INTERNAL_Automaton a)

Get the canonical regex of the given automaton.

When constructing the automaton a proof is computed for each state, consisting of the regular expression leading to this state. A complete regex for the automaton can be computed by combining these proofs. As of now this function is only useful for testing.

Parameters
aautomaton for which the canonical regex should be returned.
Returns
canonical regex string.

Definition at line 3245 of file regex_internal.c.

3246 {
3247  if (NULL == a)
3248  return NULL;
3249 
3250  return a->canonical_regex;
3251 }

References REGEX_INTERNAL_Automaton::canonical_regex.

◆ REGEX_INTERNAL_get_transition_count()

unsigned int REGEX_INTERNAL_get_transition_count ( struct REGEX_INTERNAL_Automaton a)

Get the number of transitions that are contained in the given automaton.

Parameters
aautomaton for which the number of transitions should be returned.
Returns
number of transitions in the given automaton.

Definition at line 3262 of file regex_internal.c.

3263 {
3264  unsigned int t_count;
3265  struct REGEX_INTERNAL_State *s;
3266 
3267  if (NULL == a)
3268  return 0;
3269 
3270  t_count = 0;
3271  for (s = a->states_head; NULL != s; s = s->next)
3272  t_count += s->transition_count;
3273 
3274  return t_count;
3275 }

References REGEX_INTERNAL_State::next, REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_State::transition_count.

◆ REGEX_INTERNAL_get_first_key()

size_t REGEX_INTERNAL_get_first_key ( const char *  input_string,
size_t  string_len,
struct GNUNET_HashCode key 
)

Get the first key for the given input_string.

This hashes the first x bits of the input_string.

Parameters
input_stringstring.
string_lenlength of the input_string.
keypointer to where to write the hash code.
Returns
number of bits of input_string that have been consumed to construct the key

Definition at line 3289 of file regex_internal.c.

3292 {
3293  size_t size;
3294 
3295  size = string_len < GNUNET_REGEX_INITIAL_BYTES ? string_len
3297  if (NULL == input_string)
3298  {
3299  GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Given input string was NULL!\n");
3300  return 0;
3301  }
3302  GNUNET_CRYPTO_hash (input_string, size, key);
3303 
3304  return size;
3305 }
struct GNUNET_HashCode key
The key used in the DHT.
static unsigned int size
Size of the "table".
Definition: peer.c:68

References GNUNET_CRYPTO_hash(), GNUNET_ERROR_TYPE_ERROR, GNUNET_log, GNUNET_REGEX_INITIAL_BYTES, key, and size.

Referenced by REGEX_INTERNAL_search().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ iterate_initial_edge()

static void iterate_initial_edge ( unsigned int  min_len,
unsigned int  max_len,
char *  consumed_string,
struct REGEX_INTERNAL_State state,
REGEX_INTERNAL_KeyIterator  iterator,
void *  iterator_cls 
)
static

Recursive function that calls the iterator for each synthetic start state.

Parameters
min_lenminimum length of the path in the graph.
max_lenmaximum length of the path in the graph.
consumed_stringstring consumed by traversing the graph till this state.
statecurrent state of the automaton.
iteratoriterator function called for each edge.
iterator_clsclosure for the iterator function.

Definition at line 3319 of file regex_internal.c.

3325 {
3326  char *temp;
3327  struct REGEX_INTERNAL_Transition *t;
3328  unsigned int num_edges = state->transition_count;
3329  struct REGEX_BLOCK_Edge edges[num_edges];
3330  struct REGEX_BLOCK_Edge edge[1];
3331  struct GNUNET_HashCode hash;
3332  struct GNUNET_HashCode hash_new;
3333  unsigned int cur_len;
3334 
3335  if (NULL != consumed_string)
3336  cur_len = strlen (consumed_string);
3337  else
3338  cur_len = 0;
3339 
3340  if (((cur_len >= min_len) || (GNUNET_YES == state->accepting)) &&
3341  (cur_len > 0) && (NULL != consumed_string))
3342  {
3343  if (cur_len <= max_len)
3344  {
3345  if ((NULL != state->proof) &&
3346  (0 != strcmp (consumed_string, state->proof)))
3347  {
3348  (void) state_get_edges (state, edges);
3349  GNUNET_CRYPTO_hash (consumed_string, strlen (consumed_string), &hash);
3351  "Start state for string `%s' is %s\n",
3352  consumed_string,
3353  GNUNET_h2s (&hash));
3354  iterator (iterator_cls,
3355  &hash,
3356  consumed_string,
3357  state->accepting,
3358  num_edges,
3359  edges);
3360  }
3361 
3362  if ((GNUNET_YES == state->accepting) && (cur_len > 1) &&
3363  (state->transition_count < 1) && (cur_len < max_len))
3364  {
3365  /* Special case for regex consisting of just a string that is shorter than
3366  * max_len */
3367  edge[0].label = &consumed_string[cur_len - 1];
3368  edge[0].destination = state->hash;
3369  temp = GNUNET_strdup (consumed_string);
3370  temp[cur_len - 1] = '\0';
3371  GNUNET_CRYPTO_hash (temp, cur_len - 1, &hash_new);
3373  "Start state for short string `%s' is %s\n",
3374  temp,
3375  GNUNET_h2s (&hash_new));
3376  iterator (iterator_cls, &hash_new, temp, GNUNET_NO, 1, edge);
3377  GNUNET_free (temp);
3378  }
3379  }
3380  else /* cur_len > max_len */
3381  {
3382  /* Case where the concatenated labels are longer than max_len, then split. */
3383  edge[0].label = &consumed_string[max_len];
3384  edge[0].destination = state->hash;
3385  temp = GNUNET_strdup (consumed_string);
3386  temp[max_len] = '\0';
3387  GNUNET_CRYPTO_hash (temp, max_len, &hash);
3389  "Start state at split edge `%s'-`%s` is %s\n",
3390  temp,
3391  edge[0].label,
3392  GNUNET_h2s (&hash_new));
3393  iterator (iterator_cls, &hash, temp, GNUNET_NO, 1, edge);
3394  GNUNET_free (temp);
3395  }
3396  }
3397 
3398  if (cur_len < max_len)
3399  {
3400  for (t = state->transitions_head; NULL != t; t = t->next)
3401  {
3402  if (NULL != strchr (t->label, (int) '.'))
3403  {
3404  /* Wildcards not allowed during starting states */
3405  GNUNET_break (0);
3406  continue;
3407  }
3408  if (NULL != consumed_string)
3409  GNUNET_asprintf (&temp, "%s%s", consumed_string, t->label);
3410  else
3411  GNUNET_asprintf (&temp, "%s", t->label);
3412  iterate_initial_edge (min_len,
3413  max_len,
3414  temp,
3415  t->to_state,
3416  iterator,
3417  iterator_cls);
3418  GNUNET_free (temp);
3419  }
3420  }
3421 }
static int iterator(void *cls, const struct GNUNET_PeerIdentity *key, void *value)
Iterator over hash map entries.
const char * GNUNET_h2s(const struct GNUNET_HashCode *hc)
Convert a hash value to a string (for printing debug messages).
@ GNUNET_ERROR_TYPE_DEBUG
static unsigned int state_get_edges(struct REGEX_INTERNAL_State *s, struct REGEX_BLOCK_Edge *edges)
Get all edges leaving state s.
static void iterate_initial_edge(unsigned int min_len, unsigned int max_len, char *consumed_string, struct REGEX_INTERNAL_State *state, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
Recursive function that calls the iterator for each synthetic start state.
A 512-bit hashcode.
Edge representation.

References REGEX_BLOCK_Edge::destination, GNUNET_asprintf(), GNUNET_break, GNUNET_CRYPTO_hash(), GNUNET_ERROR_TYPE_DEBUG, GNUNET_free, GNUNET_h2s(), GNUNET_log, GNUNET_NO, GNUNET_strdup, GNUNET_YES, iterator(), REGEX_BLOCK_Edge::label, GNUNET_SCHEDULER_Task::next, state, state_get_edges(), and t.

Referenced by REGEX_INTERNAL_iterate_all_edges().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ REGEX_INTERNAL_iterate_all_edges()

void REGEX_INTERNAL_iterate_all_edges ( struct REGEX_INTERNAL_Automaton a,
REGEX_INTERNAL_KeyIterator  iterator,
void *  iterator_cls 
)

Iterate over all edges starting from start state of automaton 'a'.

Calling iterator for each edge.

Parameters
aautomaton.
iteratoriterator called for each edge.
iterator_clsclosure.

Definition at line 3433 of file regex_internal.c.

3436 {
3437  struct REGEX_INTERNAL_State *s;
3438 
3439  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating over starting edges\n");
3442  NULL,
3443  a->start,
3444  iterator,
3445  iterator_cls);
3446  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating over DFA edges\n");
3447  for (s = a->states_head; NULL != s; s = s->next)
3448  {
3449  struct REGEX_BLOCK_Edge edges[s->transition_count];
3450  unsigned int num_edges;
3451 
3452  num_edges = state_get_edges (s, edges);
3453  if (((NULL != s->proof) && (0 < strlen (s->proof))) || s->accepting)
3454  {
3456  "Creating DFA edges at `%s' under key %s\n",
3457  s->proof,
3458  GNUNET_h2s (&s->hash));
3459  iterator (iterator_cls,
3460  &s->hash,
3461  s->proof,
3462  s->accepting,
3463  num_edges,
3464  edges);
3465  }
3466  s->marked = GNUNET_NO;
3467  }
3468 }
struct GNUNET_HashCode hash
Hash of the state.

References GNUNET_ERROR_TYPE_DEBUG, GNUNET_h2s(), GNUNET_log, GNUNET_NO, GNUNET_REGEX_INITIAL_BYTES, iterate_initial_edge(), iterator(), REGEX_INTERNAL_State::next, REGEX_INTERNAL_Automaton::start, state_get_edges(), and REGEX_INTERNAL_Automaton::states_head.

Referenced by announce_regex(), main(), and REGEX_INTERNAL_iterate_reachable_edges().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ store_all_states()

static void store_all_states ( void *  cls,
const struct GNUNET_HashCode key,
const char *  proof,
int  accepting,
unsigned int  num_edges,
const struct REGEX_BLOCK_Edge edges 
)
static

Iterator over all edges of a dfa.

Stores all of them in a HashMap for later reachability marking.

Parameters
clsClosure (HashMap)
keyhash for current state.
proofproof for current state
acceptingGNUNET_YES if this is an accepting state, GNUNET_NO if not.
num_edgesnumber of edges leaving current state.
edgesedges leaving current state.

Definition at line 3509 of file regex_internal.c.

3515 {
3516  struct GNUNET_CONTAINER_MultiHashMap *hm = cls;
3517  struct temporal_state_store *tmp;
3518  size_t edges_size;
3519 
3520  tmp = GNUNET_new (struct temporal_state_store);
3521  tmp->reachable = GNUNET_NO;
3522  tmp->proof = GNUNET_strdup (proof);
3523  tmp->accepting = accepting;
3524  tmp->num_edges = num_edges;
3525  edges_size = sizeof(struct REGEX_BLOCK_Edge) * num_edges;
3526  tmp->edges = GNUNET_malloc (edges_size);
3527  GNUNET_memcpy (tmp->edges, edges, edges_size);
3530  hm,
3531  key,
3532  tmp,
3534 }
enum GNUNET_GenericReturnValue GNUNET_CONTAINER_multihashmap_put(struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key, void *value, enum GNUNET_CONTAINER_MultiHashMapOption opt)
Store a key-value pair in the map.
@ GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST
, ' bother checking if a value already exists (faster than GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE...
Internal representation of the hash map.
Struct to hold all the relevant state information in the HashMap.
struct REGEX_BLOCK_Edge * edges

References temporal_state_store::accepting, temporal_state_store::edges, GNUNET_assert, GNUNET_CONTAINER_multihashmap_put(), GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST, GNUNET_malloc, GNUNET_memcpy, GNUNET_new, GNUNET_NO, GNUNET_strdup, GNUNET_YES, key, temporal_state_store::num_edges, temporal_state_store::proof, proof, and temporal_state_store::reachable.

Referenced by REGEX_INTERNAL_iterate_reachable_edges().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ mark_as_reachable()

static void mark_as_reachable ( struct temporal_state_store state,
struct GNUNET_CONTAINER_MultiHashMap hm 
)
static

Mark state as reachable and call recursively on all its edges.

If already marked as reachable, do nothing.

Parameters
stateState to mark as reachable.
hmHashMap which stores all the states indexed by key.

Definition at line 3546 of file regex_internal.c.

3548 {
3549  struct temporal_state_store *child;
3550  unsigned int i;
3551 
3552  if (GNUNET_YES == state->reachable)
3553  /* visited */
3554  return;
3555 
3556  state->reachable = GNUNET_YES;
3557  for (i = 0; i < state->num_edges; i++)
3558  {
3559  child =
3560  GNUNET_CONTAINER_multihashmap_get (hm, &state->edges[i].destination);
3561  if (NULL == child)
3562  {
3563  GNUNET_break (0);
3564  continue;
3565  }
3566  mark_as_reachable (child, hm);
3567  }
3568 }
static pid_t child
void * GNUNET_CONTAINER_multihashmap_get(const struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key)
Given a key find a value in the map matching the key.
static void mark_as_reachable(struct temporal_state_store *state, struct GNUNET_CONTAINER_MultiHashMap *hm)
Mark state as reachable and call recursively on all its edges.

References child, GNUNET_break, GNUNET_CONTAINER_multihashmap_get(), GNUNET_YES, and state.

Referenced by reachability_iterator().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ reachability_iterator()

static int reachability_iterator ( void *  cls,
const struct GNUNET_HashCode key,
void *  value 
)
static

Iterator over hash map entries to mark the ones that are reachable.

Parameters
clsclosure
keycurrent key code
valuevalue in the hash map
Returns
GNUNET_YES if we should continue to iterate, GNUNET_NO if not.

Definition at line 3581 of file regex_internal.c.

3584 {
3585  struct GNUNET_CONTAINER_MultiHashMap *hm = cls;
3586  struct temporal_state_store *state = value;
3587 
3588  if (GNUNET_YES == state->reachable)
3589  /* already visited and marked */
3590  return GNUNET_YES;
3591 
3592  if ((GNUNET_REGEX_INITIAL_BYTES > strlen (state->proof)) &&
3593  (GNUNET_NO == state->accepting) )
3594  /* not directly reachable */
3595  return GNUNET_YES;
3596 
3597  mark_as_reachable (state, hm);
3598  return GNUNET_YES;
3599 }
static char * value
Value of the record to add/remove.

References GNUNET_NO, GNUNET_REGEX_INITIAL_BYTES, GNUNET_YES, mark_as_reachable(), state, and value.

Referenced by REGEX_INTERNAL_iterate_reachable_edges().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ iterate_reachables()

static int iterate_reachables ( void *  cls,
const struct GNUNET_HashCode key,
void *  value 
)
static

Iterator over hash map entries.

Calling the callback on the ones marked as reachables.

Parameters
clsclosure
keycurrent key code
valuevalue in the hash map
Returns
GNUNET_YES if we should continue to iterate, GNUNET_NO if not.

Definition at line 3613 of file regex_internal.c.

3614 {
3615  struct client_iterator *ci = cls;
3616  struct temporal_state_store *state = value;
3617 
3618  if (GNUNET_YES == state->reachable)
3619  {
3620  ci->iterator (ci->iterator_cls,
3621  key,
3622  state->proof,
3623  state->accepting,
3624  state->num_edges,
3625  state->edges);
3626  }
3627  GNUNET_free (state->edges);
3628  GNUNET_free (state->proof);
3629  GNUNET_free (state);
3630  return GNUNET_YES;
3631 }
Store regex iterator and cls in one place to pass to the hashmap iterator.
REGEX_INTERNAL_KeyIterator iterator

References GNUNET_free, GNUNET_YES, client_iterator::iterator, client_iterator::iterator_cls, key, state, and value.

Referenced by REGEX_INTERNAL_iterate_reachable_edges().

Here is the caller graph for this function:

◆ REGEX_INTERNAL_iterate_reachable_edges()

void REGEX_INTERNAL_iterate_reachable_edges ( struct REGEX_INTERNAL_Automaton a,
REGEX_INTERNAL_KeyIterator  iterator,
void *  iterator_cls 
)

Iterate over all edges of automaton 'a' that are reachable from a state with a proof of at least GNUNET_REGEX_INITIAL_BYTES characters.

Call the iterator for each such edge.

Parameters
aautomaton.
iteratoriterator called for each reachable edge.
iterator_clsclosure.

Definition at line 3635 of file regex_internal.c.

3638 {
3639  struct GNUNET_CONTAINER_MultiHashMap *hm;
3640  struct client_iterator ci;
3641 
3643  ci.iterator = iterator;
3644  ci.iterator_cls = iterator_cls;
3645 
3649 
3651 }
struct GNUNET_CONTAINER_MultiHashMap * GNUNET_CONTAINER_multihashmap_create(unsigned int len, int do_not_copy_keys)
Create a multi hash map.
void GNUNET_CONTAINER_multihashmap_destroy(struct GNUNET_CONTAINER_MultiHashMap *map)
Destroy a hash map.
enum GNUNET_GenericReturnValue GNUNET_CONTAINER_multihashmap_iterate(struct GNUNET_CONTAINER_MultiHashMap *map, GNUNET_CONTAINER_MultiHashMapIteratorCallback it, void *it_cls)
Iterate over all entries in the map.
void REGEX_INTERNAL_iterate_all_edges(struct REGEX_INTERNAL_Automaton *a, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
Iterate over all edges starting from start state of automaton 'a'.
static int iterate_reachables(void *cls, const struct GNUNET_HashCode *key, void *value)
Iterator over hash map entries.
static void store_all_states(void *cls, const struct GNUNET_HashCode *key, const char *proof, int accepting, unsigned int num_edges, const struct REGEX_BLOCK_Edge *edges)
Iterator over all edges of a dfa.
static int reachability_iterator(void *cls, const struct GNUNET_HashCode *key, void *value)
Iterator over hash map entries to mark the ones that are reachable.

References GNUNET_CONTAINER_multihashmap_create(), GNUNET_CONTAINER_multihashmap_destroy(), GNUNET_CONTAINER_multihashmap_iterate(), GNUNET_NO, iterate_reachables(), client_iterator::iterator, iterator(), client_iterator::iterator_cls, reachability_iterator(), REGEX_INTERNAL_iterate_all_edges(), REGEX_INTERNAL_Automaton::state_count, and store_all_states().

Referenced by main(), and REGEX_INTERNAL_reannounce().

Here is the call graph for this function:
Here is the caller graph for this function: