GNUnet  0.20.0
regex_internal.h File Reference

common internal definitions for regex library. More...

Include dependency graph for regex_internal.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  REGEX_INTERNAL_Transition
 Transition between two states. More...
 
struct  REGEX_INTERNAL_StateSet
 Set of states. More...
 
struct  REGEX_INTERNAL_State
 A state. More...
 
struct  REGEX_INTERNAL_Automaton
 Automaton representation. More...
 
struct  REGEX_INTERNAL_Context
 Context that contains an id counter for states and transitions as well as a DLL of automatons used as a stack for NFA construction. More...
 

Macros

#define ALLOWED_LITERALS    "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
 char array of literals that are allowed inside a regex (apart from the operators) More...
 

Typedefs

typedef int(* REGEX_INTERNAL_traverse_check) (void *cls, struct REGEX_INTERNAL_State *s, struct REGEX_INTERNAL_Transition *t)
 Function that gets passed to automaton traversal and is called before each next traversal from state 's' using transition 't' to check if traversal should proceed. More...
 
typedef void(* REGEX_INTERNAL_traverse_action) (void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
 Function that is called with each state, when traversing an automaton. More...
 

Enumerations

enum  REGEX_INTERNAL_AutomatonType { NFA , DFA }
 Type of an automaton. More...
 

Functions

struct REGEX_INTERNAL_AutomatonREGEX_INTERNAL_construct_nfa (const char *regex, const size_t len)
 Construct an NFA by parsing the regex string of length 'len'. More...
 
void REGEX_INTERNAL_automaton_traverse (const struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *start, REGEX_INTERNAL_traverse_check check, void *check_cls, REGEX_INTERNAL_traverse_action action, void *action_cls)
 Traverses the given automaton using depth-first-search (DFS) from it's start state, visiting all reachable states and calling 'action' on each one of them. More...
 
const char * REGEX_INTERNAL_get_canonical_regex (struct REGEX_INTERNAL_Automaton *a)
 Get the canonical regex of the given automaton. More...
 
unsigned int REGEX_INTERNAL_get_transition_count (struct REGEX_INTERNAL_Automaton *a)
 Get the number of transitions that are contained in the given automaton. More...
 
void REGEX_INTERNAL_dfa_add_multi_strides (struct REGEX_INTERNAL_Context *regex_ctx, struct REGEX_INTERNAL_Automaton *dfa, const unsigned int stride_len)
 Adds multi-strided transitions to the given 'dfa'. More...
 

Detailed Description

common internal definitions for regex library.

Author
Maximilian Szengel

Definition in file regex_internal.h.

Macro Definition Documentation

◆ ALLOWED_LITERALS

#define ALLOWED_LITERALS    "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

char array of literals that are allowed inside a regex (apart from the operators)

Definition at line 42 of file regex_internal.h.

Typedef Documentation

◆ REGEX_INTERNAL_traverse_check

typedef int(* REGEX_INTERNAL_traverse_check) (void *cls, struct REGEX_INTERNAL_State *s, struct REGEX_INTERNAL_Transition *t)

Function that gets passed to automaton traversal and is called before each next traversal from state 's' using transition 't' to check if traversal should proceed.

Return GNUNET_NO to stop traversal or GNUNET_YES to continue.

Parameters
clsclosure for the check.
scurrent state in the traversal.
tcurrent transition from state 's' that will be used for the next step.
Returns
GNUNET_YES to proceed traversal, GNUNET_NO to stop.

Definition at line 343 of file regex_internal.h.

◆ REGEX_INTERNAL_traverse_action

typedef void(* REGEX_INTERNAL_traverse_action) (void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)

Function that is called with each state, when traversing an automaton.

Parameters
clsclosure.
countcurrent count of the state, from 0 to a->state_count -1.
sstate.

Definition at line 356 of file regex_internal.h.

Enumeration Type Documentation

◆ REGEX_INTERNAL_AutomatonType

Type of an automaton.

Enumerator
NFA 
DFA 

Definition at line 249 of file regex_internal.h.

250 {
251  NFA,
252  DFA
253 };
@ NFA
@ DFA

Function Documentation

◆ REGEX_INTERNAL_construct_nfa()

struct REGEX_INTERNAL_Automaton* REGEX_INTERNAL_construct_nfa ( const char *  regex,
const size_t  len 
)

Construct an NFA by parsing the regex string of length 'len'.

Parameters
regexregular expression string.
lenlength of the string.
Returns
NFA, needs to be freed using REGEX_INTERNAL_automaton_destroy.
Parameters
regexregular expression string
lenlength of the string
Returns
NFA, needs to be freed using REGEX_INTERNAL_destroy_automaton

Definition at line 2790 of file regex_internal.c.

2791 {
2792  struct REGEX_INTERNAL_Context ctx;
2793  struct REGEX_INTERNAL_Automaton *nfa;
2794  const char *regexp;
2795  char curlabel[2];
2796  char *error_msg;
2797  unsigned int count;
2798  unsigned int altcount;
2799  unsigned int atomcount;
2800  unsigned int poff;
2801  unsigned int psize;
2802 
2803  struct
2804  {
2805  int altcount;
2806  int atomcount;
2807  } *p;
2808 
2809  if ((NULL == regex) || (0 == strlen (regex)) || (0 == len))
2810  {
2812  "Could not parse regex. Empty regex string provided.\n");
2813 
2814  return NULL;
2815  }
2817 
2818  regexp = regex;
2819  curlabel[1] = '\0';
2820  p = NULL;
2821  error_msg = NULL;
2822  altcount = 0;
2823  atomcount = 0;
2824  poff = 0;
2825  psize = 0;
2826 
2827  for (count = 0; count < len && *regexp; count++, regexp++)
2828  {
2829  switch (*regexp)
2830  {
2831  case '(':
2832  if (atomcount > 1)
2833  {
2834  --atomcount;
2836  }
2837  if (poff == psize)
2838  GNUNET_array_grow (p, psize, psize * 2 + 4); /* FIXME why *2 +4? */
2839  p[poff].altcount = altcount;
2840  p[poff].atomcount = atomcount;
2841  poff++;
2842  altcount = 0;
2843  atomcount = 0;
2844  break;
2845 
2846  case '|':
2847  if (0 == atomcount)
2848  {
2849  error_msg = "Cannot append '|' to nothing";
2850  goto error;
2851  }
2852  while (--atomcount > 0)
2854  altcount++;
2855  break;
2856 
2857  case ')':
2858  if (0 == poff)
2859  {
2860  error_msg = "Missing opening '('";
2861  goto error;
2862  }
2863  if (0 == atomcount)
2864  {
2865  /* Ignore this: "()" */
2866  poff--;
2867  altcount = p[poff].altcount;
2868  atomcount = p[poff].atomcount;
2869  break;
2870  }
2871  while (--atomcount > 0)
2873  for (; altcount > 0; altcount--)
2875  poff--;
2876  altcount = p[poff].altcount;
2877  atomcount = p[poff].atomcount;
2878  atomcount++;
2879  break;
2880 
2881  case '*':
2882  if (atomcount == 0)
2883  {
2884  error_msg = "Cannot append '*' to nothing";
2885  goto error;
2886  }
2887  nfa_add_star_op (&ctx);
2888  break;
2889 
2890  case '+':
2891  if (atomcount == 0)
2892  {
2893  error_msg = "Cannot append '+' to nothing";
2894  goto error;
2895  }
2896  nfa_add_plus_op (&ctx);
2897  break;
2898 
2899  case '?':
2900  if (atomcount == 0)
2901  {
2902  error_msg = "Cannot append '?' to nothing";
2903  goto error;
2904  }
2906  break;
2907 
2908  default:
2909  if (atomcount > 1)
2910  {
2911  --atomcount;
2913  }
2914  curlabel[0] = *regexp;
2915  nfa_add_label (&ctx, curlabel);
2916  atomcount++;
2917  break;
2918  }
2919  }
2920  if (0 != poff)
2921  {
2922  error_msg = "Unbalanced parenthesis";
2923  goto error;
2924  }
2925  while (--atomcount > 0)
2927  for (; altcount > 0; altcount--)
2929 
2930  GNUNET_array_grow (p, psize, 0);
2931 
2932  nfa = ctx.stack_tail;
2933  GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa);
2934 
2935  if (NULL != ctx.stack_head)
2936  {
2937  error_msg = "Creating the NFA failed. NFA stack was not empty!";
2938  goto error;
2939  }
2940 
2941  /* Remember the regex that was used to generate this NFA */
2942  nfa->regex = GNUNET_strdup (regex);
2943 
2944  /* create depth-first numbering of the states for pretty printing */
2946  NULL,
2947  NULL,
2948  NULL,
2949  &number_states,
2950  NULL);
2951 
2952  /* No multistriding added so far */
2953  nfa->is_multistrided = GNUNET_NO;
2954 
2955  return nfa;
2956 
2957 error:
2958  GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not parse regex: `%s'\n", regex);
2959  if (NULL != error_msg)
2960  GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "%s\n", error_msg);
2961 
2962  GNUNET_free (p);
2963 
2964  while (NULL != (nfa = ctx.stack_head))
2965  {
2966  GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa);
2968  }
2969 
2970  return NULL;
2971 }
uint16_t len
length of data (which is always a uint32_t, but presumably this can be used to specify that fewer byt...
static struct GNUNET_OS_Process * p
Helper process we started.
Definition: gnunet-uri.c:38
static struct GNUNET_DNSSTUB_Context * ctx
Context for DNS resolution.
#define GNUNET_CONTAINER_DLL_remove(head, tail, element)
Remove an element from a DLL.
#define GNUNET_log(kind,...)
@ GNUNET_NO
@ GNUNET_ERROR_TYPE_ERROR
#define GNUNET_strdup(a)
Wrapper around GNUNET_xstrdup_.
#define GNUNET_array_grow(arr, size, tsize)
Grow a well-typed (!) array.
#define GNUNET_free(ptr)
Wrapper around free.
static void nfa_add_concatenation(struct REGEX_INTERNAL_Context *ctx)
Pops two NFA fragments (a, b) from the stack and concatenates them (ab)
void REGEX_INTERNAL_automaton_traverse(const struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *start, REGEX_INTERNAL_traverse_check check, void *check_cls, REGEX_INTERNAL_traverse_action action, void *action_cls)
Traverses the given automaton using depth-first-search (DFS) from it's start state,...
static void nfa_add_question_op(struct REGEX_INTERNAL_Context *ctx)
Pops an NFA fragment (a) from the stack and adds a new fragment (a?)
void REGEX_INTERNAL_automaton_destroy(struct REGEX_INTERNAL_Automaton *a)
Free the memory allocated by constructing the REGEX_INTERNAL_Automaton.
static void nfa_add_star_op(struct REGEX_INTERNAL_Context *ctx)
Pops a NFA fragment from the stack (a) and adds a new fragment (a*)
static void nfa_add_alternation(struct REGEX_INTERNAL_Context *ctx)
Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment that alternates between a an...
static void nfa_add_label(struct REGEX_INTERNAL_Context *ctx, const char *label)
Adds a new nfa fragment to the stack.
static void number_states(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Helper function used as 'action' in 'REGEX_INTERNAL_automaton_traverse' function to create the depth-...
static void REGEX_INTERNAL_context_init(struct REGEX_INTERNAL_Context *ctx)
Initialize a new context.
static void nfa_add_plus_op(struct REGEX_INTERNAL_Context *ctx)
Pops an NFA fragment (a) from the stack and adds a new fragment (a+)
Automaton representation.
int is_multistrided
GNUNET_YES, if multi strides have been added to the Automaton.
Context that contains an id counter for states and transitions as well as a DLL of automatons used as...

References ctx, GNUNET_array_grow, GNUNET_CONTAINER_DLL_remove, GNUNET_ERROR_TYPE_ERROR, GNUNET_free, GNUNET_log, GNUNET_NO, GNUNET_strdup, REGEX_INTERNAL_Automaton::is_multistrided, len, nfa_add_alternation(), nfa_add_concatenation(), nfa_add_label(), nfa_add_plus_op(), nfa_add_question_op(), nfa_add_star_op(), number_states(), p, REGEX_INTERNAL_Automaton::regex, REGEX_INTERNAL_automaton_destroy(), REGEX_INTERNAL_automaton_traverse(), and REGEX_INTERNAL_context_init().

Referenced by REGEX_INTERNAL_construct_dfa().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ REGEX_INTERNAL_automaton_traverse()

void REGEX_INTERNAL_automaton_traverse ( const struct REGEX_INTERNAL_Automaton a,
struct REGEX_INTERNAL_State start,
REGEX_INTERNAL_traverse_check  check,
void *  check_cls,
REGEX_INTERNAL_traverse_action  action,
void *  action_cls 
)

Traverses the given automaton using depth-first-search (DFS) from it's start state, visiting all reachable states and calling 'action' on each one of them.

Parameters
aautomaton to be traversed.
startstart state, pass a->start or NULL to traverse the whole automaton.
checkfunction that is checked before advancing on each transition in the DFS.
check_clsclosure for check.
actionaction to be performed on each state.
action_clsclosure for action

Definition at line 505 of file regex_internal.c.

511 {
512  unsigned int count;
513  struct REGEX_INTERNAL_State *s;
514 
515  if ((NULL == a) || (0 == a->state_count))
516  return;
517 
518  int marks[a->state_count];
519 
520  for (count = 0, s = a->states_head; NULL != s && count < a->state_count;
521  s = s->next, count++)
522  {
523  s->traversal_id = count;
524  marks[s->traversal_id] = GNUNET_NO;
525  }
526 
527  count = 0;
528 
529  if (NULL == start)
530  s = a->start;
531  else
532  s = start;
533 
535  marks,
536  &count,
537  check,
538  check_cls,
539  action,
540  action_cls);
541 }
static int start
Set if we are to start default services (including ARM).
Definition: gnunet-arm.c:39
static void automaton_state_traverse(struct REGEX_INTERNAL_State *s, int *marks, unsigned int *count, REGEX_INTERNAL_traverse_check check, void *check_cls, REGEX_INTERNAL_traverse_action action, void *action_cls)
Depth-first traversal (DFS) of all states that are reachable from state 's'.
struct REGEX_INTERNAL_State * start
First state of the automaton.
unsigned int state_count
Number of states in the automaton.
struct REGEX_INTERNAL_State * states_head
DLL of states.
struct REGEX_INTERNAL_State * next
This is a linked list to keep states in an automaton.
unsigned int traversal_id
Unique state id that is used for traversing the automaton.

References consensus-simulation::action, automaton_state_traverse(), GNUNET_NO, REGEX_INTERNAL_State::next, start, REGEX_INTERNAL_Automaton::start, REGEX_INTERNAL_Automaton::state_count, REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_State::traversal_id.

Referenced by automaton_create_proofs(), dfa_remove_unreachable_states(), REGEX_INTERNAL_construct_nfa(), REGEX_INTERNAL_dfa_add_multi_strides(), and REGEX_TEST_automaton_save_graph().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ REGEX_INTERNAL_get_canonical_regex()

const char* REGEX_INTERNAL_get_canonical_regex ( struct REGEX_INTERNAL_Automaton a)

Get the canonical regex of the given automaton.

When constructing the automaton a proof is computed for each state, consisting of the regular expression leading to this state. A complete regex for the automaton can be computed by combining these proofs. As of now this function is only useful for testing.

Parameters
aautomaton for which the canonical regex should be returned.
Returns
canonical regex string.

Definition at line 3245 of file regex_internal.c.

3246 {
3247  if (NULL == a)
3248  return NULL;
3249 
3250  return a->canonical_regex;
3251 }
char * canonical_regex
Canonical regex (result of RX->NFA->DFA->RX)

References REGEX_INTERNAL_Automaton::canonical_regex.

◆ REGEX_INTERNAL_get_transition_count()

unsigned int REGEX_INTERNAL_get_transition_count ( struct REGEX_INTERNAL_Automaton a)

Get the number of transitions that are contained in the given automaton.

Parameters
aautomaton for which the number of transitions should be returned.
Returns
number of transitions in the given automaton.

Definition at line 3262 of file regex_internal.c.

3263 {
3264  unsigned int t_count;
3265  struct REGEX_INTERNAL_State *s;
3266 
3267  if (NULL == a)
3268  return 0;
3269 
3270  t_count = 0;
3271  for (s = a->states_head; NULL != s; s = s->next)
3272  t_count += s->transition_count;
3273 
3274  return t_count;
3275 }
unsigned int transition_count
Number of transitions from this state to other states.

References REGEX_INTERNAL_State::next, REGEX_INTERNAL_Automaton::states_head, and REGEX_INTERNAL_State::transition_count.

◆ REGEX_INTERNAL_dfa_add_multi_strides()

void REGEX_INTERNAL_dfa_add_multi_strides ( struct REGEX_INTERNAL_Context regex_ctx,
struct REGEX_INTERNAL_Automaton dfa,
const unsigned int  stride_len 
)

Adds multi-strided transitions to the given 'dfa'.

Parameters
regex_ctxregex context needed to add transitions to the automaton.
dfaDFA to which the multi strided transitions should be added.
stride_lenlength of the strides.

Definition at line 2178 of file regex_internal.c.

2181 {
2182  struct REGEX_INTERNAL_Strided_Context ctx = { stride_len, NULL, NULL };
2183  struct REGEX_INTERNAL_Transition *t;
2184  struct REGEX_INTERNAL_Transition *t_next;
2185 
2186  if ((1 > stride_len) || (GNUNET_YES == dfa->is_multistrided))
2187  return;
2188 
2189  /* Compute the new transitions of given stride_len */
2191  dfa->start,
2192  NULL,
2193  NULL,
2195  &ctx);
2196 
2197  /* Add all the new transitions to the automaton. */
2198  for (t = ctx.transitions_head; NULL != t; t = t_next)
2199  {
2200  t_next = t->next;
2201  state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
2202  GNUNET_CONTAINER_DLL_remove (ctx.transitions_head, ctx.transitions_tail, t);
2203  GNUNET_free (t->label);
2204  GNUNET_free (t);
2205  }
2206 
2207  /* Mark this automaton as multistrided */
2208  dfa->is_multistrided = GNUNET_YES;
2209 }
static struct GNUNET_SCHEDULER_Task * t
Main task.
@ GNUNET_YES
static void state_add_transition(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_State *from_state, const char *label, struct REGEX_INTERNAL_State *to_state)
Adds a transition from one state to another on label.
static void dfa_add_multi_strides(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Function called for each state in the DFA.
struct GNUNET_SCHEDULER_Task * next
This is a linked list.
Definition: scheduler.c:140
Context for adding strided transitions to a DFA.
Transition between two states.

References ctx, dfa_add_multi_strides(), GNUNET_CONTAINER_DLL_remove, GNUNET_free, GNUNET_YES, REGEX_INTERNAL_Automaton::is_multistrided, GNUNET_SCHEDULER_Task::next, REGEX_INTERNAL_automaton_traverse(), REGEX_INTERNAL_Automaton::start, state_add_transition(), and t.

Here is the call graph for this function: