GNUnet  0.11.x
regex_internal.c
Go to the documentation of this file.
1 /*
2  This file is part of GNUnet
3  Copyright (C) 2012 GNUnet e.V.
4 
5  GNUnet is free software: you can redistribute it and/or modify it
6  under the terms of the GNU Affero General Public License as published
7  by the Free Software Foundation, either version 3 of the License,
8  or (at your option) any later version.
9 
10  GNUnet is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  Affero General Public License for more details.
14 
15  You should have received a copy of the GNU Affero General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  SPDX-License-Identifier: AGPL3.0-or-later
19  */
26 #include "platform.h"
27 #include "gnunet_util_lib.h"
28 #include "gnunet_regex_service.h"
29 #include "regex_internal_lib.h"
30 #include "regex_internal.h"
31 
32 
37 #define REGEX_DEBUG_DFA GNUNET_NO
38 
43 {
48 
53 
57  unsigned int len;
58 };
59 
60 
67 static void
70 {
71  if (set->off == set->size)
72  GNUNET_array_grow (set->states, set->size, set->size * 2 + 4);
73  set->states[set->off++] = state;
74 }
75 
76 
85 static int
86 nullstrcmp (const char *str1, const char *str2)
87 {
88  if ((NULL == str1) != (NULL == str2))
89  return -1;
90  if ((NULL == str1) && (NULL == str2))
91  return 0;
92 
93  return strcmp (str1, str2);
94 }
95 
96 
106 static void
108  struct REGEX_INTERNAL_State *from_state,
109  const char *label,
110  struct REGEX_INTERNAL_State *to_state)
111 {
113  struct REGEX_INTERNAL_Transition *oth;
114 
115  if (NULL == from_state)
116  {
117  GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not create Transition.\n");
118  return;
119  }
120 
121  /* Do not add duplicate state transitions */
122  for (t = from_state->transitions_head; NULL != t; t = t->next)
123  {
124  if ((t->to_state == to_state) && (0 == nullstrcmp (t->label, label)) &&
125  (t->from_state == from_state) )
126  return;
127  }
128 
129  /* sort transitions by label */
130  for (oth = from_state->transitions_head; NULL != oth; oth = oth->next)
131  {
132  if (0 < nullstrcmp (oth->label, label))
133  break;
134  }
135 
137  if (NULL != ctx)
138  t->id = ctx->transition_id++;
139  if (NULL != label)
140  t->label = GNUNET_strdup (label);
141  else
142  t->label = NULL;
143  t->to_state = to_state;
144  t->from_state = from_state;
145 
146  /* Add outgoing transition to 'from_state' */
147  from_state->transition_count++;
149  from_state->transitions_tail,
150  oth,
151  t);
152 }
153 
154 
161 static void
163  struct REGEX_INTERNAL_Transition *transition)
164 {
165  if ((NULL == state) || (NULL == transition))
166  return;
167 
168  if (transition->from_state != state)
169  return;
170 
171  GNUNET_free_non_null (transition->label);
172 
173  state->transition_count--;
175  state->transitions_tail,
176  transition);
177 
178  GNUNET_free (transition);
179 }
180 
181 
192 static int
193 state_compare (const void *a, const void *b)
194 {
195  struct REGEX_INTERNAL_State **s1 = (struct REGEX_INTERNAL_State **) a;
196  struct REGEX_INTERNAL_State **s2 = (struct REGEX_INTERNAL_State **) b;
197 
198  return (*s1)->id - (*s2)->id;
199 }
200 
201 
211 static unsigned int
213 {
215  unsigned int count;
216 
217  if (NULL == s)
218  return 0;
219 
220  count = 0;
221 
222  for (t = s->transitions_head; NULL != t; t = t->next)
223  {
224  if (NULL != t->to_state)
225  {
226  edges[count].label = t->label;
227  edges[count].destination = t->to_state->hash;
228  count++;
229  }
230  }
231  return count;
232 }
233 
234 
243 static int
245  struct REGEX_INTERNAL_StateSet *sset2)
246 {
247  int result;
248  unsigned int i;
249 
250  if ((NULL == sset1) || (NULL == sset2))
251  return 1;
252 
253  result = sset1->off - sset2->off;
254  if (result < 0)
255  return -1;
256  if (result > 0)
257  return 1;
258  for (i = 0; i < sset1->off; i++)
259  if (0 != (result = state_compare (&sset1->states[i], &sset2->states[i])))
260  break;
261  return result;
262 }
263 
264 
270 static void
272 {
273  GNUNET_array_grow (set->states, set->size, 0);
274  set->off = 0;
275 }
276 
277 
284 static void
286 {
287  if (NULL == a)
288  return;
289 
290  a->start = NULL;
291  a->end = NULL;
292  a->states_head = NULL;
293  a->states_tail = NULL;
294  a->state_count = 0;
295  GNUNET_free (a);
296 }
297 
298 
304 static void
306 {
308  struct REGEX_INTERNAL_Transition *next_t;
309 
310  if (NULL == s)
311  return;
312 
315  state_set_clear (&s->nfa_set);
316  for (t = s->transitions_head; NULL != t; t = next_t)
317  {
318  next_t = t->next;
320  }
321 
322  GNUNET_free (s);
323 }
324 
325 
334 static void
336  struct REGEX_INTERNAL_State *s)
337 {
338  struct REGEX_INTERNAL_State *s_check;
339  struct REGEX_INTERNAL_Transition *t_check;
340  struct REGEX_INTERNAL_Transition *t_check_next;
341 
342  if ((NULL == a) || (NULL == s))
343  return;
344 
345  /* remove all transitions leading to this state */
346  for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
347  {
348  for (t_check = s_check->transitions_head; NULL != t_check;
349  t_check = t_check_next)
350  {
351  t_check_next = t_check->next;
352  if (t_check->to_state == s)
353  state_remove_transition (s_check, t_check);
354  }
355  }
356 
357  /* remove state */
359  a->state_count--;
360 
362 }
363 
364 
374 static void
376  struct REGEX_INTERNAL_Automaton *a,
377  struct REGEX_INTERNAL_State *s1,
378  struct REGEX_INTERNAL_State *s2)
379 {
380  struct REGEX_INTERNAL_State *s_check;
381  struct REGEX_INTERNAL_Transition *t_check;
383  struct REGEX_INTERNAL_Transition *t_next;
384  int is_dup;
385 
386  if (s1 == s2)
387  return;
388 
389  /* 1. Make all transitions pointing to s2 point to s1, unless this transition
390  * does not already exists, if it already exists remove transition. */
391  for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
392  {
393  for (t_check = s_check->transitions_head; NULL != t_check; t_check = t_next)
394  {
395  t_next = t_check->next;
396 
397  if (s2 == t_check->to_state)
398  {
399  is_dup = GNUNET_NO;
400  for (t = t_check->from_state->transitions_head; NULL != t; t = t->next)
401  {
402  if ((t->to_state == s1) && (0 == strcmp (t_check->label, t->label)) )
403  is_dup = GNUNET_YES;
404  }
405  if (GNUNET_NO == is_dup)
406  t_check->to_state = s1;
407  else
408  state_remove_transition (t_check->from_state, t_check);
409  }
410  }
411  }
412 
413  /* 2. Add all transitions from s2 to sX to s1 */
414  for (t_check = s2->transitions_head; NULL != t_check; t_check = t_check->next)
415  {
416  if (t_check->to_state != s1)
417  state_add_transition (ctx, s1, t_check->label, t_check->to_state);
418  }
419 
420  /* 3. Rename s1 to {s1,s2} */
421 #if REGEX_DEBUG_DFA
422  char *new_name;
423 
424  new_name = s1->name;
425  GNUNET_asprintf (&s1->name, "{%s,%s}", new_name, s2->name);
426  GNUNET_free (new_name);
427 #endif
428 
429  /* remove state */
431  a->state_count--;
433 }
434 
435 
443 static void
445  struct REGEX_INTERNAL_State *s)
446 {
448  a->state_count++;
449 }
450 
451 
466 static void
468  int *marks,
469  unsigned int *count,
471  void *check_cls,
473  void *action_cls)
474 {
476 
477  if (GNUNET_YES == marks[s->traversal_id])
478  return;
479 
480  marks[s->traversal_id] = GNUNET_YES;
481 
482  if (NULL != action)
483  action (action_cls, *count, s);
484 
485  (*count)++;
486 
487  for (t = s->transitions_head; NULL != t; t = t->next)
488  {
489  if ((NULL == check) ||
490  ((NULL != check) && (GNUNET_YES == check (check_cls, s, t)) ))
491  {
493  marks,
494  count,
495  check,
496  check_cls,
497  action,
498  action_cls);
499  }
500  }
501 }
502 
503 
517 void
519  struct REGEX_INTERNAL_State *start,
521  void *check_cls,
523  void *action_cls)
524 {
525  unsigned int count;
526  struct REGEX_INTERNAL_State *s;
527 
528  if ((NULL == a) || (0 == a->state_count))
529  return;
530 
531  int marks[a->state_count];
532 
533  for (count = 0, s = a->states_head; NULL != s && count < a->state_count;
534  s = s->next, count++)
535  {
536  s->traversal_id = count;
537  marks[s->traversal_id] = GNUNET_NO;
538  }
539 
540  count = 0;
541 
542  if (NULL == start)
543  s = a->start;
544  else
545  s = start;
546 
548  marks,
549  &count,
550  check,
551  check_cls,
552  action,
553  action_cls);
554 }
555 
556 
561 {
566  char *sbuf;
567 
571  char *abuf;
572 
576  size_t slen;
577 
581  unsigned int blen;
582 
586  int16_t null_flag;
587 
597  int16_t synced;
598 };
599 
600 
609 static int
610 sb_nullstrcmp (const struct StringBuffer *s1, const struct StringBuffer *s2)
611 {
612  if ((GNUNET_YES == s1->null_flag) && (GNUNET_YES == s2->null_flag))
613  return 0;
614  if ((GNUNET_YES == s1->null_flag) || (GNUNET_YES == s2->null_flag))
615  return -1;
616  if (s1->slen != s2->slen)
617  return -1;
618  if (0 == s1->slen)
619  return 0;
620  return memcmp (s1->sbuf, s2->sbuf, s1->slen);
621 }
622 
623 
632 static int
633 sb_strcmp (const struct StringBuffer *s1, const struct StringBuffer *s2)
634 {
635  if (s1->slen != s2->slen)
636  return -1;
637  if (0 == s1->slen)
638  return 0;
639  return memcmp (s1->sbuf, s2->sbuf, s1->slen);
640 }
641 
642 
650 static void
651 sb_realloc (struct StringBuffer *ret, size_t nlen)
652 {
653  char *old;
654 
655  GNUNET_assert (nlen >= ret->slen);
656  old = ret->abuf;
657  ret->abuf = GNUNET_malloc (nlen);
658  ret->blen = nlen;
659  GNUNET_memcpy (ret->abuf, ret->sbuf, ret->slen);
660  ret->sbuf = ret->abuf;
661  GNUNET_free_non_null (old);
662 }
663 
664 
671 static void
672 sb_append (struct StringBuffer *ret, const struct StringBuffer *sarg)
673 {
674  if (GNUNET_YES == ret->null_flag)
675  ret->slen = 0;
676  ret->null_flag = GNUNET_NO;
677  if (ret->blen < sarg->slen + ret->slen)
678  sb_realloc (ret, ret->blen + sarg->slen + 128);
679  GNUNET_memcpy (&ret->sbuf[ret->slen], sarg->sbuf, sarg->slen);
680  ret->slen += sarg->slen;
681 }
682 
683 
690 static void
691 sb_append_cstr (struct StringBuffer *ret, const char *cstr)
692 {
693  size_t cstr_len = strlen (cstr);
694 
695  if (GNUNET_YES == ret->null_flag)
696  ret->slen = 0;
697  ret->null_flag = GNUNET_NO;
698  if (ret->blen < cstr_len + ret->slen)
699  sb_realloc (ret, ret->blen + cstr_len + 128);
700  GNUNET_memcpy (&ret->sbuf[ret->slen], cstr, cstr_len);
701  ret->slen += cstr_len;
702 }
703 
704 
715 static void
716 sb_wrap (struct StringBuffer *ret, const char *format, size_t extra_chars)
717 {
718  char *temp;
719 
720  if (GNUNET_YES == ret->null_flag)
721  ret->slen = 0;
722  ret->null_flag = GNUNET_NO;
723  temp = GNUNET_malloc (ret->slen + extra_chars + 1);
724  GNUNET_snprintf (temp,
725  ret->slen + extra_chars + 1,
726  format,
727  (int) ret->slen,
728  ret->sbuf);
729  GNUNET_free_non_null (ret->abuf);
730  ret->abuf = temp;
731  ret->sbuf = temp;
732  ret->blen = ret->slen + extra_chars + 1;
733  ret->slen = ret->slen + extra_chars;
734 }
735 
736 
746 static void
748  const char *format,
749  size_t extra_chars,
750  const struct StringBuffer *sarg)
751 {
752  if (ret->blen < sarg->slen + extra_chars + 1)
753  sb_realloc (ret, sarg->slen + extra_chars + 1);
754  ret->null_flag = GNUNET_NO;
755  ret->sbuf = ret->abuf;
756  ret->slen = sarg->slen + extra_chars;
757  GNUNET_snprintf (ret->sbuf, ret->blen, format, (int) sarg->slen, sarg->sbuf);
758 }
759 
760 
770 static void
772  const char *format,
773  size_t extra_chars,
774  const struct StringBuffer *sarg1,
775  const struct StringBuffer *sarg2)
776 {
777  if (ret->blen < sarg1->slen + sarg2->slen + extra_chars + 1)
778  sb_realloc (ret, sarg1->slen + sarg2->slen + extra_chars + 1);
779  ret->null_flag = GNUNET_NO;
780  ret->slen = sarg1->slen + sarg2->slen + extra_chars;
781  ret->sbuf = ret->abuf;
782  GNUNET_snprintf (ret->sbuf,
783  ret->blen,
784  format,
785  (int) sarg1->slen,
786  sarg1->sbuf,
787  (int) sarg2->slen,
788  sarg2->sbuf);
789 }
790 
791 
803 static void
805  const char *format,
806  size_t extra_chars,
807  const struct StringBuffer *sarg1,
808  const struct StringBuffer *sarg2,
809  const struct StringBuffer *sarg3)
810 {
811  if (ret->blen < sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1)
812  sb_realloc (ret, sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1);
813  ret->null_flag = GNUNET_NO;
814  ret->slen = sarg1->slen + sarg2->slen + sarg3->slen + extra_chars;
815  ret->sbuf = ret->abuf;
816  GNUNET_snprintf (ret->sbuf,
817  ret->blen,
818  format,
819  (int) sarg1->slen,
820  sarg1->sbuf,
821  (int) sarg2->slen,
822  sarg2->sbuf,
823  (int) sarg3->slen,
824  sarg3->sbuf);
825 }
826 
827 
834 static void
835 sb_free (struct StringBuffer *sb)
836 {
837  GNUNET_array_grow (sb->abuf, sb->blen, 0);
838  sb->slen = 0;
839  sb->sbuf = NULL;
840  sb->null_flag = GNUNET_YES;
841 }
842 
843 
850 static void
851 sb_strdup (struct StringBuffer *out, const struct StringBuffer *in)
852 
853 {
854  out->null_flag = in->null_flag;
855  if (GNUNET_YES == out->null_flag)
856  return;
857  if (out->blen < in->slen)
858  {
859  GNUNET_array_grow (out->abuf, out->blen, in->slen);
860  }
861  out->sbuf = out->abuf;
862  out->slen = in->slen;
863  GNUNET_memcpy (out->sbuf, in->sbuf, out->slen);
864 }
865 
866 
873 static void
874 sb_strdup_cstr (struct StringBuffer *out, const char *cstr)
875 {
876  if (NULL == cstr)
877  {
878  out->null_flag = GNUNET_YES;
879  return;
880  }
881  out->null_flag = GNUNET_NO;
882  out->slen = strlen (cstr);
883  if (out->blen < out->slen)
884  {
885  GNUNET_array_grow (out->abuf, out->blen, out->slen);
886  }
887  out->sbuf = out->abuf;
888  GNUNET_memcpy (out->sbuf, cstr, out->slen);
889 }
890 
891 
900 static int
901 needs_parentheses (const struct StringBuffer *str)
902 {
903  size_t slen;
904  const char *op;
905  const char *cl;
906  const char *pos;
907  const char *end;
908  unsigned int cnt;
909 
910  if ((GNUNET_YES == str->null_flag) || ((slen = str->slen) < 2))
911  return GNUNET_NO;
912  pos = str->sbuf;
913  if ('(' != pos[0])
914  return GNUNET_YES;
915  end = str->sbuf + slen;
916  cnt = 1;
917  pos++;
918  while (cnt > 0)
919  {
920  cl = memchr (pos, ')', end - pos);
921  if (NULL == cl)
922  {
923  GNUNET_break (0);
924  return GNUNET_YES;
925  }
926  /* while '(' before ')', count opening parens */
927  while ((NULL != (op = memchr (pos, '(', end - pos))) && (op < cl))
928  {
929  cnt++;
930  pos = op + 1;
931  }
932  /* got ')' first */
933  cnt--;
934  pos = cl + 1;
935  }
936  return (*pos == '\0') ? GNUNET_NO : GNUNET_YES;
937 }
938 
939 
949 static void
951 {
952  size_t slen;
953  const char *pos;
954  const char *end;
955  const char *sbuf;
956  const char *op;
957  const char *cp;
958  unsigned int cnt;
959 
960  if (0)
961  return;
962  sbuf = str->sbuf;
963  if ((GNUNET_YES == str->null_flag) || (1 >= (slen = str->slen)) ||
964  ('(' != str->sbuf[0]) || (')' != str->sbuf[slen - 1]))
965  return;
966  cnt = 0;
967  pos = &sbuf[1];
968  end = &sbuf[slen - 1];
969  op = memchr (pos, '(', end - pos);
970  cp = memchr (pos, ')', end - pos);
971  while (NULL != cp)
972  {
973  while ((NULL != op) && (op < cp))
974  {
975  cnt++;
976  pos = op + 1;
977  op = memchr (pos, '(', end - pos);
978  }
979  while ((NULL != cp) && ((NULL == op) || (cp < op)))
980  {
981  if (0 == cnt)
982  return; /* can't strip parens */
983  cnt--;
984  pos = cp + 1;
985  cp = memchr (pos, ')', end - pos);
986  }
987  }
988  if (0 != cnt)
989  {
990  GNUNET_break (0);
991  return;
992  }
993  str->sbuf++;
994  str->slen -= 2;
995 }
996 
997 
1006 static int
1007 has_epsilon (const struct StringBuffer *str)
1008 {
1009  return (GNUNET_YES != str->null_flag) && (0 < str->slen) &&
1010  ('(' == str->sbuf[0]) && ('|' == str->sbuf[1]) &&
1011  (')' == str->sbuf[str->slen - 1]);
1012 }
1013 
1014 
1024 static void
1025 remove_epsilon (const struct StringBuffer *str, struct StringBuffer *ret)
1026 {
1027  if (GNUNET_YES == str->null_flag)
1028  {
1029  ret->null_flag = GNUNET_YES;
1030  return;
1031  }
1032  if ((str->slen > 1) && ('(' == str->sbuf[0]) && ('|' == str->sbuf[1]) &&
1033  (')' == str->sbuf[str->slen - 1]))
1034  {
1035  /* remove epsilon */
1036  if (ret->blen < str->slen - 3)
1037  {
1038  GNUNET_array_grow (ret->abuf, ret->blen, str->slen - 3);
1039  }
1040  ret->sbuf = ret->abuf;
1041  ret->slen = str->slen - 3;
1042  GNUNET_memcpy (ret->sbuf, &str->sbuf[2], ret->slen);
1043  return;
1044  }
1045  sb_strdup (ret, str);
1046 }
1047 
1048 
1058 static int
1059 sb_strncmp (const struct StringBuffer *str1,
1060  const struct StringBuffer *str2,
1061  size_t n)
1062 {
1063  size_t max;
1064 
1065  if ((str1->slen != str2->slen) && ((str1->slen < n) || (str2->slen < n)))
1066  return -1;
1067  max = GNUNET_MAX (str1->slen, str2->slen);
1068  if (max > n)
1069  max = n;
1070  return memcmp (str1->sbuf, str2->sbuf, max);
1071 }
1072 
1073 
1083 static int
1084 sb_strncmp_cstr (const struct StringBuffer *str1, const char *str2, size_t n)
1085 {
1086  if (str1->slen < n)
1087  return -1;
1088  return memcmp (str1->sbuf, str2, n);
1089 }
1090 
1091 
1099 static void
1100 sb_init (struct StringBuffer *sb, size_t n)
1101 {
1102  sb->null_flag = GNUNET_NO;
1103  sb->abuf = sb->sbuf = (0 == n) ? NULL : GNUNET_malloc (n);
1104  sb->blen = n;
1105  sb->slen = 0;
1106 }
1107 
1108 
1118 static int
1119 sb_strkcmp (const struct StringBuffer *str1,
1120  const struct StringBuffer *str2,
1121  size_t k)
1122 {
1123  if ((GNUNET_YES == str1->null_flag) || (GNUNET_YES == str2->null_flag) ||
1124  (k > str1->slen) || (str1->slen - k != str2->slen))
1125  return -1;
1126  return memcmp (&str1->sbuf[k], str2->sbuf, str2->slen);
1127 }
1128 
1129 
1138 static void
1139 number_states (void *cls,
1140  const unsigned int count,
1141  struct REGEX_INTERNAL_State *s)
1142 {
1143  struct REGEX_INTERNAL_State **states = cls;
1144 
1145  s->dfs_id = count;
1146  if (NULL != states)
1147  states[count] = s;
1148 }
1149 
1150 
1151 #define PRIS(a) \
1152  ((GNUNET_YES == a.null_flag) ? 6 : (int) a.slen), \
1153  ((GNUNET_YES == a.null_flag) ? "(null)" : a.sbuf)
1154 
1155 
1170 static void
1172  const struct StringBuffer *R_last_ik,
1173  const struct StringBuffer *R_last_kk,
1174  const struct StringBuffer *R_last_kj,
1175  struct StringBuffer *R_cur_ij,
1176  struct StringBuffer *R_cur_l,
1177  struct StringBuffer *R_cur_r)
1178 {
1179  struct StringBuffer R_temp_ij;
1180  struct StringBuffer R_temp_ik;
1181  struct StringBuffer R_temp_kj;
1182  struct StringBuffer R_temp_kk;
1183  int eps_check;
1184  int ij_ik_cmp;
1185  int ij_kj_cmp;
1186  int ik_kk_cmp;
1187  int kk_kj_cmp;
1188  int clean_ik_kk_cmp;
1189  int clean_kk_kj_cmp;
1190  size_t length;
1191  size_t length_l;
1192  size_t length_r;
1193 
1194  /*
1195  * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1196  * R_last == R^{(k-1)}, R_cur == R^{(k)}
1197  * R_cur_ij = R_cur_l | R_cur_r
1198  * R_cur_l == R^{(k-1)}_{ij}
1199  * R_cur_r == R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1200  */if ((GNUNET_YES == R_last_ij->null_flag) &&
1201  ((GNUNET_YES == R_last_ik->null_flag) ||
1202  (GNUNET_YES == R_last_kj->null_flag)))
1203  {
1204  /* R^{(k)}_{ij} = N | N */
1205  R_cur_ij->null_flag = GNUNET_YES;
1206  R_cur_ij->synced = GNUNET_NO;
1207  return;
1208  }
1209 
1210  if ((GNUNET_YES == R_last_ik->null_flag) ||
1211  (GNUNET_YES == R_last_kj->null_flag))
1212  {
1213  /* R^{(k)}_{ij} = R^{(k-1)}_{ij} | N */
1214  if (GNUNET_YES == R_last_ij->synced)
1215  {
1216  R_cur_ij->synced = GNUNET_YES;
1217  R_cur_ij->null_flag = GNUNET_NO;
1218  return;
1219  }
1220  R_cur_ij->synced = GNUNET_YES;
1221  sb_strdup (R_cur_ij, R_last_ij);
1222  return;
1223  }
1224  R_cur_ij->synced = GNUNET_NO;
1225 
1226  /* $R^{(k)}_{ij} = N | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} OR
1227  * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} */
1228 
1229  R_cur_r->null_flag = GNUNET_YES;
1230  R_cur_r->slen = 0;
1231  R_cur_l->null_flag = GNUNET_YES;
1232  R_cur_l->slen = 0;
1233 
1234  /* cache results from strcmp, we might need these many times */
1235  ij_kj_cmp = sb_nullstrcmp (R_last_ij, R_last_kj);
1236  ij_ik_cmp = sb_nullstrcmp (R_last_ij, R_last_ik);
1237  ik_kk_cmp = sb_nullstrcmp (R_last_ik, R_last_kk);
1238  kk_kj_cmp = sb_nullstrcmp (R_last_kk, R_last_kj);
1239 
1240  /* Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well
1241  * as parentheses, so we can better compare the contents */
1242 
1243  memset (&R_temp_ij, 0, sizeof(struct StringBuffer));
1244  memset (&R_temp_ik, 0, sizeof(struct StringBuffer));
1245  memset (&R_temp_kk, 0, sizeof(struct StringBuffer));
1246  memset (&R_temp_kj, 0, sizeof(struct StringBuffer));
1247  remove_epsilon (R_last_ik, &R_temp_ik);
1248  remove_epsilon (R_last_kk, &R_temp_kk);
1249  remove_epsilon (R_last_kj, &R_temp_kj);
1250  remove_parentheses (&R_temp_ik);
1251  remove_parentheses (&R_temp_kk);
1252  remove_parentheses (&R_temp_kj);
1253  clean_ik_kk_cmp = sb_nullstrcmp (R_last_ik, &R_temp_kk);
1254  clean_kk_kj_cmp = sb_nullstrcmp (&R_temp_kk, R_last_kj);
1255 
1256  /* construct R_cur_l (and, if necessary R_cur_r) */
1257  if (GNUNET_YES != R_last_ij->null_flag)
1258  {
1259  /* Assign R_temp_ij to R_last_ij and remove epsilon as well
1260  * as parentheses, so we can better compare the contents */
1261  remove_epsilon (R_last_ij, &R_temp_ij);
1262  remove_parentheses (&R_temp_ij);
1263 
1264  if ((0 == sb_strcmp (&R_temp_ij, &R_temp_ik)) &&
1265  (0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) &&
1266  (0 == sb_strcmp (&R_temp_kk, &R_temp_kj)))
1267  {
1268  if (0 == R_temp_ij.slen)
1269  {
1270  R_cur_r->null_flag = GNUNET_NO;
1271  }
1272  else if ((0 == sb_strncmp_cstr (R_last_ij, "(|", 2)) ||
1273  ((0 == sb_strncmp_cstr (R_last_ik, "(|", 2)) &&
1274  (0 == sb_strncmp_cstr (R_last_kj, "(|", 2)) ))
1275  {
1276  /*
1277  * a|(e|a)a*(e|a) = a*
1278  * a|(e|a)(e|a)*(e|a) = a*
1279  * (e|a)|aa*a = a*
1280  * (e|a)|aa*(e|a) = a*
1281  * (e|a)|(e|a)a*a = a*
1282  * (e|a)|(e|a)a*(e|a) = a*
1283  * (e|a)|(e|a)(e|a)*(e|a) = a*
1284  */if (GNUNET_YES == needs_parentheses (&R_temp_ij))
1285  sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_ij);
1286  else
1287  sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_ij);
1288  }
1289  else
1290  {
1291  /*
1292  * a|aa*a = a+
1293  * a|(e|a)a*a = a+
1294  * a|aa*(e|a) = a+
1295  * a|(e|a)(e|a)*a = a+
1296  * a|a(e|a)*(e|a) = a+
1297  */if (GNUNET_YES == needs_parentheses (&R_temp_ij))
1298  sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_ij);
1299  else
1300  sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_ij);
1301  }
1302  }
1303  else if ((0 == ij_ik_cmp) && (0 == clean_kk_kj_cmp) &&
1304  (0 != clean_ik_kk_cmp))
1305  {
1306  /* a|ab*b = ab* */
1307  if (0 == R_last_kk->slen)
1308  sb_strdup (R_cur_r, R_last_ij);
1309  else if (GNUNET_YES == needs_parentheses (&R_temp_kk))
1310  sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
1311  else
1312  sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, R_last_kk);
1313  R_cur_l->null_flag = GNUNET_YES;
1314  }
1315  else if ((0 == ij_kj_cmp) && (0 == clean_ik_kk_cmp) &&
1316  (0 != clean_kk_kj_cmp))
1317  {
1318  /* a|bb*a = b*a */
1319  if (R_last_kk->slen < 1)
1320  {
1321  sb_strdup (R_cur_r, R_last_kj);
1322  }
1323  else if (GNUNET_YES == needs_parentheses (&R_temp_kk))
1324  sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
1325  else
1326  sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
1327 
1328  R_cur_l->null_flag = GNUNET_YES;
1329  }
1330  else if ((0 == ij_ik_cmp) && (0 == kk_kj_cmp) &&
1331  (! has_epsilon (R_last_ij)) && has_epsilon (R_last_kk))
1332  {
1333  /* a|a(e|b)*(e|b) = a|ab* = a|a|ab|abb|abbb|... = ab* */
1334  if (needs_parentheses (&R_temp_kk))
1335  sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
1336  else
1337  sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, &R_temp_kk);
1338  R_cur_l->null_flag = GNUNET_YES;
1339  }
1340  else if ((0 == ij_kj_cmp) && (0 == ik_kk_cmp) &&
1341  (! has_epsilon (R_last_ij)) && has_epsilon (R_last_kk))
1342  {
1343  /* a|(e|b)(e|b)*a = a|b*a = a|a|ba|bba|bbba|... = b*a */
1344  if (needs_parentheses (&R_temp_kk))
1345  sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_ij);
1346  else
1347  sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_ij);
1348  R_cur_l->null_flag = GNUNET_YES;
1349  }
1350  else
1351  {
1352  sb_strdup (R_cur_l, R_last_ij);
1353  remove_parentheses (R_cur_l);
1354  }
1355  }
1356  else
1357  {
1358  /* we have no left side */
1359  R_cur_l->null_flag = GNUNET_YES;
1360  }
1361 
1362  /* construct R_cur_r, if not already constructed */
1363  if (GNUNET_YES == R_cur_r->null_flag)
1364  {
1365  length = R_temp_kk.slen - R_last_ik->slen;
1366 
1367  /* a(ba)*bx = (ab)+x */
1368  if ((length > 0) && (GNUNET_YES != R_last_kk->null_flag) &&
1369  (0 < R_last_kk->slen) && (GNUNET_YES != R_last_kj->null_flag) &&
1370  (0 < R_last_kj->slen) && (GNUNET_YES != R_last_ik->null_flag) &&
1371  (0 < R_last_ik->slen) &&
1372  (0 == sb_strkcmp (&R_temp_kk, R_last_ik, length)) &&
1373  (0 == sb_strncmp (&R_temp_kk, R_last_kj, length)))
1374  {
1375  struct StringBuffer temp_a;
1376  struct StringBuffer temp_b;
1377 
1378  sb_init (&temp_a, length);
1379  sb_init (&temp_b, R_last_kj->slen - length);
1380 
1381  length_l = length;
1382  temp_a.sbuf = temp_a.abuf;
1383  GNUNET_memcpy (temp_a.sbuf, R_last_kj->sbuf, length_l);
1384  temp_a.slen = length_l;
1385 
1386  length_r = R_last_kj->slen - length;
1387  temp_b.sbuf = temp_b.abuf;
1388  GNUNET_memcpy (temp_b.sbuf, &R_last_kj->sbuf[length], length_r);
1389  temp_b.slen = length_r;
1390 
1391  /* e|(ab)+ = (ab)* */
1392  if ((GNUNET_YES != R_cur_l->null_flag) && (0 == R_cur_l->slen) &&
1393  (0 == temp_b.slen))
1394  {
1395  sb_printf2 (R_cur_r, "(%.*s%.*s)*", 3, R_last_ik, &temp_a);
1396  sb_free (R_cur_l);
1397  R_cur_l->null_flag = GNUNET_YES;
1398  }
1399  else
1400  {
1401  sb_printf3 (R_cur_r, "(%.*s%.*s)+%.*s", 3, R_last_ik, &temp_a, &temp_b);
1402  }
1403  sb_free (&temp_a);
1404  sb_free (&temp_b);
1405  }
1406  else if ((0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) &&
1407  (0 == sb_strcmp (&R_temp_kk, &R_temp_kj)) )
1408  {
1409  /*
1410  * (e|a)a*(e|a) = a*
1411  * (e|a)(e|a)*(e|a) = a*
1412  */
1413  if (has_epsilon (R_last_ik) && has_epsilon (R_last_kj))
1414  {
1415  if (needs_parentheses (&R_temp_kk))
1416  sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_kk);
1417  else
1418  sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_kk);
1419  }
1420  /* aa*a = a+a */
1421  else if ((0 == clean_ik_kk_cmp) && (0 == clean_kk_kj_cmp) &&
1422  (! has_epsilon (R_last_ik)))
1423  {
1424  if (needs_parentheses (&R_temp_kk))
1425  sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, &R_temp_kk);
1426  else
1427  sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, &R_temp_kk);
1428  }
1429  /*
1430  * (e|a)a*a = a+
1431  * aa*(e|a) = a+
1432  * a(e|a)*(e|a) = a+
1433  * (e|a)a*a = a+
1434  */else
1435  {
1436  eps_check = (has_epsilon (R_last_ik) + has_epsilon (R_last_kk)
1437  + has_epsilon (R_last_kj));
1438 
1439  if (1 == eps_check)
1440  {
1441  if (needs_parentheses (&R_temp_kk))
1442  sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_kk);
1443  else
1444  sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_kk);
1445  }
1446  }
1447  }
1448  /*
1449  * aa*b = a+b
1450  * (e|a)(e|a)*b = a*b
1451  */
1452  else if (0 == sb_strcmp (&R_temp_ik, &R_temp_kk))
1453  {
1454  if (has_epsilon (R_last_ik))
1455  {
1456  if (needs_parentheses (&R_temp_kk))
1457  sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
1458  else
1459  sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
1460  }
1461  else
1462  {
1463  if (needs_parentheses (&R_temp_kk))
1464  sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, R_last_kj);
1465  else
1466  sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, R_last_kj);
1467  }
1468  }
1469  /*
1470  * ba*a = ba+
1471  * b(e|a)*(e|a) = ba*
1472  */
1473  else if (0 == sb_strcmp (&R_temp_kk, &R_temp_kj))
1474  {
1475  if (has_epsilon (R_last_kj))
1476  {
1477  if (needs_parentheses (&R_temp_kk))
1478  sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ik, &R_temp_kk);
1479  else
1480  sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ik, &R_temp_kk);
1481  }
1482  else
1483  {
1484  if (needs_parentheses (&R_temp_kk))
1485  sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, R_last_ik, &R_temp_kk);
1486  else
1487  sb_printf2 (R_cur_r, "%.*s+%.*s", 1, R_last_ik, &R_temp_kk);
1488  }
1489  }
1490  else
1491  {
1492  if (0 < R_temp_kk.slen)
1493  {
1494  if (needs_parentheses (&R_temp_kk))
1495  {
1496  sb_printf3 (R_cur_r,
1497  "%.*s(%.*s)*%.*s",
1498  3,
1499  R_last_ik,
1500  &R_temp_kk,
1501  R_last_kj);
1502  }
1503  else
1504  {
1505  sb_printf3 (R_cur_r,
1506  "%.*s%.*s*%.*s",
1507  1,
1508  R_last_ik,
1509  &R_temp_kk,
1510  R_last_kj);
1511  }
1512  }
1513  else
1514  {
1515  sb_printf2 (R_cur_r, "%.*s%.*s", 0, R_last_ik, R_last_kj);
1516  }
1517  }
1518  }
1519  sb_free (&R_temp_ij);
1520  sb_free (&R_temp_ik);
1521  sb_free (&R_temp_kk);
1522  sb_free (&R_temp_kj);
1523 
1524  if ((GNUNET_YES == R_cur_l->null_flag) && (GNUNET_YES == R_cur_r->null_flag))
1525  {
1526  R_cur_ij->null_flag = GNUNET_YES;
1527  return;
1528  }
1529 
1530  if ((GNUNET_YES != R_cur_l->null_flag) && (GNUNET_YES == R_cur_r->null_flag))
1531  {
1532  struct StringBuffer tmp;
1533 
1534  tmp = *R_cur_ij;
1535  *R_cur_ij = *R_cur_l;
1536  *R_cur_l = tmp;
1537  return;
1538  }
1539 
1540  if ((GNUNET_YES == R_cur_l->null_flag) && (GNUNET_YES != R_cur_r->null_flag))
1541  {
1542  struct StringBuffer tmp;
1543 
1544  tmp = *R_cur_ij;
1545  *R_cur_ij = *R_cur_r;
1546  *R_cur_r = tmp;
1547  return;
1548  }
1549 
1550  if (0 == sb_nullstrcmp (R_cur_l, R_cur_r))
1551  {
1552  struct StringBuffer tmp;
1553 
1554  tmp = *R_cur_ij;
1555  *R_cur_ij = *R_cur_l;
1556  *R_cur_l = tmp;
1557  return;
1558  }
1559  sb_printf2 (R_cur_ij, "(%.*s|%.*s)", 3, R_cur_l, R_cur_r);
1560 }
1561 
1562 
1574 static int
1576 {
1577  unsigned int n = a->state_count;
1578  struct REGEX_INTERNAL_State *states[n];
1579  struct StringBuffer *R_last;
1580  struct StringBuffer *R_cur;
1581  struct StringBuffer R_cur_r;
1582  struct StringBuffer R_cur_l;
1583  struct StringBuffer *R_swap;
1584  struct REGEX_INTERNAL_Transition *t;
1585  struct StringBuffer complete_regex;
1586  unsigned int i;
1587  unsigned int j;
1588  unsigned int k;
1589 
1590  R_last = GNUNET_malloc_large (sizeof(struct StringBuffer) * n * n);
1591  R_cur = GNUNET_malloc_large (sizeof(struct StringBuffer) * n * n);
1592  if ((NULL == R_last) || (NULL == R_cur))
1593  {
1595  GNUNET_free_non_null (R_cur);
1596  GNUNET_free_non_null (R_last);
1597  return GNUNET_SYSERR;
1598  }
1599 
1600  /* create depth-first numbering of the states, initializes 'state' */
1602  a->start,
1603  NULL,
1604  NULL,
1605  &number_states,
1606  states);
1607 
1608  for (i = 0; i < n; i++)
1609  GNUNET_assert (NULL != states[i]);
1610  for (i = 0; i < n; i++)
1611  for (j = 0; j < n; j++)
1612  R_last[i * n + j].null_flag = GNUNET_YES;
1613 
1614  /* Compute regular expressions of length "1" between each pair of states */
1615  for (i = 0; i < n; i++)
1616  {
1617  for (t = states[i]->transitions_head; NULL != t; t = t->next)
1618  {
1619  j = t->to_state->dfs_id;
1620  if (GNUNET_YES == R_last[i * n + j].null_flag)
1621  {
1622  sb_strdup_cstr (&R_last[i * n + j], t->label);
1623  }
1624  else
1625  {
1626  sb_append_cstr (&R_last[i * n + j], "|");
1627  sb_append_cstr (&R_last[i * n + j], t->label);
1628  }
1629  }
1630  /* add self-loop: i is reachable from i via epsilon-transition */
1631  if (GNUNET_YES == R_last[i * n + i].null_flag)
1632  {
1633  R_last[i * n + i].slen = 0;
1634  R_last[i * n + i].null_flag = GNUNET_NO;
1635  }
1636  else
1637  {
1638  sb_wrap (&R_last[i * n + i], "(|%.*s)", 3);
1639  }
1640  }
1641  for (i = 0; i < n; i++)
1642  for (j = 0; j < n; j++)
1643  if (needs_parentheses (&R_last[i * n + j]))
1644  sb_wrap (&R_last[i * n + j], "(%.*s)", 2);
1645  /* Compute regular expressions of length "k" between each pair of states per
1646  * induction */
1647  memset (&R_cur_l, 0, sizeof(struct StringBuffer));
1648  memset (&R_cur_r, 0, sizeof(struct StringBuffer));
1649  for (k = 0; k < n; k++)
1650  {
1651  for (i = 0; i < n; i++)
1652  {
1653  for (j = 0; j < n; j++)
1654  {
1655  /* Basis for the recursion:
1656  * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1657  * R_last == R^{(k-1)}, R_cur == R^{(k)}
1658  */
1659 
1660  /* Create R_cur[i][j] and simplify the expression */
1661  automaton_create_proofs_simplify (&R_last[i * n + j],
1662  &R_last[i * n + k],
1663  &R_last[k * n + k],
1664  &R_last[k * n + j],
1665  &R_cur[i * n + j],
1666  &R_cur_l,
1667  &R_cur_r);
1668  }
1669  }
1670  /* set R_last = R_cur */
1671  R_swap = R_last;
1672  R_last = R_cur;
1673  R_cur = R_swap;
1674  /* clear 'R_cur' for next iteration */
1675  for (i = 0; i < n; i++)
1676  for (j = 0; j < n; j++)
1677  R_cur[i * n + j].null_flag = GNUNET_YES;
1678  }
1679  sb_free (&R_cur_l);
1680  sb_free (&R_cur_r);
1681  /* assign proofs and hashes */
1682  for (i = 0; i < n; i++)
1683  {
1684  if (GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag)
1685  {
1686  states[i]->proof = GNUNET_strndup (R_last[a->start->dfs_id * n + i].sbuf,
1687  R_last[a->start->dfs_id * n + i].slen);
1688  GNUNET_CRYPTO_hash (states[i]->proof,
1689  strlen (states[i]->proof),
1690  &states[i]->hash);
1691  }
1692  }
1693 
1694  /* complete regex for whole DFA: union of all pairs (start state/accepting
1695  * state(s)). */
1696  sb_init (&complete_regex, 16 * n);
1697  for (i = 0; i < n; i++)
1698  {
1699  if (states[i]->accepting)
1700  {
1701  if ((0 == complete_regex.slen) &&
1702  (0 < R_last[a->start->dfs_id * n + i].slen))
1703  {
1704  sb_append (&complete_regex, &R_last[a->start->dfs_id * n + i]);
1705  }
1706  else if ((GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag) &&
1707  (0 < R_last[a->start->dfs_id * n + i].slen))
1708  {
1709  sb_append_cstr (&complete_regex, "|");
1710  sb_append (&complete_regex, &R_last[a->start->dfs_id * n + i]);
1711  }
1712  }
1713  }
1714  a->canonical_regex =
1715  GNUNET_strndup (complete_regex.sbuf, complete_regex.slen);
1716 
1717  /* cleanup */
1718  sb_free (&complete_regex);
1719  for (i = 0; i < n; i++)
1720  for (j = 0; j < n; j++)
1721  {
1722  sb_free (&R_cur[i * n + j]);
1723  sb_free (&R_last[i * n + j]);
1724  }
1725  GNUNET_free (R_cur);
1726  GNUNET_free (R_last);
1727  return GNUNET_OK;
1728 }
1729 
1730 
1740 static struct REGEX_INTERNAL_State *
1742  struct REGEX_INTERNAL_StateSet *nfa_states)
1743 {
1744  struct REGEX_INTERNAL_State *s;
1745  char *pos;
1746  size_t len;
1747  struct REGEX_INTERNAL_State *cstate;
1748  struct REGEX_INTERNAL_Transition *ctran;
1749  unsigned int i;
1750 
1751  s = GNUNET_new (struct REGEX_INTERNAL_State);
1752  s->id = ctx->state_id++;
1753  s->index = -1;
1754  s->lowlink = -1;
1755 
1756  if (NULL == nfa_states)
1757  {
1758  GNUNET_asprintf (&s->name, "s%i", s->id);
1759  return s;
1760  }
1761 
1762  s->nfa_set = *nfa_states;
1763 
1764  if (nfa_states->off < 1)
1765  return s;
1766 
1767  /* Create a name based on 'nfa_states' */
1768  len = nfa_states->off * 14 + 4;
1769  s->name = GNUNET_malloc (len);
1770  strcat (s->name, "{");
1771  pos = s->name + 1;
1772 
1773  for (i = 0; i < nfa_states->off; i++)
1774  {
1775  cstate = nfa_states->states[i];
1776  GNUNET_snprintf (pos, pos - s->name + len, "%i,", cstate->id);
1777  pos += strlen (pos);
1778 
1779  /* Add a transition for each distinct label to NULL state */
1780  for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next)
1781  if (NULL != ctran->label)
1782  state_add_transition (ctx, s, ctran->label, NULL);
1783 
1784  /* If the nfa_states contain an accepting state, the new dfa state is also
1785  * accepting. */
1786  if (cstate->accepting)
1787  s->accepting = 1;
1788  }
1789  pos[-1] = '}';
1790  s->name = GNUNET_realloc (s->name, strlen (s->name) + 1);
1791 
1792  memset (nfa_states, 0, sizeof(struct REGEX_INTERNAL_StateSet));
1793  return s;
1794 }
1795 
1796 
1810 static unsigned int
1811 dfa_move (struct REGEX_INTERNAL_State **s, const char *str)
1812 {
1813  struct REGEX_INTERNAL_Transition *t;
1814  struct REGEX_INTERNAL_State *new_s;
1815  unsigned int len;
1816  unsigned int max_len;
1817 
1818  if (NULL == s)
1819  return 0;
1820 
1821  new_s = NULL;
1822  max_len = 0;
1823  for (t = (*s)->transitions_head; NULL != t; t = t->next)
1824  {
1825  len = strlen (t->label);
1826 
1827  if (0 == strncmp (t->label, str, len))
1828  {
1829  if (len >= max_len)
1830  {
1831  max_len = len;
1832  new_s = t->to_state;
1833  }
1834  }
1835  }
1836 
1837  *s = new_s;
1838  return max_len;
1839 }
1840 
1841 
1851 static void
1852 mark_states (void *cls,
1853  const unsigned int count,
1854  struct REGEX_INTERNAL_State *s)
1855 {
1856  s->marked = GNUNET_YES;
1857 }
1858 
1859 
1866 static void
1868 {
1869  struct REGEX_INTERNAL_State *s;
1870  struct REGEX_INTERNAL_State *s_next;
1871 
1872  /* 1. unmark all states */
1873  for (s = a->states_head; NULL != s; s = s->next)
1874  s->marked = GNUNET_NO;
1875 
1876  /* 2. traverse dfa from start state and mark all visited states */
1878  a->start,
1879  NULL,
1880  NULL,
1881  &mark_states,
1882  NULL);
1883 
1884  /* 3. delete all states that were not visited */
1885  for (s = a->states_head; NULL != s; s = s_next)
1886  {
1887  s_next = s->next;
1888  if (GNUNET_NO == s->marked)
1889  automaton_remove_state (a, s);
1890  }
1891 }
1892 
1893 
1900 static void
1902 {
1903  struct REGEX_INTERNAL_State *s;
1904  struct REGEX_INTERNAL_State *s_next;
1905  struct REGEX_INTERNAL_Transition *t;
1906  int dead;
1907 
1908  GNUNET_assert (DFA == a->type);
1909 
1910  for (s = a->states_head; NULL != s; s = s_next)
1911  {
1912  s_next = s->next;
1913 
1914  if (s->accepting)
1915  continue;
1916 
1917  dead = 1;
1918  for (t = s->transitions_head; NULL != t; t = t->next)
1919  {
1920  if ((NULL != t->to_state) && (t->to_state != s) )
1921  {
1922  dead = 0;
1923  break;
1924  }
1925  }
1926 
1927  if (0 == dead)
1928  continue;
1929 
1930  /* state s is dead, remove it */
1931  automaton_remove_state (a, s);
1932  }
1933 }
1934 
1935 
1943 static int
1945  struct REGEX_INTERNAL_Automaton *a)
1946 {
1947  uint32_t *table;
1948  struct REGEX_INTERNAL_State *s1;
1949  struct REGEX_INTERNAL_State *s2;
1950  struct REGEX_INTERNAL_Transition *t1;
1951  struct REGEX_INTERNAL_Transition *t2;
1952  struct REGEX_INTERNAL_State *s1_next;
1953  struct REGEX_INTERNAL_State *s2_next;
1954  int change;
1955  unsigned int num_equal_edges;
1956  unsigned int i;
1957  unsigned int state_cnt;
1958  unsigned long long idx;
1959  unsigned long long idx1;
1960 
1961  if ((NULL == a) || (0 == a->state_count))
1962  {
1964  "Could not merge nondistinguishable states, automaton was NULL.\n");
1965  return GNUNET_SYSERR;
1966  }
1967 
1968  state_cnt = a->state_count;
1969  table = GNUNET_malloc_large (
1970  (sizeof(uint32_t) * state_cnt * state_cnt / 32) + sizeof(uint32_t));
1971  if (NULL == table)
1972  {
1974  return GNUNET_SYSERR;
1975  }
1976 
1977  for (i = 0, s1 = a->states_head; NULL != s1; s1 = s1->next)
1978  s1->marked = i++;
1979 
1980  /* Mark all pairs of accepting/!accepting states */
1981  for (s1 = a->states_head; NULL != s1; s1 = s1->next)
1982  for (s2 = a->states_head; NULL != s2; s2 = s2->next)
1983  if ((s1->accepting && ! s2->accepting) ||
1984  (! s1->accepting && s2->accepting))
1985  {
1986  idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
1987  table[idx / 32] |= (1U << (idx % 32));
1988  }
1989 
1990  /* Find all equal states */
1991  change = 1;
1992  while (0 != change)
1993  {
1994  change = 0;
1995  for (s1 = a->states_head; NULL != s1; s1 = s1->next)
1996  {
1997  for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2->next)
1998  {
1999  idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
2000  if (0 != (table[idx / 32] & (1U << (idx % 32))))
2001  continue;
2002  num_equal_edges = 0;
2003  for (t1 = s1->transitions_head; NULL != t1; t1 = t1->next)
2004  {
2005  for (t2 = s2->transitions_head; NULL != t2; t2 = t2->next)
2006  {
2007  if (0 == strcmp (t1->label, t2->label))
2008  {
2009  num_equal_edges++;
2010  /* same edge, but targets definitively different, so we're different
2011  as well */
2012  if (t1->to_state->marked > t2->to_state->marked)
2013  idx1 = (unsigned long long) t1->to_state->marked * state_cnt
2014  + t2->to_state->marked;
2015  else
2016  idx1 = (unsigned long long) t2->to_state->marked * state_cnt
2017  + t1->to_state->marked;
2018  if (0 != (table[idx1 / 32] & (1U << (idx1 % 32))))
2019  {
2020  table[idx / 32] |= (1U << (idx % 32));
2021  change = 1; /* changed a marker, need to run again */
2022  }
2023  }
2024  }
2025  }
2026  if ((num_equal_edges != s1->transition_count) ||
2027  (num_equal_edges != s2->transition_count))
2028  {
2029  /* Make sure ALL edges of possible equal states are the same */
2030  table[idx / 32] |= (1U << (idx % 32));
2031  change = 1; /* changed a marker, need to run again */
2032  }
2033  }
2034  }
2035  }
2036 
2037  /* Merge states that are equal */
2038  for (s1 = a->states_head; NULL != s1; s1 = s1_next)
2039  {
2040  s1_next = s1->next;
2041  for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2_next)
2042  {
2043  s2_next = s2->next;
2044  idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
2045  if (0 == (table[idx / 32] & (1U << (idx % 32))))
2046  automaton_merge_states (ctx, a, s1, s2);
2047  }
2048  }
2049 
2050  GNUNET_free (table);
2051  return GNUNET_OK;
2052 }
2053 
2054 
2063 static int
2065  struct REGEX_INTERNAL_Automaton *a)
2066 {
2067  if (NULL == a)
2068  return GNUNET_SYSERR;
2069 
2070  GNUNET_assert (DFA == a->type);
2071 
2072  /* 1. remove unreachable states */
2074 
2075  /* 2. remove dead states */
2077 
2078  /* 3. Merge nondistinguishable states */
2080  return GNUNET_SYSERR;
2081  return GNUNET_OK;
2082 }
2083 
2084 
2089 {
2093  const unsigned int stride;
2094 
2100 
2105 };
2106 
2107 
2118 static void
2120  const unsigned int depth,
2121  char *label,
2122  struct REGEX_INTERNAL_State *start,
2123  struct REGEX_INTERNAL_State *s)
2124 {
2125  struct REGEX_INTERNAL_Strided_Context *ctx = cls;
2126  struct REGEX_INTERNAL_Transition *t;
2127  char *new_label;
2128 
2129  if (depth == ctx->stride)
2130  {
2132  t->label = GNUNET_strdup (label);
2133  t->to_state = s;
2134  t->from_state = start;
2136  ctx->transitions_tail,
2137  t);
2138  }
2139  else
2140  {
2141  for (t = s->transitions_head; NULL != t; t = t->next)
2142  {
2143  /* Do not consider self-loops, because it end's up in too many
2144  * transitions */
2145  if (t->to_state == t->from_state)
2146  continue;
2147 
2148  if (NULL != label)
2149  {
2150  GNUNET_asprintf (&new_label, "%s%s", label, t->label);
2151  }
2152  else
2153  new_label = GNUNET_strdup (t->label);
2154 
2156  (depth + 1),
2157  new_label,
2158  start,
2159  t->to_state);
2160  }
2161  }
2162  GNUNET_free_non_null (label);
2163 }
2164 
2165 
2174 static void
2176  const unsigned int count,
2177  struct REGEX_INTERNAL_State *s)
2178 {
2179  dfa_add_multi_strides_helper (cls, 0, NULL, s, s);
2180 }
2181 
2182 
2190 void
2192  struct REGEX_INTERNAL_Automaton *dfa,
2193  const unsigned int stride_len)
2194 {
2195  struct REGEX_INTERNAL_Strided_Context ctx = { stride_len, NULL, NULL };
2196  struct REGEX_INTERNAL_Transition *t;
2197  struct REGEX_INTERNAL_Transition *t_next;
2198 
2199  if ((1 > stride_len) || (GNUNET_YES == dfa->is_multistrided))
2200  return;
2201 
2202  /* Compute the new transitions of given stride_len */
2204  dfa->start,
2205  NULL,
2206  NULL,
2208  &ctx);
2209 
2210  /* Add all the new transitions to the automaton. */
2211  for (t = ctx.transitions_head; NULL != t; t = t_next)
2212  {
2213  t_next = t->next;
2214  state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
2217  GNUNET_free (t);
2218  }
2219 
2220  /* Mark this automaton as multistrided */
2221  dfa->is_multistrided = GNUNET_YES;
2222 }
2223 
2224 
2238 void
2240  struct REGEX_INTERNAL_State *start,
2241  struct REGEX_INTERNAL_State *cur,
2242  char *label,
2243  unsigned int max_len,
2244  struct REGEX_INTERNAL_Transition **transitions_head,
2245  struct REGEX_INTERNAL_Transition **transitions_tail)
2246 {
2247  struct REGEX_INTERNAL_Transition *t;
2248  char *new_label;
2249 
2250 
2251  if ((NULL != label) &&
2252  (((cur->incoming_transition_count > 1) || (GNUNET_YES ==
2253  cur->accepting) ||
2254  (GNUNET_YES == cur->marked) ) ||
2255  ((start != dfa->start) && (max_len > 0) && (max_len == strlen (
2256  label))) ||
2257  ((start == dfa->start) && (GNUNET_REGEX_INITIAL_BYTES == strlen (
2258  label)))))
2259  {
2261  t->label = GNUNET_strdup (label);
2262  t->to_state = cur;
2263  t->from_state = start;
2264  GNUNET_CONTAINER_DLL_insert (*transitions_head, *transitions_tail, t);
2265 
2266  if (GNUNET_NO == cur->marked)
2267  {
2269  cur,
2270  cur,
2271  NULL,
2272  max_len,
2273  transitions_head,
2274  transitions_tail);
2275  }
2276  return;
2277  }
2278  else if (cur != start)
2279  cur->contained = GNUNET_YES;
2280 
2281  if ((GNUNET_YES == cur->marked) && (cur != start))
2282  return;
2283 
2284  cur->marked = GNUNET_YES;
2285 
2286 
2287  for (t = cur->transitions_head; NULL != t; t = t->next)
2288  {
2289  if (NULL != label)
2290  GNUNET_asprintf (&new_label, "%s%s", label, t->label);
2291  else
2292  new_label = GNUNET_strdup (t->label);
2293 
2294  if (t->to_state != cur)
2295  {
2297  start,
2298  t->to_state,
2299  new_label,
2300  max_len,
2301  transitions_head,
2302  transitions_tail);
2303  }
2304  GNUNET_free (new_label);
2305  }
2306 }
2307 
2308 
2317 static void
2319  struct REGEX_INTERNAL_Automaton *dfa,
2320  unsigned int max_len)
2321 {
2322  struct REGEX_INTERNAL_State *s;
2323  struct REGEX_INTERNAL_State *s_next;
2324  struct REGEX_INTERNAL_Transition *t;
2325  struct REGEX_INTERNAL_Transition *t_next;
2326  struct REGEX_INTERNAL_Transition *transitions_head = NULL;
2327  struct REGEX_INTERNAL_Transition *transitions_tail = NULL;
2328 
2329  if (NULL == dfa)
2330  return;
2331 
2332  /* Count the incoming transitions on each state. */
2333  for (s = dfa->states_head; NULL != s; s = s->next)
2334  {
2335  for (t = s->transitions_head; NULL != t; t = t->next)
2336  {
2337  if (NULL != t->to_state)
2339  }
2340  }
2341 
2342  /* Unmark all states. */
2343  for (s = dfa->states_head; NULL != s; s = s->next)
2344  {
2345  s->marked = GNUNET_NO;
2346  s->contained = GNUNET_NO;
2347  }
2348 
2349  /* Add strides and mark states that can be deleted. */
2351  dfa->start,
2352  dfa->start,
2353  NULL,
2354  max_len,
2355  &transitions_head,
2356  &transitions_tail);
2357 
2358  /* Add all the new transitions to the automaton. */
2359  for (t = transitions_head; NULL != t; t = t_next)
2360  {
2361  t_next = t->next;
2362  state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
2363  GNUNET_CONTAINER_DLL_remove (transitions_head, transitions_tail, t);
2365  GNUNET_free (t);
2366  }
2367 
2368  /* Remove marked states (including their incoming and outgoing transitions). */
2369  for (s = dfa->states_head; NULL != s; s = s_next)
2370  {
2371  s_next = s->next;
2372  if (GNUNET_YES == s->contained)
2373  automaton_remove_state (dfa, s);
2374  }
2375 }
2376 
2377 
2387 static struct REGEX_INTERNAL_Automaton *
2389  struct REGEX_INTERNAL_State *end)
2390 {
2391  struct REGEX_INTERNAL_Automaton *n;
2392 
2393  n = GNUNET_new (struct REGEX_INTERNAL_Automaton);
2394 
2395  n->type = NFA;
2396  n->start = NULL;
2397  n->end = NULL;
2398  n->state_count = 0;
2399 
2400  if ((NULL == start) || (NULL == end))
2401  return n;
2402 
2403  automaton_add_state (n, end);
2404  automaton_add_state (n, start);
2405 
2406  n->state_count = 2;
2407 
2408  n->start = start;
2409  n->end = end;
2410 
2411  return n;
2412 }
2413 
2414 
2422 static void
2426 {
2427  struct REGEX_INTERNAL_State *s;
2428 
2429  if ((NULL == n) || (NULL == states_head))
2430  {
2431  GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not add states\n");
2432  return;
2433  }
2434 
2435  if (NULL == n->states_head)
2436  {
2437  n->states_head = states_head;
2438  n->states_tail = states_tail;
2439  return;
2440  }
2441 
2442  if (NULL != states_head)
2443  {
2444  n->states_tail->next = states_head;
2445  n->states_tail = states_tail;
2446  }
2447 
2448  for (s = states_head; NULL != s; s = s->next)
2449  n->state_count++;
2450 }
2451 
2452 
2461 static struct REGEX_INTERNAL_State *
2463 {
2464  struct REGEX_INTERNAL_State *s;
2465 
2466  s = GNUNET_new (struct REGEX_INTERNAL_State);
2467  s->id = ctx->state_id++;
2468  s->accepting = accepting;
2469  s->marked = GNUNET_NO;
2470  s->contained = 0;
2471  s->index = -1;
2472  s->lowlink = -1;
2473  s->scc_id = 0;
2474  s->name = NULL;
2475  GNUNET_asprintf (&s->name, "s%i", s->id);
2476 
2477  return s;
2478 }
2479 
2480 
2490 static void
2492  struct REGEX_INTERNAL_Automaton *nfa,
2493  struct REGEX_INTERNAL_StateSet *states,
2494  const char *label)
2495 {
2496  struct REGEX_INTERNAL_State *s;
2497  unsigned int i;
2498  struct REGEX_INTERNAL_StateSet_MDLL cls_stack;
2499  struct REGEX_INTERNAL_State *clsstate;
2500  struct REGEX_INTERNAL_State *currentstate;
2501  struct REGEX_INTERNAL_Transition *ctran;
2502 
2503  memset (ret, 0, sizeof(struct REGEX_INTERNAL_StateSet));
2504  if (NULL == states)
2505  return;
2506 
2507  for (i = 0; i < states->off; i++)
2508  {
2509  s = states->states[i];
2510 
2511  /* Add start state to closure only for epsilon closure */
2512  if (NULL == label)
2513  state_set_append (ret, s);
2514 
2515  /* initialize work stack */
2516  cls_stack.head = NULL;
2517  cls_stack.tail = NULL;
2518  GNUNET_CONTAINER_MDLL_insert (ST, cls_stack.head, cls_stack.tail, s);
2519  cls_stack.len = 1;
2520 
2521  while (NULL != (currentstate = cls_stack.tail))
2522  {
2524  cls_stack.head,
2525  cls_stack.tail,
2526  currentstate);
2527  cls_stack.len--;
2528  for (ctran = currentstate->transitions_head; NULL != ctran;
2529  ctran = ctran->next)
2530  {
2531  if (NULL == (clsstate = ctran->to_state))
2532  continue;
2533  if (0 != clsstate->contained)
2534  continue;
2535  if (0 != nullstrcmp (label, ctran->label))
2536  continue;
2537  state_set_append (ret, clsstate);
2539  cls_stack.head,
2540  cls_stack.tail,
2541  clsstate);
2542  cls_stack.len++;
2543  clsstate->contained = 1;
2544  }
2545  }
2546  }
2547  for (i = 0; i < ret->off; i++)
2548  ret->states[i]->contained = 0;
2549 
2550  if (ret->off > 1)
2551  qsort (ret->states,
2552  ret->off,
2553  sizeof(struct REGEX_INTERNAL_State *),
2554  &state_compare);
2555 }
2556 
2557 
2563 static void
2565 {
2566  struct REGEX_INTERNAL_Automaton *a;
2567  struct REGEX_INTERNAL_Automaton *b;
2568  struct REGEX_INTERNAL_Automaton *new_nfa;
2569 
2570  b = ctx->stack_tail;
2571  GNUNET_assert (NULL != b);
2573  a = ctx->stack_tail;
2574  GNUNET_assert (NULL != a);
2576 
2577  state_add_transition (ctx, a->end, NULL, b->start);
2578  a->end->accepting = 0;
2579  b->end->accepting = 1;
2580 
2581  new_nfa = nfa_fragment_create (NULL, NULL);
2582  nfa_add_states (new_nfa, a->states_head, a->states_tail);
2583  nfa_add_states (new_nfa, b->states_head, b->states_tail);
2584  new_nfa->start = a->start;
2585  new_nfa->end = b->end;
2586  new_nfa->state_count += a->state_count + b->state_count;
2589 
2591 }
2592 
2593 
2599 static void
2601 {
2602  struct REGEX_INTERNAL_Automaton *a;
2603  struct REGEX_INTERNAL_Automaton *new_nfa;
2604  struct REGEX_INTERNAL_State *start;
2605  struct REGEX_INTERNAL_State *end;
2606 
2607  a = ctx->stack_tail;
2608 
2609  if (NULL == a)
2610  {
2611  GNUNET_log (
2613  "nfa_add_star_op failed, because there was no element on the stack");
2614  return;
2615  }
2616 
2618 
2619  start = nfa_state_create (ctx, 0);
2620  end = nfa_state_create (ctx, 1);
2621 
2622  state_add_transition (ctx, start, NULL, a->start);
2623  state_add_transition (ctx, start, NULL, end);
2624  state_add_transition (ctx, a->end, NULL, a->start);
2625  state_add_transition (ctx, a->end, NULL, end);
2626 
2627  a->end->accepting = 0;
2628  end->accepting = 1;
2629 
2630  new_nfa = nfa_fragment_create (start, end);
2631  nfa_add_states (new_nfa, a->states_head, a->states_tail);
2633 
2635 }
2636 
2637 
2643 static void
2645 {
2646  struct REGEX_INTERNAL_Automaton *a;
2647 
2648  a = ctx->stack_tail;
2649 
2650  if (NULL == a)
2651  {
2652  GNUNET_log (
2654  "nfa_add_plus_op failed, because there was no element on the stack");
2655  return;
2656  }
2657 
2659 
2660  state_add_transition (ctx, a->end, NULL, a->start);
2661 
2663 }
2664 
2665 
2671 static void
2673 {
2674  struct REGEX_INTERNAL_Automaton *a;
2675  struct REGEX_INTERNAL_Automaton *new_nfa;
2676  struct REGEX_INTERNAL_State *start;
2677  struct REGEX_INTERNAL_State *end;
2678 
2679  a = ctx->stack_tail;
2680  if (NULL == a)
2681  {
2682  GNUNET_log (
2684  "nfa_add_question_op failed, because there was no element on the stack");
2685  return;
2686  }
2687 
2689 
2690  start = nfa_state_create (ctx, 0);
2691  end = nfa_state_create (ctx, 1);
2692 
2693  state_add_transition (ctx, start, NULL, a->start);
2694  state_add_transition (ctx, start, NULL, end);
2695  state_add_transition (ctx, a->end, NULL, end);
2696 
2697  a->end->accepting = 0;
2698 
2699  new_nfa = nfa_fragment_create (start, end);
2700  nfa_add_states (new_nfa, a->states_head, a->states_tail);
2703 }
2704 
2705 
2712 static void
2714 {
2715  struct REGEX_INTERNAL_Automaton *a;
2716  struct REGEX_INTERNAL_Automaton *b;
2717  struct REGEX_INTERNAL_Automaton *new_nfa;
2718  struct REGEX_INTERNAL_State *start;
2719  struct REGEX_INTERNAL_State *end;
2720 
2721  b = ctx->stack_tail;
2722  GNUNET_assert (NULL != b);
2724  a = ctx->stack_tail;
2725  GNUNET_assert (NULL != a);
2727 
2728  start = nfa_state_create (ctx, 0);
2729  end = nfa_state_create (ctx, 1);
2730  state_add_transition (ctx, start, NULL, a->start);
2731  state_add_transition (ctx, start, NULL, b->start);
2732 
2733  state_add_transition (ctx, a->end, NULL, end);
2734  state_add_transition (ctx, b->end, NULL, end);
2735 
2736  a->end->accepting = 0;
2737  b->end->accepting = 0;
2738  end->accepting = 1;
2739 
2740  new_nfa = nfa_fragment_create (start, end);
2741  nfa_add_states (new_nfa, a->states_head, a->states_tail);
2742  nfa_add_states (new_nfa, b->states_head, b->states_tail);
2745 
2747 }
2748 
2749 
2756 static void
2757 nfa_add_label (struct REGEX_INTERNAL_Context *ctx, const char *label)
2758 {
2759  struct REGEX_INTERNAL_Automaton *n;
2760  struct REGEX_INTERNAL_State *start;
2761  struct REGEX_INTERNAL_State *end;
2762 
2763  GNUNET_assert (NULL != ctx);
2764 
2765  start = nfa_state_create (ctx, 0);
2766  end = nfa_state_create (ctx, 1);
2767  state_add_transition (ctx, start, label, end);
2768  n = nfa_fragment_create (start, end);
2769  GNUNET_assert (NULL != n);
2771 }
2772 
2773 
2779 static void
2781 {
2782  if (NULL == ctx)
2783  {
2784  GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Context was NULL!");
2785  return;
2786  }
2787  ctx->state_id = 0;
2788  ctx->transition_id = 0;
2789  ctx->stack_head = NULL;
2790  ctx->stack_tail = NULL;
2791 }
2792 
2793 
2802 struct REGEX_INTERNAL_Automaton *
2803 REGEX_INTERNAL_construct_nfa (const char *regex, const size_t len)
2804 {
2805  struct REGEX_INTERNAL_Context ctx;
2806  struct REGEX_INTERNAL_Automaton *nfa;
2807  const char *regexp;
2808  char curlabel[2];
2809  char *error_msg;
2810  unsigned int count;
2811  unsigned int altcount;
2812  unsigned int atomcount;
2813  unsigned int poff;
2814  unsigned int psize;
2815 
2816  struct
2817  {
2818  int altcount;
2819  int atomcount;
2820  } *p;
2821 
2822  if ((NULL == regex) || (0 == strlen (regex)) || (0 == len))
2823  {
2825  "Could not parse regex. Empty regex string provided.\n");
2826 
2827  return NULL;
2828  }
2830 
2831  regexp = regex;
2832  curlabel[1] = '\0';
2833  p = NULL;
2834  error_msg = NULL;
2835  altcount = 0;
2836  atomcount = 0;
2837  poff = 0;
2838  psize = 0;
2839 
2840  for (count = 0; count < len && *regexp; count++, regexp++)
2841  {
2842  switch (*regexp)
2843  {
2844  case '(':
2845  if (atomcount > 1)
2846  {
2847  --atomcount;
2848  nfa_add_concatenation (&ctx);
2849  }
2850  if (poff == psize)
2851  GNUNET_array_grow (p, psize, psize * 2 + 4); /* FIXME why *2 +4? */
2852  p[poff].altcount = altcount;
2853  p[poff].atomcount = atomcount;
2854  poff++;
2855  altcount = 0;
2856  atomcount = 0;
2857  break;
2858 
2859  case '|':
2860  if (0 == atomcount)
2861  {
2862  error_msg = "Cannot append '|' to nothing";
2863  goto error;
2864  }
2865  while (--atomcount > 0)
2866  nfa_add_concatenation (&ctx);
2867  altcount++;
2868  break;
2869 
2870  case ')':
2871  if (0 == poff)
2872  {
2873  error_msg = "Missing opening '('";
2874  goto error;
2875  }
2876  if (0 == atomcount)
2877  {
2878  /* Ignore this: "()" */
2879  poff--;
2880  altcount = p[poff].altcount;
2881  atomcount = p[poff].atomcount;
2882  break;
2883  }
2884  while (--atomcount > 0)
2885  nfa_add_concatenation (&ctx);
2886  for (; altcount > 0; altcount--)
2887  nfa_add_alternation (&ctx);
2888  poff--;
2889  altcount = p[poff].altcount;
2890  atomcount = p[poff].atomcount;
2891  atomcount++;
2892  break;
2893 
2894  case '*':
2895  if (atomcount == 0)
2896  {
2897  error_msg = "Cannot append '*' to nothing";
2898  goto error;
2899  }
2900  nfa_add_star_op (&ctx);
2901  break;
2902 
2903  case '+':
2904  if (atomcount == 0)
2905  {
2906  error_msg = "Cannot append '+' to nothing";
2907  goto error;
2908  }
2909  nfa_add_plus_op (&ctx);
2910  break;
2911 
2912  case '?':
2913  if (atomcount == 0)
2914  {
2915  error_msg = "Cannot append '?' to nothing";
2916  goto error;
2917  }
2918  nfa_add_question_op (&ctx);
2919  break;
2920 
2921  default:
2922  if (atomcount > 1)
2923  {
2924  --atomcount;
2925  nfa_add_concatenation (&ctx);
2926  }
2927  curlabel[0] = *regexp;
2928  nfa_add_label (&ctx, curlabel);
2929  atomcount++;
2930  break;
2931  }
2932  }
2933  if (0 != poff)
2934  {
2935  error_msg = "Unbalanced parenthesis";
2936  goto error;
2937  }
2938  while (--atomcount > 0)
2939  nfa_add_concatenation (&ctx);
2940  for (; altcount > 0; altcount--)
2941  nfa_add_alternation (&ctx);
2942 
2943  GNUNET_array_grow (p, psize, 0);
2944 
2945  nfa = ctx.stack_tail;
2947 
2948  if (NULL != ctx.stack_head)
2949  {
2950  error_msg = "Creating the NFA failed. NFA stack was not empty!";
2951  goto error;
2952  }
2953 
2954  /* Remember the regex that was used to generate this NFA */
2955  nfa->regex = GNUNET_strdup (regex);
2956 
2957  /* create depth-first numbering of the states for pretty printing */
2959  NULL,
2960  NULL,
2961  NULL,
2962  &number_states,
2963  NULL);
2964 
2965  /* No multistriding added so far */
2966  nfa->is_multistrided = GNUNET_NO;
2967 
2968  return nfa;
2969 
2970 error:
2971  GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not parse regex: `%s'\n", regex);
2972  if (NULL != error_msg)
2973  GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "%s\n", error_msg);
2974 
2976 
2977  while (NULL != (nfa = ctx.stack_head))
2978  {
2981  }
2982 
2983  return NULL;
2984 }
2985 
2986 
2996 static void
2998  struct REGEX_INTERNAL_Automaton *nfa,
2999  struct REGEX_INTERNAL_Automaton *dfa,
3000  struct REGEX_INTERNAL_State *dfa_state)
3001 {
3002  struct REGEX_INTERNAL_Transition *ctran;
3003  struct REGEX_INTERNAL_State *new_dfa_state;
3004  struct REGEX_INTERNAL_State *state_contains;
3005  struct REGEX_INTERNAL_State *state_iter;
3006  struct REGEX_INTERNAL_StateSet tmp;
3007  struct REGEX_INTERNAL_StateSet nfa_set;
3008 
3009  for (ctran = dfa_state->transitions_head; NULL != ctran; ctran = ctran->next)
3010  {
3011  if ((NULL == ctran->label) || (NULL != ctran->to_state) )
3012  continue;
3013 
3014  nfa_closure_set_create (&tmp, nfa, &dfa_state->nfa_set, ctran->label);
3015  nfa_closure_set_create (&nfa_set, nfa, &tmp, NULL);
3016  state_set_clear (&tmp);
3017 
3018  state_contains = NULL;
3019  for (state_iter = dfa->states_head; NULL != state_iter;
3020  state_iter = state_iter->next)
3021  {
3022  if (0 == state_set_compare (&state_iter->nfa_set, &nfa_set))
3023  {
3024  state_contains = state_iter;
3025  break;
3026  }
3027  }
3028  if (NULL == state_contains)
3029  {
3030  new_dfa_state = dfa_state_create (ctx, &nfa_set);
3031  automaton_add_state (dfa, new_dfa_state);
3032  ctran->to_state = new_dfa_state;
3033  construct_dfa_states (ctx, nfa, dfa, new_dfa_state);
3034  }
3035  else
3036  {
3037  ctran->to_state = state_contains;
3038  state_set_clear (&nfa_set);
3039  }
3040  }
3041 }
3042 
3043 
3061 struct REGEX_INTERNAL_Automaton *
3063  const size_t len,
3064  unsigned int max_path_len)
3065 {
3066  struct REGEX_INTERNAL_Context ctx;
3067  struct REGEX_INTERNAL_Automaton *dfa;
3068  struct REGEX_INTERNAL_Automaton *nfa;
3069  struct REGEX_INTERNAL_StateSet nfa_start_eps_cls;
3070  struct REGEX_INTERNAL_StateSet singleton_set;
3071 
3073 
3074  /* Create NFA */
3075  nfa = REGEX_INTERNAL_construct_nfa (regex, len);
3076 
3077  if (NULL == nfa)
3078  {
3080  "Could not create DFA, because NFA creation failed\n");
3081  return NULL;
3082  }
3083 
3084  dfa = GNUNET_new (struct REGEX_INTERNAL_Automaton);
3085  dfa->type = DFA;
3086  dfa->regex = GNUNET_strdup (regex);
3087 
3088  /* Create DFA start state from epsilon closure */
3089  memset (&singleton_set, 0, sizeof(struct REGEX_INTERNAL_StateSet));
3090  state_set_append (&singleton_set, nfa->start);
3091  nfa_closure_set_create (&nfa_start_eps_cls, nfa, &singleton_set, NULL);
3092  state_set_clear (&singleton_set);
3093  dfa->start = dfa_state_create (&ctx, &nfa_start_eps_cls);
3094  automaton_add_state (dfa, dfa->start);
3095 
3096  construct_dfa_states (&ctx, nfa, dfa, dfa->start);
3098 
3099  /* Minimize DFA */
3100  if (GNUNET_OK != dfa_minimize (&ctx, dfa))
3101  {
3103  return NULL;
3104  }
3105 
3106  /* Create proofs and hashes for all states */
3107  if (GNUNET_OK != automaton_create_proofs (dfa))
3108  {
3110  return NULL;
3111  }
3112 
3113  /* Compress linear DFA paths */
3114  if (1 != max_path_len)
3115  dfa_compress_paths (&ctx, dfa, max_path_len);
3116 
3117  return dfa;
3118 }
3119 
3120 
3127 void
3129 {
3130  struct REGEX_INTERNAL_State *s;
3131  struct REGEX_INTERNAL_State *next_state;
3132 
3133  if (NULL == a)
3134  return;
3135 
3138 
3139  for (s = a->states_head; NULL != s; s = next_state)
3140  {
3141  next_state = s->next;
3144  }
3145 
3146  GNUNET_free (a);
3147 }
3148 
3149 
3158 static int
3159 evaluate_dfa (struct REGEX_INTERNAL_Automaton *a, const char *string)
3160 {
3161  const char *strp;
3162  struct REGEX_INTERNAL_State *s;
3163  unsigned int step_len;
3164 
3165  if (DFA != a->type)
3166  {
3168  "Tried to evaluate DFA, but NFA automaton given");
3169  return -1;
3170  }
3171 
3172  s = a->start;
3173 
3174  /* If the string is empty but the starting state is accepting, we accept. */
3175  if (((NULL == string) || (0 == strlen (string))) && s->accepting)
3176  return 0;
3177 
3178  for (strp = string; NULL != strp && *strp; strp += step_len)
3179  {
3180  step_len = dfa_move (&s, strp);
3181 
3182  if (NULL == s)
3183  break;
3184  }
3185 
3186  if ((NULL != s) && s->accepting)
3187  return 0;
3188 
3189  return 1;
3190 }
3191 
3192 
3200 static int
3201 evaluate_nfa (struct REGEX_INTERNAL_Automaton *a, const char *string)
3202 {
3203  const char *strp;
3204  char str[2];
3205  struct REGEX_INTERNAL_State *s;
3206  struct REGEX_INTERNAL_StateSet sset;
3207  struct REGEX_INTERNAL_StateSet new_sset;
3208  struct REGEX_INTERNAL_StateSet singleton_set;
3209  unsigned int i;
3210  int result;
3211 
3212  if (NFA != a->type)
3213  {
3215  "Tried to evaluate NFA, but DFA automaton given");
3216  return -1;
3217  }
3218 
3219  /* If the string is empty but the starting state is accepting, we accept. */
3220  if (((NULL == string) || (0 == strlen (string))) && a->start->accepting)
3221  return 0;
3222 
3223  result = 1;
3224  memset (&singleton_set, 0, sizeof(struct REGEX_INTERNAL_StateSet));
3225  state_set_append (&singleton_set, a->start);
3226  nfa_closure_set_create (&sset, a, &singleton_set, NULL);
3227  state_set_clear (&singleton_set);
3228 
3229  str[1] = '\0';
3230  for (strp = string; NULL != strp && *strp; strp++)
3231  {
3232  str[0] = *strp;
3233  nfa_closure_set_create (&new_sset, a, &sset, str);
3234  state_set_clear (&sset);
3235  nfa_closure_set_create (&sset, a, &new_sset, 0);
3236  state_set_clear (&new_sset);
3237  }
3238 
3239  for (i = 0; i < sset.off; i++)
3240  {
3241  s = sset.states[i];
3242  if ((NULL != s) && (s->accepting))
3243  {
3244  result = 0;
3245  break;
3246  }
3247  }
3248 
3249  state_set_clear (&sset);
3250  return result;
3251 }
3252 
3253 
3261 int
3262 REGEX_INTERNAL_eval (struct REGEX_INTERNAL_Automaton *a, const char *string)
3263 {
3264  int result;
3265 
3266  switch (a->type)
3267  {
3268  case DFA:
3269  result = evaluate_dfa (a, string);
3270  break;
3271 
3272  case NFA:
3273  result = evaluate_nfa (a, string);
3274  break;
3275 
3276  default:
3278  "Evaluating regex failed, automaton has no type!\n");
3279  result = GNUNET_SYSERR;
3280  break;
3281  }
3282 
3283  return result;
3284 }
3285 
3286 
3298 const char *
3300 {
3301  if (NULL == a)
3302  return NULL;
3303 
3304  return a->canonical_regex;
3305 }
3306 
3307 
3315 unsigned int
3317 {
3318  unsigned int t_count;
3319  struct REGEX_INTERNAL_State *s;
3320 
3321  if (NULL == a)
3322  return 0;
3323 
3324  t_count = 0;
3325  for (s = a->states_head; NULL != s; s = s->next)
3326  t_count += s->transition_count;
3327 
3328  return t_count;
3329 }
3330 
3331 
3342 size_t
3343 REGEX_INTERNAL_get_first_key (const char *input_string,
3344  size_t string_len,
3345  struct GNUNET_HashCode *key)
3346 {
3347  size_t size;
3348 
3349  size = string_len < GNUNET_REGEX_INITIAL_BYTES ? string_len
3351  if (NULL == input_string)
3352  {
3353  GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Given input string was NULL!\n");
3354  return 0;
3355  }
3356  GNUNET_CRYPTO_hash (input_string, size, key);
3357 
3358  return size;
3359 }
3360 
3361 
3372 static void
3373 iterate_initial_edge (unsigned int min_len,
3374  unsigned int max_len,
3375  char *consumed_string,
3376  struct REGEX_INTERNAL_State *state,
3378  void *iterator_cls)
3379 {
3380  char *temp;
3381  struct REGEX_INTERNAL_Transition *t;
3382  unsigned int num_edges = state->transition_count;
3383  struct REGEX_BLOCK_Edge edges[num_edges];
3384  struct REGEX_BLOCK_Edge edge[1];
3385  struct GNUNET_HashCode hash;
3386  struct GNUNET_HashCode hash_new;
3387  unsigned int cur_len;
3388 
3389  if (NULL != consumed_string)
3390  cur_len = strlen (consumed_string);
3391  else
3392  cur_len = 0;
3393 
3394  if (((cur_len >= min_len) || (GNUNET_YES == state->accepting)) &&
3395  (cur_len > 0) && (NULL != consumed_string))
3396  {
3397  if (cur_len <= max_len)
3398  {
3399  if ((NULL != state->proof) &&
3400  (0 != strcmp (consumed_string, state->proof)))
3401  {
3402  (void) state_get_edges (state, edges);
3403  GNUNET_CRYPTO_hash (consumed_string, strlen (consumed_string), &hash);
3405  "Start state for string `%s' is %s\n",
3406  consumed_string,
3407  GNUNET_h2s (&hash));
3408  iterator (iterator_cls,
3409  &hash,
3410  consumed_string,
3411  state->accepting,
3412  num_edges,
3413  edges);
3414  }
3415 
3416  if ((GNUNET_YES == state->accepting) && (cur_len > 1) &&
3417  (state->transition_count < 1) && (cur_len < max_len))
3418  {
3419  /* Special case for regex consisting of just a string that is shorter than
3420  * max_len */
3421  edge[0].label = &consumed_string[cur_len - 1];
3422  edge[0].destination = state->hash;
3423  temp = GNUNET_strdup (consumed_string);
3424  temp[cur_len - 1] = '\0';
3425  GNUNET_CRYPTO_hash (temp, cur_len - 1, &hash_new);
3427  "Start state for short string `%s' is %s\n",
3428  temp,
3429  GNUNET_h2s (&hash_new));
3430  iterator (iterator_cls, &hash_new, temp, GNUNET_NO, 1, edge);
3431  GNUNET_free (temp);
3432  }
3433  }
3434  else /* cur_len > max_len */
3435  {
3436  /* Case where the concatenated labels are longer than max_len, then split. */
3437  edge[0].label = &consumed_string[max_len];
3438  edge[0].destination = state->hash;
3439  temp = GNUNET_strdup (consumed_string);
3440  temp[max_len] = '\0';
3441  GNUNET_CRYPTO_hash (temp, max_len, &hash);
3443  "Start state at split edge `%s'-`%s` is %s\n",
3444  temp,
3445  edge[0].label,
3446  GNUNET_h2s (&hash_new));
3447  iterator (iterator_cls, &hash, temp, GNUNET_NO, 1, edge);
3448  GNUNET_free (temp);
3449  }
3450  }
3451 
3452  if (cur_len < max_len)
3453  {
3454  for (t = state->transitions_head; NULL != t; t = t->next)
3455  {
3456  if (NULL != strchr (t->label, (int) '.'))
3457  {
3458  /* Wildcards not allowed during starting states */
3459  GNUNET_break (0);
3460  continue;
3461  }
3462  if (NULL != consumed_string)
3463  GNUNET_asprintf (&temp, "%s%s", consumed_string, t->label);
3464  else
3465  GNUNET_asprintf (&temp, "%s", t->label);
3466  iterate_initial_edge (min_len,
3467  max_len,
3468  temp,
3469  t->to_state,
3470  iterator,
3471  iterator_cls);
3472  GNUNET_free (temp);
3473  }
3474  }
3475 }
3476 
3477 
3486 void
3489  void *iterator_cls)
3490 {
3491  struct REGEX_INTERNAL_State *s;
3492 
3493  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating over starting edges\n");
3496  NULL,
3497  a->start,
3498  iterator,
3499  iterator_cls);
3500  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating over DFA edges\n");
3501  for (s = a->states_head; NULL != s; s = s->next)
3502  {
3503  struct REGEX_BLOCK_Edge edges[s->transition_count];
3504  unsigned int num_edges;
3505 
3506  num_edges = state_get_edges (s, edges);
3507  if (((NULL != s->proof) && (0 < strlen (s->proof))) || s->accepting)
3508  {
3510  "Creating DFA edges at `%s' under key %s\n",
3511  s->proof,
3512  GNUNET_h2s (&s->hash));
3513  iterator (iterator_cls,
3514  &s->hash,
3515  s->proof,
3516  s->accepting,
3517  num_edges,
3518  edges);
3519  }
3520  s->marked = GNUNET_NO;
3521  }
3522 }
3523 
3524 
3532 {
3534  char *proof;
3538 };
3539 
3540 
3545 {
3548 };
3549 
3550 
3562 static void
3563 store_all_states (void *cls,
3564  const struct GNUNET_HashCode *key,
3565  const char *proof,
3566  int accepting,
3567  unsigned int num_edges,
3568  const struct REGEX_BLOCK_Edge *edges)
3569 {
3570  struct GNUNET_CONTAINER_MultiHashMap *hm = cls;
3571  struct temporal_state_store *tmp;
3572  size_t edges_size;
3573 
3574  tmp = GNUNET_new (struct temporal_state_store);
3575  tmp->reachable = GNUNET_NO;
3576  tmp->proof = GNUNET_strdup (proof);
3577  tmp->accepting = accepting;
3578  tmp->num_edges = num_edges;
3579  edges_size = sizeof(struct REGEX_BLOCK_Edge) * num_edges;
3580  tmp->edges = GNUNET_malloc (edges_size);
3581  GNUNET_memcpy (tmp->edges, edges, edges_size);
3584  hm,
3585  key,
3586  tmp,
3588 }
3589 
3590 
3599 static void
3601  struct GNUNET_CONTAINER_MultiHashMap *hm)
3602 {
3603  struct temporal_state_store *child;
3604  unsigned int i;
3605 
3606  if (GNUNET_YES == state->reachable)
3607  /* visited */
3608  return;
3609 
3610  state->reachable = GNUNET_YES;
3611  for (i = 0; i < state->num_edges; i++)
3612  {
3613  child =
3615  if (NULL == child)
3616  {
3617  GNUNET_break (0);
3618  continue;
3619  }
3620  mark_as_reachable (child, hm);
3621  }
3622 }
3623 
3624 
3634 static int
3636  const struct GNUNET_HashCode *key,
3637  void *value)
3638 {
3639  struct GNUNET_CONTAINER_MultiHashMap *hm = cls;
3640  struct temporal_state_store *state = value;
3641 
3642  if (GNUNET_YES == state->reachable)
3643  /* already visited and marked */
3644  return GNUNET_YES;
3645 
3646  if ((GNUNET_REGEX_INITIAL_BYTES > strlen (state->proof)) &&
3647  (GNUNET_NO == state->accepting) )
3648  /* not directly reachable */
3649  return GNUNET_YES;
3650 
3651  mark_as_reachable (state, hm);
3652  return GNUNET_YES;
3653 }
3654 
3655 
3666 static int
3667 iterate_reachables (void *cls, const struct GNUNET_HashCode *key, void *value)
3668 {
3669  struct client_iterator *ci = cls;
3670  struct temporal_state_store *state = value;
3671 
3672  if (GNUNET_YES == state->reachable)
3673  {
3674  ci->iterator (ci->iterator_cls,
3675  key,
3676  state->proof,
3677  state->accepting,
3678  state->num_edges,
3679  state->edges);
3680  }
3681  GNUNET_free (state->edges);
3682  GNUNET_free (state->proof);
3683  GNUNET_free (state);
3684  return GNUNET_YES;
3685 }
3686 
3687 
3698 void
3701  void *iterator_cls)
3702 {
3703  struct GNUNET_CONTAINER_MultiHashMap *hm;
3704  struct client_iterator ci;
3705 
3707  ci.iterator = iterator;
3709 
3713 
3715 }
3716 
3717 
3718 /* end of regex_internal.c */
#define GNUNET_CONTAINER_DLL_remove(head, tail, element)
Remove an element from a DLL.
unsigned int state_count
Number of states in the automaton.
static int iterator(void *cls, const struct GNUNET_PeerIdentity *key, void *value)
Iterator over hash map entries.
static void store_all_states(void *cls, const struct GNUNET_HashCode *key, const char *proof, int accepting, unsigned int num_edges, const struct REGEX_BLOCK_Edge *edges)
Iterator over all edges of a dfa.
static int automaton_create_proofs(struct REGEX_INTERNAL_Automaton *a)
Create proofs for all states in the given automaton.
unsigned int scc_id
Marking the state as part of an SCC (Strongly Connected Component).
static int iterate_reachables(void *cls, const struct GNUNET_HashCode *key, void *value)
Iterator over hash map entries.
unsigned int incoming_transition_count
Number of incoming transitions.
struct REGEX_INTERNAL_State * states_head
DLL of states.
struct REGEX_INTERNAL_State * end
End state of the partial NFA.
struct GNUNET_HashCode destination
Destionation of the edge.
static void remove_epsilon(const struct StringBuffer *str, struct StringBuffer *ret)
Remove an epsilon from the string str.
int index
Used for SCC detection.
size_t REGEX_INTERNAL_get_first_key(const char *input_string, size_t string_len, struct GNUNET_HashCode *key)
Get the first key for the given input_string.
static void nfa_closure_set_create(struct REGEX_INTERNAL_StateSet *ret, struct REGEX_INTERNAL_Automaton *nfa, struct REGEX_INTERNAL_StateSet *states, const char *label)
Calculates the closure set for the given set of states.
unsigned int traversal_id
Unique state id that is used for traversing the automaton.
static int end
Set if we are to shutdown all services (including ARM).
Definition: gnunet-arm.c:34
static void dfa_remove_unreachable_states(struct REGEX_INTERNAL_Automaton *a)
Remove all unreachable states from DFA &#39;a&#39;.
#define GNUNET_CONTAINER_DLL_insert(head, tail, element)
Insert an element at the head of a DLL.
struct REGEX_INTERNAL_Transition * next
This is a linked list.
unsigned int transition_id
Unique transition id.
static void nfa_add_plus_op(struct REGEX_INTERNAL_Context *ctx)
Pops an NFA fragment (a) from the stack and adds a new fragment (a+)
struct GNUNET_HashCode hash
Hash of the state.
unsigned int REGEX_INTERNAL_get_transition_count(struct REGEX_INTERNAL_Automaton *a)
Get the number of transitions that are contained in the given automaton.
int accepting
If this is an accepting state or not.
static int state_set_compare(struct REGEX_INTERNAL_StateSet *sset1, struct REGEX_INTERNAL_StateSet *sset2)
Compare to state sets by comparing the id&#39;s of the states that are contained in each set...
int GNUNET_snprintf(char *buf, size_t size, const char *format,...)
Like snprintf, just aborts if the buffer is of insufficient size.
unsigned int state_id
Unique state id.
struct REGEX_INTERNAL_Transition * transitions_tail
Strided transitions DLL.
static int start
Set if we are to start default services (including ARM).
Definition: gnunet-arm.c:39
#define GNUNET_assert(cond)
Use this for fatal errors that cannot be handled.
enum REGEX_INTERNAL_AutomatonType type
Type of the automaton.
#define GNUNET_CONTAINER_DLL_insert_before(head, tail, other, element)
Insert an element into a DLL before the given other element.
static int evaluate_nfa(struct REGEX_INTERNAL_Automaton *a, const char *string)
Evaluates the given string using the given NFA automaton.
struct REGEX_INTERNAL_Transition * transitions_head
DLL of transitions.
struct REGEX_INTERNAL_Automaton * REGEX_INTERNAL_construct_nfa(const char *regex, const size_t len)
Construct an NFA by parsing the regex string of length &#39;len&#39;.
struct REGEX_INTERNAL_Automaton * stack_tail
DLL of REGEX_INTERNAL_Automaton&#39;s used as a stack.
static void automaton_add_state(struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s)
Add a state to the automaton &#39;a&#39;, always use this function to alter the states DLL of the automaton...
unsigned int blen
Number of bytes allocated for sbuf.
#define GNUNET_memcpy(dst, src, n)
Call memcpy() but check for n being 0 first.
static int ret
Return value of the commandline.
Definition: gnunet-abd.c:81
int contained
Marking the state as contained.
static void sb_printf1(struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg)
Format a string buffer.
static void state_set_clear(struct REGEX_INTERNAL_StateSet *set)
Clears the given StateSet &#39;set&#39;.
#define GNUNET_NO
Definition: gnunet_common.h:78
struct REGEX_INTERNAL_StateSet nfa_set
Set of states on which this state is based on.
static void nfa_add_star_op(struct REGEX_INTERNAL_Context *ctx)
Pops a NFA fragment from the stack (a) and adds a new fragment (a*)
const char * GNUNET_h2s(const struct GNUNET_HashCode *hc)
Convert a hash value to a string (for printing debug messages).
#define GNUNET_OK
Named constants for return values.
Definition: gnunet_common.h:75
#define GNUNET_free_non_null(ptr)
Free the memory pointed to by ptr if ptr is not NULL.
#define GNUNET_new(type)
Allocate a struct or union of the given type.
Automaton representation.
static void automaton_create_proofs_simplify(const struct StringBuffer *R_last_ij, const struct StringBuffer *R_last_ik, const struct StringBuffer *R_last_kk, const struct StringBuffer *R_last_kj, struct StringBuffer *R_cur_ij, struct StringBuffer *R_cur_l, struct StringBuffer *R_cur_r)
Construct the regular expression given the inductive step, $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}...
struct REGEX_INTERNAL_State * from_state
State from which this transition origins.
static struct GNUNET_SCHEDULER_Task * t
Main task.
static void automaton_destroy_state(struct REGEX_INTERNAL_State *s)
Frees the memory used by State s.
#define GNUNET_malloc_large(size)
Wrapper around malloc.
#define GNUNET_strdup(a)
Wrapper around GNUNET_xstrdup_.
char * proof
Proof for this state.
int marked
Marking of the state.
Internal representation of the hash map.
static unsigned int dfa_move(struct REGEX_INTERNAL_State **s, const char *str)
Move from the given state &#39;s&#39; to the next state on transition &#39;str&#39;.
#define GNUNET_break(cond)
Use this for internal assertion violations that are not fatal (can be handled) but should not occur...
Edge representation.
static struct GNUNET_DNSSTUB_Context * ctx
Context for DNS resolution.
static void dfa_add_multi_strides(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Function called for each state in the DFA.
static void mark_as_reachable(struct temporal_state_store *state, struct GNUNET_CONTAINER_MultiHashMap *hm)
Mark state as reachable and call recursively on all its edges.
static void nfa_add_alternation(struct REGEX_INTERNAL_Context *ctx)
Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment that alternates between a an...
struct REGEX_INTERNAL_State * states_tail
DLL of states.
void * GNUNET_CONTAINER_multihashmap_get(const struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key)
Given a key find a value in the map matching the key.
struct REGEX_INTERNAL_State * head
MDLL of states.
Set of states using MDLL API.
void REGEX_INTERNAL_automaton_traverse(const struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *start, REGEX_INTERNAL_traverse_check check, void *check_cls, REGEX_INTERNAL_traverse_action action, void *action_cls)
Traverses the given automaton using depth-first-search (DFS) from it&#39;s start state, visiting all reachable states and calling &#39;action&#39; on each one of them.
#define GNUNET_log_strerror(level, cmd)
Log an error message at log-level &#39;level&#39; that indicates a failure of the command &#39;cmd&#39; with the mess...
int GNUNET_asprintf(char **buf, const char *format,...)
Like asprintf, just portable.
static void sb_strdup(struct StringBuffer *out, const struct StringBuffer *in)
Copy the given string buffer from &#39;in&#39; to &#39;out&#39;.
Context that contains an id counter for states and transitions as well as a DLL of automatons used as...
#define GNUNET_CONTAINER_MDLL_insert(mdll, head, tail, element)
Insert an element at the head of a MDLL.
#define GNUNET_CONTAINER_MDLL_insert_tail(mdll, head, tail, element)
Insert an element at the tail of a MDLL.
static void sb_append(struct StringBuffer *ret, const struct StringBuffer *sarg)
Append a string.
#define GNUNET_array_grow(arr, size, tsize)
Grow a well-typed (!) array.
enum State state
current state of profiling
static struct GNUNET_OS_Process * p
Helper process we started.
Definition: gnunet-qr.c:59
, &#39; bother checking if a value already exists (faster than GNUNET_CONTAINER_MULTIHASHMAPOPTION_...
Store regex iterator and cls in one place to pass to the hashmap iterator.
static char * value
Value of the record to add/remove.
const unsigned int stride
Length of the strides.
library to parse regular expressions into dfa
char * name
Human readable name of the state.
static void sb_printf2(struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg1, const struct StringBuffer *sarg2)
Format a string buffer.
static uint64_t proof
Definition: gnunet-scrypt.c:41
unsigned int transition_count
Number of transitions from this state to other states.
static int state_compare(const void *a, const void *b)
Compare two states.
static void sb_strdup_cstr(struct StringBuffer *out, const char *cstr)
Copy the given string buffer from &#39;in&#39; to &#39;out&#39;.
#define GNUNET_realloc(ptr, size)
Wrapper around realloc.
void GNUNET_CRYPTO_hash(const void *block, size_t size, struct GNUNET_HashCode *ret)
Compute hash of a given block.
Definition: crypto_hash.c:48
static void sb_wrap(struct StringBuffer *ret, const char *format, size_t extra_chars)
Wrap a string buffer, that is, set ret to the format string which contains an "%s" which is to be rep...
static int nullstrcmp(const char *str1, const char *str2)
Compare two strings for equality.
void GNUNET_CONTAINER_multihashmap_destroy(struct GNUNET_CONTAINER_MultiHashMap *map)
Destroy a hash map.
static struct PeerEntry ** table
Table with our interned peer IDs.
Definition: peer.c:55
#define GNUNET_CONTAINER_MDLL_remove(mdll, head, tail, element)
Remove an element from a MDLL.
#define GNUNET_MAX(a, b)
Definition: gnunet_common.h:82
int16_t null_flag
Buffer currently represents "NULL" (not the empty string!)
static void dfa_remove_dead_states(struct REGEX_INTERNAL_Automaton *a)
Remove all dead states from the DFA &#39;a&#39;.
void(* REGEX_INTERNAL_KeyIterator)(void *cls, const struct GNUNET_HashCode *key, const char *proof, int accepting, unsigned int num_edges, const struct REGEX_BLOCK_Edge *edges)
Iterator callback function.
void REGEX_INTERNAL_dfa_add_multi_strides(struct REGEX_INTERNAL_Context *regex_ctx, struct REGEX_INTERNAL_Automaton *dfa, const unsigned int stride_len)
Adds multi-strided transitions to the given &#39;dfa&#39;.
static void iterate_initial_edge(unsigned int min_len, unsigned int max_len, char *consumed_string, struct REGEX_INTERNAL_State *state, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
Recursive function that calls the iterator for each synthetic start state.
struct REGEX_INTERNAL_State ** states
Array of states.
static int result
Global testing status.
static void REGEX_INTERNAL_context_init(struct REGEX_INTERNAL_Context *ctx)
Initialize a new context.
Transition between two states.
static void sb_init(struct StringBuffer *sb, size_t n)
Initialize string buffer for storing strings of up to n characters.
static void automaton_merge_states(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s1, struct REGEX_INTERNAL_State *s2)
Merge two states into one.
Struct to hold all the relevant state information in the HashMap.
#define GNUNET_REGEX_INITIAL_BYTES
Constant for how many bytes the initial string regex should have.
int16_t synced
If this entry is part of the last/current generation array, this flag is GNUNET_YES if the last and c...
A 512-bit hashcode.
static struct REGEX_INTERNAL_State * dfa_state_create(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_StateSet *nfa_states)
Creates a new DFA state based on a set of NFA states.
struct REGEX_INTERNAL_State * start
First state of the automaton.
static int evaluate_dfa(struct REGEX_INTERNAL_Automaton *a, const char *string)
Evaluates the given string using the given DFA automaton.
static int reachability_iterator(void *cls, const struct GNUNET_HashCode *key, void *value)
Iterator over hash map entries to mark the ones that are reachable.
static int sb_nullstrcmp(const struct StringBuffer *s1, const struct StringBuffer *s2)
Compare two strings for equality.
static void state_set_append(struct REGEX_INTERNAL_StateSet *set, struct REGEX_INTERNAL_State *state)
Append state to the given StateSet.
static void sb_printf3(struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg1, const struct StringBuffer *sarg2, const struct StringBuffer *sarg3)
Format a string buffer.
struct GNUNET_HashCode key
The key used in the DHT.
#define GNUNET_SYSERR
Definition: gnunet_common.h:76
static unsigned int size
Size of the "table".
Definition: peer.c:67
static void state_add_transition(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_State *from_state, const char *label, struct REGEX_INTERNAL_State *to_state)
Adds a transition from one state to another on label.
Context for adding strided transitions to a DFA.
static int sb_strncmp(const struct StringBuffer *str1, const struct StringBuffer *str2, size_t n)
Compare n bytes of &#39;str1&#39; and &#39;str2&#39;.
static void construct_dfa_states(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *nfa, struct REGEX_INTERNAL_Automaton *dfa, struct REGEX_INTERNAL_State *dfa_state)
Create DFA states based on given &#39;nfa&#39; and starting with &#39;dfa_state&#39;.
static void nfa_add_question_op(struct REGEX_INTERNAL_Context *ctx)
Pops an NFA fragment (a) from the stack and adds a new fragment (a?)
static int dfa_minimize(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a)
Minimize the given DFA &#39;a&#39; by removing all unreachable states, removing all dead states and merging a...
#define GNUNET_CONTAINER_DLL_insert_tail(head, tail, element)
Insert an element at the tail of a DLL.
void REGEX_INTERNAL_automaton_destroy(struct REGEX_INTERNAL_Automaton *a)
Free the memory allocated by constructing the REGEX_INTERNAL_Automaton data structure.
unsigned int off
Number of entries in use in the &#39;states&#39; array.
void REGEX_INTERNAL_iterate_all_edges(struct REGEX_INTERNAL_Automaton *a, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
Iterate over all edges starting from start state of automaton &#39;a&#39;.
int GNUNET_CONTAINER_multihashmap_put(struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key, void *value, enum GNUNET_CONTAINER_MultiHashMapOption opt)
Store a key-value pair in the map.
static int has_epsilon(const struct StringBuffer *str)
Check if the string &#39;str&#39; starts with an epsilon (empty string).
struct REGEX_BLOCK_Edge * edges
const char * label
Label of the edge.
static int sb_strncmp_cstr(const struct StringBuffer *str1, const char *str2, size_t n)
Compare n bytes of &#39;str1&#39; and &#39;str2&#39;.
static void automaton_remove_state(struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s)
Remove a state from the given automaton &#39;a&#39;.
void(* REGEX_INTERNAL_traverse_action)(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Function that is called with each state, when traversing an automaton.
#define GNUNET_strndup(a, length)
Wrapper around GNUNET_xstrndup_.
unsigned int id
Unique id of this transition.
const char * REGEX_INTERNAL_get_canonical_regex(struct REGEX_INTERNAL_Automaton *a)
Get the canonical regex of the given automaton.
static unsigned int state_get_edges(struct REGEX_INTERNAL_State *s, struct REGEX_BLOCK_Edge *edges)
Get all edges leaving state s.
struct REGEX_INTERNAL_State * next
This is a linked list to keep states in an automaton.
int is_multistrided
GNUNET_YES, if multi strides have been added to the Automaton.
int REGEX_INTERNAL_eval(struct REGEX_INTERNAL_Automaton *a, const char *string)
Evaluates the given string against the given compiled regex a.
static struct GNUNET_OS_Process * child
The child process we spawn.
static void state_remove_transition(struct REGEX_INTERNAL_State *state, struct REGEX_INTERNAL_Transition *transition)
Remove a &#39;transition&#39; from &#39;state&#39;.
static void nfa_add_states(struct REGEX_INTERNAL_Automaton *n, struct REGEX_INTERNAL_State *states_head, struct REGEX_INTERNAL_State *states_tail)
Adds a list of states to the given automaton &#39;n&#39;.
static int sb_strcmp(const struct StringBuffer *s1, const struct StringBuffer *s2)
Compare two strings for equality.
unsigned int dfs_id
Linear state ID accquired by depth-first-search.
static struct REGEX_INTERNAL_Automaton * nfa_fragment_create(struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *end)
Creates a new NFA fragment.
struct REGEX_INTERNAL_State * to_state
State to which this transition leads.
struct REGEX_INTERNAL_Automaton * REGEX_INTERNAL_construct_dfa(const char *regex, const size_t len, unsigned int max_path_len)
Construct DFA for the given &#39;regex&#39; of length &#39;len&#39;.
char * canonical_regex
Canonical regex (result of RX->NFA->DFA->RX)
#define GNUNET_log(kind,...)
static void nfa_add_concatenation(struct REGEX_INTERNAL_Context *ctx)
Pops two NFA fragments (a, b) from the stack and concatenates them (ab)
REGEX_INTERNAL_KeyIterator iterator
static void automaton_state_traverse(struct REGEX_INTERNAL_State *s, int *marks, unsigned int *count, REGEX_INTERNAL_traverse_check check, void *check_cls, REGEX_INTERNAL_traverse_action action, void *action_cls)
Depth-first traversal (DFS) of all states that are reachable from state &#39;s&#39;.
static void mark_states(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Set the given state &#39;marked&#39; to GNUNET_YES.
struct GNUNET_CONTAINER_MultiHashMap * GNUNET_CONTAINER_multihashmap_create(unsigned int len, int do_not_copy_keys)
Create a multi hash map.
char * label
Label for this transition.
int(* REGEX_INTERNAL_traverse_check)(void *cls, struct REGEX_INTERNAL_State *s, struct REGEX_INTERNAL_Transition *t)
Function that get&#39;s passed to automaton traversal and is called before each next traversal from state...
static void automaton_fragment_clear(struct REGEX_INTERNAL_Automaton *a)
Clears an automaton fragment.
static void sb_free(struct StringBuffer *sb)
Free resources of the given string buffer.
#define GNUNET_YES
Definition: gnunet_common.h:77
void REGEX_INTERNAL_iterate_reachable_edges(struct REGEX_INTERNAL_Automaton *a, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
Iterate over all edges of automaton &#39;a&#39; that are reachable from a state with a proof of at least GNUN...
size_t slen
Length of the string in the buffer.
String container for faster string operations.
static struct GNUNET_ARM_Operation * op
Current operation.
Definition: gnunet-arm.c:144
common internal definitions for regex library.
void dfa_compress_paths_helper(struct REGEX_INTERNAL_Automaton *dfa, struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *cur, char *label, unsigned int max_len, struct REGEX_INTERNAL_Transition **transitions_head, struct REGEX_INTERNAL_Transition **transitions_tail)
Recursive Helper function for DFA path compression.
char * abuf
Allocated buffer.
unsigned int len
Length of the MDLL.
static void sb_realloc(struct StringBuffer *ret, size_t nlen)
Reallocate the buffer of &#39;ret&#39; to fit &#39;nlen&#39; characters; move the existing string to the beginning of...
static void number_states(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Helper function used as &#39;action&#39; in &#39;REGEX_INTERNAL_automaton_traverse&#39; function to create the depth-...
int GNUNET_CONTAINER_multihashmap_iterate(struct GNUNET_CONTAINER_MultiHashMap *map, GNUNET_CONTAINER_MulitHashMapIteratorCallback it, void *it_cls)
Iterate over all entries in the map.
int lowlink
Used for SCC detection.
static void remove_parentheses(struct StringBuffer *str)
Remove parentheses surrounding string str.
static int dfa_merge_nondistinguishable_states(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a)
Merge all non distinguishable states in the DFA &#39;a&#39;.
unsigned int id
Unique state id.
char * sbuf
Buffer holding the string (may start in the middle!); NOT 0-terminated!
static void sb_append_cstr(struct StringBuffer *ret, const char *cstr)
Append a C string.
static int sb_strkcmp(const struct StringBuffer *str1, const struct StringBuffer *str2, size_t k)
Compare &#39;str1&#39;, starting from position &#39;k&#39;, with whole &#39;str2&#39;.
#define GNUNET_malloc(size)
Wrapper around malloc.
static void dfa_compress_paths(struct REGEX_INTERNAL_Context *regex_ctx, struct REGEX_INTERNAL_Automaton *dfa, unsigned int max_len)
Compress paths in the given &#39;dfa&#39;.
static int needs_parentheses(const struct StringBuffer *str)
Check if the given string str needs parentheses around it when using it to generate a regex...
#define GNUNET_free(ptr)
Wrapper around free.
struct REGEX_INTERNAL_Transition * transitions_tail
DLL of transitions.
static struct REGEX_INTERNAL_State * nfa_state_create(struct REGEX_INTERNAL_Context *ctx, int accepting)
Creates a new NFA state.
static void dfa_add_multi_strides_helper(void *cls, const unsigned int depth, char *label, struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *s)
Recursive helper function to add strides to a DFA.
struct REGEX_INTERNAL_Automaton * stack_head
DLL of REGEX_INTERNAL_Automaton&#39;s used as a stack.
struct REGEX_INTERNAL_Transition * transitions_head
Strided transitions DLL.
static void nfa_add_label(struct REGEX_INTERNAL_Context *ctx, const char *label)
Adds a new nfa fragment to the stack.
struct REGEX_INTERNAL_State * tail
MDLL of states.