GNUnet  0.10.x
regex_internal.c
Go to the documentation of this file.
1 /*
2  This file is part of GNUnet
3  Copyright (C) 2012 GNUnet e.V.
4 
5  GNUnet is free software: you can redistribute it and/or modify it
6  under the terms of the GNU Affero General Public License as published
7  by the Free Software Foundation, either version 3 of the License,
8  or (at your option) any later version.
9 
10  GNUnet is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  Affero General Public License for more details.
14 
15  You should have received a copy of the GNU Affero General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  SPDX-License-Identifier: AGPL3.0-or-later
19  */
26 #include "platform.h"
27 #include "gnunet_util_lib.h"
28 #include "gnunet_regex_service.h"
29 #include "regex_internal_lib.h"
30 #include "regex_internal.h"
31 
32 
37 #define REGEX_DEBUG_DFA GNUNET_NO
38 
47 
52 
56  unsigned int len;
57 };
58 
59 
66 static void
69 {
70  if (set->off == set->size)
71  GNUNET_array_grow(set->states, set->size, set->size * 2 + 4);
72  set->states[set->off++] = state;
73 }
74 
75 
84 static int
85 nullstrcmp(const char *str1, const char *str2)
86 {
87  if ((NULL == str1) != (NULL == str2))
88  return -1;
89  if ((NULL == str1) && (NULL == str2))
90  return 0;
91 
92  return strcmp(str1, str2);
93 }
94 
95 
105 static void
107  struct REGEX_INTERNAL_State *from_state,
108  const char *label,
109  struct REGEX_INTERNAL_State *to_state)
110 {
112  struct REGEX_INTERNAL_Transition *oth;
113 
114  if (NULL == from_state)
115  {
116  GNUNET_log(GNUNET_ERROR_TYPE_ERROR, "Could not create Transition.\n");
117  return;
118  }
119 
120  /* Do not add duplicate state transitions */
121  for (t = from_state->transitions_head; NULL != t; t = t->next)
122  {
123  if (t->to_state == to_state && 0 == nullstrcmp(t->label, label) &&
124  t->from_state == from_state)
125  return;
126  }
127 
128  /* sort transitions by label */
129  for (oth = from_state->transitions_head; NULL != oth; oth = oth->next)
130  {
131  if (0 < nullstrcmp(oth->label, label))
132  break;
133  }
134 
136  if (NULL != ctx)
137  t->id = ctx->transition_id++;
138  if (NULL != label)
139  t->label = GNUNET_strdup(label);
140  else
141  t->label = NULL;
142  t->to_state = to_state;
143  t->from_state = from_state;
144 
145  /* Add outgoing transition to 'from_state' */
146  from_state->transition_count++;
148  from_state->transitions_tail,
149  oth,
150  t);
151 }
152 
153 
160 static void
162  struct REGEX_INTERNAL_Transition *transition)
163 {
164  if (NULL == state || NULL == transition)
165  return;
166 
167  if (transition->from_state != state)
168  return;
169 
170  GNUNET_free_non_null(transition->label);
171 
172  state->transition_count--;
174  state->transitions_tail,
175  transition);
176 
177  GNUNET_free(transition);
178 }
179 
180 
191 static int
192 state_compare(const void *a, const void *b)
193 {
194  struct REGEX_INTERNAL_State **s1 = (struct REGEX_INTERNAL_State **)a;
195  struct REGEX_INTERNAL_State **s2 = (struct REGEX_INTERNAL_State **)b;
196 
197  return (*s1)->id - (*s2)->id;
198 }
199 
200 
210 static unsigned int
212 {
214  unsigned int count;
215 
216  if (NULL == s)
217  return 0;
218 
219  count = 0;
220 
221  for (t = s->transitions_head; NULL != t; t = t->next)
222  {
223  if (NULL != t->to_state)
224  {
225  edges[count].label = t->label;
226  edges[count].destination = t->to_state->hash;
227  count++;
228  }
229  }
230  return count;
231 }
232 
233 
242 static int
244  struct REGEX_INTERNAL_StateSet *sset2)
245 {
246  int result;
247  unsigned int i;
248 
249  if (NULL == sset1 || NULL == sset2)
250  return 1;
251 
252  result = sset1->off - sset2->off;
253  if (result < 0)
254  return -1;
255  if (result > 0)
256  return 1;
257  for (i = 0; i < sset1->off; i++)
258  if (0 != (result = state_compare(&sset1->states[i], &sset2->states[i])))
259  break;
260  return result;
261 }
262 
263 
269 static void
271 {
272  GNUNET_array_grow(set->states, set->size, 0);
273  set->off = 0;
274 }
275 
276 
283 static void
285 {
286  if (NULL == a)
287  return;
288 
289  a->start = NULL;
290  a->end = NULL;
291  a->states_head = NULL;
292  a->states_tail = NULL;
293  a->state_count = 0;
294  GNUNET_free(a);
295 }
296 
297 
303 static void
305 {
307  struct REGEX_INTERNAL_Transition *next_t;
308 
309  if (NULL == s)
310  return;
311 
315  for (t = s->transitions_head; NULL != t; t = next_t)
316  {
317  next_t = t->next;
319  }
320 
321  GNUNET_free(s);
322 }
323 
324 
333 static void
335  struct REGEX_INTERNAL_State *s)
336 {
337  struct REGEX_INTERNAL_State *s_check;
338  struct REGEX_INTERNAL_Transition *t_check;
339  struct REGEX_INTERNAL_Transition *t_check_next;
340 
341  if (NULL == a || NULL == s)
342  return;
343 
344  /* remove all transitions leading to this state */
345  for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
346  {
347  for (t_check = s_check->transitions_head; NULL != t_check;
348  t_check = t_check_next)
349  {
350  t_check_next = t_check->next;
351  if (t_check->to_state == s)
352  state_remove_transition(s_check, t_check);
353  }
354  }
355 
356  /* remove state */
358  a->state_count--;
359 
361 }
362 
363 
373 static void
375  struct REGEX_INTERNAL_Automaton *a,
376  struct REGEX_INTERNAL_State *s1,
377  struct REGEX_INTERNAL_State *s2)
378 {
379  struct REGEX_INTERNAL_State *s_check;
380  struct REGEX_INTERNAL_Transition *t_check;
382  struct REGEX_INTERNAL_Transition *t_next;
383  int is_dup;
384 
385  if (s1 == s2)
386  return;
387 
388  /* 1. Make all transitions pointing to s2 point to s1, unless this transition
389  * does not already exists, if it already exists remove transition. */
390  for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
391  {
392  for (t_check = s_check->transitions_head; NULL != t_check; t_check = t_next)
393  {
394  t_next = t_check->next;
395 
396  if (s2 == t_check->to_state)
397  {
398  is_dup = GNUNET_NO;
399  for (t = t_check->from_state->transitions_head; NULL != t; t = t->next)
400  {
401  if (t->to_state == s1 && 0 == strcmp(t_check->label, t->label))
402  is_dup = GNUNET_YES;
403  }
404  if (GNUNET_NO == is_dup)
405  t_check->to_state = s1;
406  else
407  state_remove_transition(t_check->from_state, t_check);
408  }
409  }
410  }
411 
412  /* 2. Add all transitions from s2 to sX to s1 */
413  for (t_check = s2->transitions_head; NULL != t_check; t_check = t_check->next)
414  {
415  if (t_check->to_state != s1)
416  state_add_transition(ctx, s1, t_check->label, t_check->to_state);
417  }
418 
419  /* 3. Rename s1 to {s1,s2} */
420 #if REGEX_DEBUG_DFA
421  char *new_name;
422 
423  new_name = s1->name;
424  GNUNET_asprintf(&s1->name, "{%s,%s}", new_name, s2->name);
425  GNUNET_free(new_name);
426 #endif
427 
428  /* remove state */
430  a->state_count--;
432 }
433 
434 
442 static void
444  struct REGEX_INTERNAL_State *s)
445 {
447  a->state_count++;
448 }
449 
450 
465 static void
467  int *marks,
468  unsigned int *count,
470  void *check_cls,
472  void *action_cls)
473 {
475 
476  if (GNUNET_YES == marks[s->traversal_id])
477  return;
478 
479  marks[s->traversal_id] = GNUNET_YES;
480 
481  if (NULL != action)
482  action(action_cls, *count, s);
483 
484  (*count)++;
485 
486  for (t = s->transitions_head; NULL != t; t = t->next)
487  {
488  if (NULL == check ||
489  (NULL != check && GNUNET_YES == check(check_cls, s, t)))
490  {
492  marks,
493  count,
494  check,
495  check_cls,
496  action,
497  action_cls);
498  }
499  }
500 }
501 
502 
516 void
518  struct REGEX_INTERNAL_State *start,
520  void *check_cls,
522  void *action_cls)
523 {
524  unsigned int count;
525  struct REGEX_INTERNAL_State *s;
526 
527  if (NULL == a || 0 == a->state_count)
528  return;
529 
530  int marks[a->state_count];
531 
532  for (count = 0, s = a->states_head; NULL != s && count < a->state_count;
533  s = s->next, count++)
534  {
535  s->traversal_id = count;
536  marks[s->traversal_id] = GNUNET_NO;
537  }
538 
539  count = 0;
540 
541  if (NULL == start)
542  s = a->start;
543  else
544  s = start;
545 
547  marks,
548  &count,
549  check,
550  check_cls,
551  action,
552  action_cls);
553 }
554 
555 
559 struct StringBuffer {
564  char *sbuf;
565 
569  char *abuf;
570 
574  size_t slen;
575 
579  unsigned int blen;
580 
584  int16_t null_flag;
585 
595  int16_t synced;
596 };
597 
598 
607 static int
608 sb_nullstrcmp(const struct StringBuffer *s1, const struct StringBuffer *s2)
609 {
610  if ((GNUNET_YES == s1->null_flag) && (GNUNET_YES == s2->null_flag))
611  return 0;
612  if ((GNUNET_YES == s1->null_flag) || (GNUNET_YES == s2->null_flag))
613  return -1;
614  if (s1->slen != s2->slen)
615  return -1;
616  if (0 == s1->slen)
617  return 0;
618  return memcmp(s1->sbuf, s2->sbuf, s1->slen);
619 }
620 
621 
630 static int
631 sb_strcmp(const struct StringBuffer *s1, const struct StringBuffer *s2)
632 {
633  if (s1->slen != s2->slen)
634  return -1;
635  if (0 == s1->slen)
636  return 0;
637  return memcmp(s1->sbuf, s2->sbuf, s1->slen);
638 }
639 
640 
648 static void
649 sb_realloc(struct StringBuffer *ret, size_t nlen)
650 {
651  char *old;
652 
653  GNUNET_assert(nlen >= ret->slen);
654  old = ret->abuf;
655  ret->abuf = GNUNET_malloc(nlen);
656  ret->blen = nlen;
657  GNUNET_memcpy(ret->abuf, ret->sbuf, ret->slen);
658  ret->sbuf = ret->abuf;
660 }
661 
662 
669 static void
670 sb_append(struct StringBuffer *ret, const struct StringBuffer *sarg)
671 {
672  if (GNUNET_YES == ret->null_flag)
673  ret->slen = 0;
674  ret->null_flag = GNUNET_NO;
675  if (ret->blen < sarg->slen + ret->slen)
676  sb_realloc(ret, ret->blen + sarg->slen + 128);
677  GNUNET_memcpy(&ret->sbuf[ret->slen], sarg->sbuf, sarg->slen);
678  ret->slen += sarg->slen;
679 }
680 
681 
688 static void
689 sb_append_cstr(struct StringBuffer *ret, const char *cstr)
690 {
691  size_t cstr_len = strlen(cstr);
692 
693  if (GNUNET_YES == ret->null_flag)
694  ret->slen = 0;
695  ret->null_flag = GNUNET_NO;
696  if (ret->blen < cstr_len + ret->slen)
697  sb_realloc(ret, ret->blen + cstr_len + 128);
698  GNUNET_memcpy(&ret->sbuf[ret->slen], cstr, cstr_len);
699  ret->slen += cstr_len;
700 }
701 
702 
713 static void
714 sb_wrap(struct StringBuffer *ret, const char *format, size_t extra_chars)
715 {
716  char *temp;
717 
718  if (GNUNET_YES == ret->null_flag)
719  ret->slen = 0;
720  ret->null_flag = GNUNET_NO;
721  temp = GNUNET_malloc(ret->slen + extra_chars + 1);
722  GNUNET_snprintf(temp,
723  ret->slen + extra_chars + 1,
724  format,
725  (int)ret->slen,
726  ret->sbuf);
728  ret->abuf = temp;
729  ret->sbuf = temp;
730  ret->blen = ret->slen + extra_chars + 1;
731  ret->slen = ret->slen + extra_chars;
732 }
733 
734 
744 static void
746  const char *format,
747  size_t extra_chars,
748  const struct StringBuffer *sarg)
749 {
750  if (ret->blen < sarg->slen + extra_chars + 1)
751  sb_realloc(ret, sarg->slen + extra_chars + 1);
752  ret->null_flag = GNUNET_NO;
753  ret->sbuf = ret->abuf;
754  ret->slen = sarg->slen + extra_chars;
755  GNUNET_snprintf(ret->sbuf, ret->blen, format, (int)sarg->slen, sarg->sbuf);
756 }
757 
758 
768 static void
770  const char *format,
771  size_t extra_chars,
772  const struct StringBuffer *sarg1,
773  const struct StringBuffer *sarg2)
774 {
775  if (ret->blen < sarg1->slen + sarg2->slen + extra_chars + 1)
776  sb_realloc(ret, sarg1->slen + sarg2->slen + extra_chars + 1);
777  ret->null_flag = GNUNET_NO;
778  ret->slen = sarg1->slen + sarg2->slen + extra_chars;
779  ret->sbuf = ret->abuf;
780  GNUNET_snprintf(ret->sbuf,
781  ret->blen,
782  format,
783  (int)sarg1->slen,
784  sarg1->sbuf,
785  (int)sarg2->slen,
786  sarg2->sbuf);
787 }
788 
789 
801 static void
803  const char *format,
804  size_t extra_chars,
805  const struct StringBuffer *sarg1,
806  const struct StringBuffer *sarg2,
807  const struct StringBuffer *sarg3)
808 {
809  if (ret->blen < sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1)
810  sb_realloc(ret, sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1);
811  ret->null_flag = GNUNET_NO;
812  ret->slen = sarg1->slen + sarg2->slen + sarg3->slen + extra_chars;
813  ret->sbuf = ret->abuf;
814  GNUNET_snprintf(ret->sbuf,
815  ret->blen,
816  format,
817  (int)sarg1->slen,
818  sarg1->sbuf,
819  (int)sarg2->slen,
820  sarg2->sbuf,
821  (int)sarg3->slen,
822  sarg3->sbuf);
823 }
824 
825 
832 static void
834 {
835  GNUNET_array_grow(sb->abuf, sb->blen, 0);
836  sb->slen = 0;
837  sb->sbuf = NULL;
838  sb->null_flag = GNUNET_YES;
839 }
840 
841 
848 static void
849 sb_strdup(struct StringBuffer *out, const struct StringBuffer *in)
850 
851 {
852  out->null_flag = in->null_flag;
853  if (GNUNET_YES == out->null_flag)
854  return;
855  if (out->blen < in->slen)
856  {
857  GNUNET_array_grow(out->abuf, out->blen, in->slen);
858  }
859  out->sbuf = out->abuf;
860  out->slen = in->slen;
861  GNUNET_memcpy(out->sbuf, in->sbuf, out->slen);
862 }
863 
864 
871 static void
872 sb_strdup_cstr(struct StringBuffer *out, const char *cstr)
873 {
874  if (NULL == cstr)
875  {
876  out->null_flag = GNUNET_YES;
877  return;
878  }
879  out->null_flag = GNUNET_NO;
880  out->slen = strlen(cstr);
881  if (out->blen < out->slen)
882  {
883  GNUNET_array_grow(out->abuf, out->blen, out->slen);
884  }
885  out->sbuf = out->abuf;
886  GNUNET_memcpy(out->sbuf, cstr, out->slen);
887 }
888 
889 
898 static int
899 needs_parentheses(const struct StringBuffer *str)
900 {
901  size_t slen;
902  const char *op;
903  const char *cl;
904  const char *pos;
905  const char *end;
906  unsigned int cnt;
907 
908  if ((GNUNET_YES == str->null_flag) || ((slen = str->slen) < 2))
909  return GNUNET_NO;
910  pos = str->sbuf;
911  if ('(' != pos[0])
912  return GNUNET_YES;
913  end = str->sbuf + slen;
914  cnt = 1;
915  pos++;
916  while (cnt > 0)
917  {
918  cl = memchr(pos, ')', end - pos);
919  if (NULL == cl)
920  {
921  GNUNET_break(0);
922  return GNUNET_YES;
923  }
924  /* while '(' before ')', count opening parens */
925  while ((NULL != (op = memchr(pos, '(', end - pos))) && (op < cl))
926  {
927  cnt++;
928  pos = op + 1;
929  }
930  /* got ')' first */
931  cnt--;
932  pos = cl + 1;
933  }
934  return (*pos == '\0') ? GNUNET_NO : GNUNET_YES;
935 }
936 
937 
947 static void
949 {
950  size_t slen;
951  const char *pos;
952  const char *end;
953  const char *sbuf;
954  const char *op;
955  const char *cp;
956  unsigned int cnt;
957 
958  if (0)
959  return;
960  sbuf = str->sbuf;
961  if ((GNUNET_YES == str->null_flag) || (1 >= (slen = str->slen)) ||
962  ('(' != str->sbuf[0]) || (')' != str->sbuf[slen - 1]))
963  return;
964  cnt = 0;
965  pos = &sbuf[1];
966  end = &sbuf[slen - 1];
967  op = memchr(pos, '(', end - pos);
968  cp = memchr(pos, ')', end - pos);
969  while (NULL != cp)
970  {
971  while ((NULL != op) && (op < cp))
972  {
973  cnt++;
974  pos = op + 1;
975  op = memchr(pos, '(', end - pos);
976  }
977  while ((NULL != cp) && ((NULL == op) || (cp < op)))
978  {
979  if (0 == cnt)
980  return; /* can't strip parens */
981  cnt--;
982  pos = cp + 1;
983  cp = memchr(pos, ')', end - pos);
984  }
985  }
986  if (0 != cnt)
987  {
988  GNUNET_break(0);
989  return;
990  }
991  str->sbuf++;
992  str->slen -= 2;
993 }
994 
995 
1004 static int
1005 has_epsilon(const struct StringBuffer *str)
1006 {
1007  return (GNUNET_YES != str->null_flag) && (0 < str->slen) &&
1008  ('(' == str->sbuf[0]) && ('|' == str->sbuf[1]) &&
1009  (')' == str->sbuf[str->slen - 1]);
1010 }
1011 
1012 
1022 static void
1023 remove_epsilon(const struct StringBuffer *str, struct StringBuffer *ret)
1024 {
1025  if (GNUNET_YES == str->null_flag)
1026  {
1027  ret->null_flag = GNUNET_YES;
1028  return;
1029  }
1030  if ((str->slen > 1) && ('(' == str->sbuf[0]) && ('|' == str->sbuf[1]) &&
1031  (')' == str->sbuf[str->slen - 1]))
1032  {
1033  /* remove epsilon */
1034  if (ret->blen < str->slen - 3)
1035  {
1036  GNUNET_array_grow(ret->abuf, ret->blen, str->slen - 3);
1037  }
1038  ret->sbuf = ret->abuf;
1039  ret->slen = str->slen - 3;
1040  GNUNET_memcpy(ret->sbuf, &str->sbuf[2], ret->slen);
1041  return;
1042  }
1043  sb_strdup(ret, str);
1044 }
1045 
1046 
1056 static int
1057 sb_strncmp(const struct StringBuffer *str1,
1058  const struct StringBuffer *str2,
1059  size_t n)
1060 {
1061  size_t max;
1062 
1063  if ((str1->slen != str2->slen) && ((str1->slen < n) || (str2->slen < n)))
1064  return -1;
1065  max = GNUNET_MAX(str1->slen, str2->slen);
1066  if (max > n)
1067  max = n;
1068  return memcmp(str1->sbuf, str2->sbuf, max);
1069 }
1070 
1071 
1081 static int
1082 sb_strncmp_cstr(const struct StringBuffer *str1, const char *str2, size_t n)
1083 {
1084  if (str1->slen < n)
1085  return -1;
1086  return memcmp(str1->sbuf, str2, n);
1087 }
1088 
1089 
1097 static void
1098 sb_init(struct StringBuffer *sb, size_t n)
1099 {
1100  sb->null_flag = GNUNET_NO;
1101  sb->abuf = sb->sbuf = (0 == n) ? NULL : GNUNET_malloc(n);
1102  sb->blen = n;
1103  sb->slen = 0;
1104 }
1105 
1106 
1116 static int
1117 sb_strkcmp(const struct StringBuffer *str1,
1118  const struct StringBuffer *str2,
1119  size_t k)
1120 {
1121  if ((GNUNET_YES == str1->null_flag) || (GNUNET_YES == str2->null_flag) ||
1122  (k > str1->slen) || (str1->slen - k != str2->slen))
1123  return -1;
1124  return memcmp(&str1->sbuf[k], str2->sbuf, str2->slen);
1125 }
1126 
1127 
1136 static void
1137 number_states(void *cls,
1138  const unsigned int count,
1139  struct REGEX_INTERNAL_State *s)
1140 {
1141  struct REGEX_INTERNAL_State **states = cls;
1142 
1143  s->dfs_id = count;
1144  if (NULL != states)
1145  states[count] = s;
1146 }
1147 
1148 
1149 #define PRIS(a) \
1150  ((GNUNET_YES == a.null_flag) ? 6 : (int)a.slen), \
1151  ((GNUNET_YES == a.null_flag) ? "(null)" : a.sbuf)
1152 
1153 
1168 static void
1170  const struct StringBuffer *R_last_ik,
1171  const struct StringBuffer *R_last_kk,
1172  const struct StringBuffer *R_last_kj,
1173  struct StringBuffer *R_cur_ij,
1174  struct StringBuffer *R_cur_l,
1175  struct StringBuffer *R_cur_r)
1176 {
1177  struct StringBuffer R_temp_ij;
1178  struct StringBuffer R_temp_ik;
1179  struct StringBuffer R_temp_kj;
1180  struct StringBuffer R_temp_kk;
1181  int eps_check;
1182  int ij_ik_cmp;
1183  int ij_kj_cmp;
1184  int ik_kk_cmp;
1185  int kk_kj_cmp;
1186  int clean_ik_kk_cmp;
1187  int clean_kk_kj_cmp;
1188  size_t length;
1189  size_t length_l;
1190  size_t length_r;
1191 
1192  /*
1193  * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1194  * R_last == R^{(k-1)}, R_cur == R^{(k)}
1195  * R_cur_ij = R_cur_l | R_cur_r
1196  * R_cur_l == R^{(k-1)}_{ij}
1197  * R_cur_r == R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1198  */
1199 
1200  if ((GNUNET_YES == R_last_ij->null_flag) &&
1201  ((GNUNET_YES == R_last_ik->null_flag) ||
1202  (GNUNET_YES == R_last_kj->null_flag)))
1203  {
1204  /* R^{(k)}_{ij} = N | N */
1205  R_cur_ij->null_flag = GNUNET_YES;
1206  R_cur_ij->synced = GNUNET_NO;
1207  return;
1208  }
1209 
1210  if ((GNUNET_YES == R_last_ik->null_flag) ||
1211  (GNUNET_YES == R_last_kj->null_flag))
1212  {
1213  /* R^{(k)}_{ij} = R^{(k-1)}_{ij} | N */
1214  if (GNUNET_YES == R_last_ij->synced)
1215  {
1216  R_cur_ij->synced = GNUNET_YES;
1217  R_cur_ij->null_flag = GNUNET_NO;
1218  return;
1219  }
1220  R_cur_ij->synced = GNUNET_YES;
1221  sb_strdup(R_cur_ij, R_last_ij);
1222  return;
1223  }
1224  R_cur_ij->synced = GNUNET_NO;
1225 
1226  /* $R^{(k)}_{ij} = N | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} OR
1227  * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} */
1228 
1229  R_cur_r->null_flag = GNUNET_YES;
1230  R_cur_r->slen = 0;
1231  R_cur_l->null_flag = GNUNET_YES;
1232  R_cur_l->slen = 0;
1233 
1234  /* cache results from strcmp, we might need these many times */
1235  ij_kj_cmp = sb_nullstrcmp(R_last_ij, R_last_kj);
1236  ij_ik_cmp = sb_nullstrcmp(R_last_ij, R_last_ik);
1237  ik_kk_cmp = sb_nullstrcmp(R_last_ik, R_last_kk);
1238  kk_kj_cmp = sb_nullstrcmp(R_last_kk, R_last_kj);
1239 
1240  /* Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well
1241  * as parentheses, so we can better compare the contents */
1242 
1243  memset(&R_temp_ij, 0, sizeof(struct StringBuffer));
1244  memset(&R_temp_ik, 0, sizeof(struct StringBuffer));
1245  memset(&R_temp_kk, 0, sizeof(struct StringBuffer));
1246  memset(&R_temp_kj, 0, sizeof(struct StringBuffer));
1247  remove_epsilon(R_last_ik, &R_temp_ik);
1248  remove_epsilon(R_last_kk, &R_temp_kk);
1249  remove_epsilon(R_last_kj, &R_temp_kj);
1250  remove_parentheses(&R_temp_ik);
1251  remove_parentheses(&R_temp_kk);
1252  remove_parentheses(&R_temp_kj);
1253  clean_ik_kk_cmp = sb_nullstrcmp(R_last_ik, &R_temp_kk);
1254  clean_kk_kj_cmp = sb_nullstrcmp(&R_temp_kk, R_last_kj);
1255 
1256  /* construct R_cur_l (and, if necessary R_cur_r) */
1257  if (GNUNET_YES != R_last_ij->null_flag)
1258  {
1259  /* Assign R_temp_ij to R_last_ij and remove epsilon as well
1260  * as parentheses, so we can better compare the contents */
1261  remove_epsilon(R_last_ij, &R_temp_ij);
1262  remove_parentheses(&R_temp_ij);
1263 
1264  if ((0 == sb_strcmp(&R_temp_ij, &R_temp_ik)) &&
1265  (0 == sb_strcmp(&R_temp_ik, &R_temp_kk)) &&
1266  (0 == sb_strcmp(&R_temp_kk, &R_temp_kj)))
1267  {
1268  if (0 == R_temp_ij.slen)
1269  {
1270  R_cur_r->null_flag = GNUNET_NO;
1271  }
1272  else if ((0 == sb_strncmp_cstr(R_last_ij, "(|", 2)) ||
1273  (0 == sb_strncmp_cstr(R_last_ik, "(|", 2) &&
1274  0 == sb_strncmp_cstr(R_last_kj, "(|", 2)))
1275  {
1276  /*
1277  * a|(e|a)a*(e|a) = a*
1278  * a|(e|a)(e|a)*(e|a) = a*
1279  * (e|a)|aa*a = a*
1280  * (e|a)|aa*(e|a) = a*
1281  * (e|a)|(e|a)a*a = a*
1282  * (e|a)|(e|a)a*(e|a) = a*
1283  * (e|a)|(e|a)(e|a)*(e|a) = a*
1284  */
1285  if (GNUNET_YES == needs_parentheses(&R_temp_ij))
1286  sb_printf1(R_cur_r, "(%.*s)*", 3, &R_temp_ij);
1287  else
1288  sb_printf1(R_cur_r, "%.*s*", 1, &R_temp_ij);
1289  }
1290  else
1291  {
1292  /*
1293  * a|aa*a = a+
1294  * a|(e|a)a*a = a+
1295  * a|aa*(e|a) = a+
1296  * a|(e|a)(e|a)*a = a+
1297  * a|a(e|a)*(e|a) = a+
1298  */
1299  if (GNUNET_YES == needs_parentheses(&R_temp_ij))
1300  sb_printf1(R_cur_r, "(%.*s)+", 3, &R_temp_ij);
1301  else
1302  sb_printf1(R_cur_r, "%.*s+", 1, &R_temp_ij);
1303  }
1304  }
1305  else if ((0 == ij_ik_cmp) && (0 == clean_kk_kj_cmp) &&
1306  (0 != clean_ik_kk_cmp))
1307  {
1308  /* a|ab*b = ab* */
1309  if (0 == R_last_kk->slen)
1310  sb_strdup(R_cur_r, R_last_ij);
1311  else if (GNUNET_YES == needs_parentheses(&R_temp_kk))
1312  sb_printf2(R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
1313  else
1314  sb_printf2(R_cur_r, "%.*s%.*s*", 1, R_last_ij, R_last_kk);
1315  R_cur_l->null_flag = GNUNET_YES;
1316  }
1317  else if ((0 == ij_kj_cmp) && (0 == clean_ik_kk_cmp) &&
1318  (0 != clean_kk_kj_cmp))
1319  {
1320  /* a|bb*a = b*a */
1321  if (R_last_kk->slen < 1)
1322  {
1323  sb_strdup(R_cur_r, R_last_kj);
1324  }
1325  else if (GNUNET_YES == needs_parentheses(&R_temp_kk))
1326  sb_printf2(R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
1327  else
1328  sb_printf2(R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
1329 
1330  R_cur_l->null_flag = GNUNET_YES;
1331  }
1332  else if ((0 == ij_ik_cmp) && (0 == kk_kj_cmp) &&
1333  (!has_epsilon(R_last_ij)) && has_epsilon(R_last_kk))
1334  {
1335  /* a|a(e|b)*(e|b) = a|ab* = a|a|ab|abb|abbb|... = ab* */
1336  if (needs_parentheses(&R_temp_kk))
1337  sb_printf2(R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
1338  else
1339  sb_printf2(R_cur_r, "%.*s%.*s*", 1, R_last_ij, &R_temp_kk);
1340  R_cur_l->null_flag = GNUNET_YES;
1341  }
1342  else if ((0 == ij_kj_cmp) && (0 == ik_kk_cmp) &&
1343  (!has_epsilon(R_last_ij)) && has_epsilon(R_last_kk))
1344  {
1345  /* a|(e|b)(e|b)*a = a|b*a = a|a|ba|bba|bbba|... = b*a */
1346  if (needs_parentheses(&R_temp_kk))
1347  sb_printf2(R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_ij);
1348  else
1349  sb_printf2(R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_ij);
1350  R_cur_l->null_flag = GNUNET_YES;
1351  }
1352  else
1353  {
1354  sb_strdup(R_cur_l, R_last_ij);
1355  remove_parentheses(R_cur_l);
1356  }
1357  }
1358  else
1359  {
1360  /* we have no left side */
1361  R_cur_l->null_flag = GNUNET_YES;
1362  }
1363 
1364  /* construct R_cur_r, if not already constructed */
1365  if (GNUNET_YES == R_cur_r->null_flag)
1366  {
1367  length = R_temp_kk.slen - R_last_ik->slen;
1368 
1369  /* a(ba)*bx = (ab)+x */
1370  if ((length > 0) && (GNUNET_YES != R_last_kk->null_flag) &&
1371  (0 < R_last_kk->slen) && (GNUNET_YES != R_last_kj->null_flag) &&
1372  (0 < R_last_kj->slen) && (GNUNET_YES != R_last_ik->null_flag) &&
1373  (0 < R_last_ik->slen) &&
1374  (0 == sb_strkcmp(&R_temp_kk, R_last_ik, length)) &&
1375  (0 == sb_strncmp(&R_temp_kk, R_last_kj, length)))
1376  {
1377  struct StringBuffer temp_a;
1378  struct StringBuffer temp_b;
1379 
1380  sb_init(&temp_a, length);
1381  sb_init(&temp_b, R_last_kj->slen - length);
1382 
1383  length_l = length;
1384  temp_a.sbuf = temp_a.abuf;
1385  GNUNET_memcpy(temp_a.sbuf, R_last_kj->sbuf, length_l);
1386  temp_a.slen = length_l;
1387 
1388  length_r = R_last_kj->slen - length;
1389  temp_b.sbuf = temp_b.abuf;
1390  GNUNET_memcpy(temp_b.sbuf, &R_last_kj->sbuf[length], length_r);
1391  temp_b.slen = length_r;
1392 
1393  /* e|(ab)+ = (ab)* */
1394  if ((GNUNET_YES != R_cur_l->null_flag) && (0 == R_cur_l->slen) &&
1395  (0 == temp_b.slen))
1396  {
1397  sb_printf2(R_cur_r, "(%.*s%.*s)*", 3, R_last_ik, &temp_a);
1398  sb_free(R_cur_l);
1399  R_cur_l->null_flag = GNUNET_YES;
1400  }
1401  else
1402  {
1403  sb_printf3(R_cur_r, "(%.*s%.*s)+%.*s", 3, R_last_ik, &temp_a, &temp_b);
1404  }
1405  sb_free(&temp_a);
1406  sb_free(&temp_b);
1407  }
1408  else if (0 == sb_strcmp(&R_temp_ik, &R_temp_kk) &&
1409  0 == sb_strcmp(&R_temp_kk, &R_temp_kj))
1410  {
1411  /*
1412  * (e|a)a*(e|a) = a*
1413  * (e|a)(e|a)*(e|a) = a*
1414  */
1415  if (has_epsilon(R_last_ik) && has_epsilon(R_last_kj))
1416  {
1417  if (needs_parentheses(&R_temp_kk))
1418  sb_printf1(R_cur_r, "(%.*s)*", 3, &R_temp_kk);
1419  else
1420  sb_printf1(R_cur_r, "%.*s*", 1, &R_temp_kk);
1421  }
1422  /* aa*a = a+a */
1423  else if ((0 == clean_ik_kk_cmp) && (0 == clean_kk_kj_cmp) &&
1424  (!has_epsilon(R_last_ik)))
1425  {
1426  if (needs_parentheses(&R_temp_kk))
1427  sb_printf2(R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, &R_temp_kk);
1428  else
1429  sb_printf2(R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, &R_temp_kk);
1430  }
1431  /*
1432  * (e|a)a*a = a+
1433  * aa*(e|a) = a+
1434  * a(e|a)*(e|a) = a+
1435  * (e|a)a*a = a+
1436  */
1437  else
1438  {
1439  eps_check = (has_epsilon(R_last_ik) + has_epsilon(R_last_kk) +
1440  has_epsilon(R_last_kj));
1441 
1442  if (1 == eps_check)
1443  {
1444  if (needs_parentheses(&R_temp_kk))
1445  sb_printf1(R_cur_r, "(%.*s)+", 3, &R_temp_kk);
1446  else
1447  sb_printf1(R_cur_r, "%.*s+", 1, &R_temp_kk);
1448  }
1449  }
1450  }
1451  /*
1452  * aa*b = a+b
1453  * (e|a)(e|a)*b = a*b
1454  */
1455  else if (0 == sb_strcmp(&R_temp_ik, &R_temp_kk))
1456  {
1457  if (has_epsilon(R_last_ik))
1458  {
1459  if (needs_parentheses(&R_temp_kk))
1460  sb_printf2(R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
1461  else
1462  sb_printf2(R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
1463  }
1464  else
1465  {
1466  if (needs_parentheses(&R_temp_kk))
1467  sb_printf2(R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, R_last_kj);
1468  else
1469  sb_printf2(R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, R_last_kj);
1470  }
1471  }
1472  /*
1473  * ba*a = ba+
1474  * b(e|a)*(e|a) = ba*
1475  */
1476  else if (0 == sb_strcmp(&R_temp_kk, &R_temp_kj))
1477  {
1478  if (has_epsilon(R_last_kj))
1479  {
1480  if (needs_parentheses(&R_temp_kk))
1481  sb_printf2(R_cur_r, "%.*s(%.*s)*", 3, R_last_ik, &R_temp_kk);
1482  else
1483  sb_printf2(R_cur_r, "%.*s%.*s*", 1, R_last_ik, &R_temp_kk);
1484  }
1485  else
1486  {
1487  if (needs_parentheses(&R_temp_kk))
1488  sb_printf2(R_cur_r, "(%.*s)+%.*s", 3, R_last_ik, &R_temp_kk);
1489  else
1490  sb_printf2(R_cur_r, "%.*s+%.*s", 1, R_last_ik, &R_temp_kk);
1491  }
1492  }
1493  else
1494  {
1495  if (0 < R_temp_kk.slen)
1496  {
1497  if (needs_parentheses(&R_temp_kk))
1498  {
1499  sb_printf3(R_cur_r,
1500  "%.*s(%.*s)*%.*s",
1501  3,
1502  R_last_ik,
1503  &R_temp_kk,
1504  R_last_kj);
1505  }
1506  else
1507  {
1508  sb_printf3(R_cur_r,
1509  "%.*s%.*s*%.*s",
1510  1,
1511  R_last_ik,
1512  &R_temp_kk,
1513  R_last_kj);
1514  }
1515  }
1516  else
1517  {
1518  sb_printf2(R_cur_r, "%.*s%.*s", 0, R_last_ik, R_last_kj);
1519  }
1520  }
1521  }
1522  sb_free(&R_temp_ij);
1523  sb_free(&R_temp_ik);
1524  sb_free(&R_temp_kk);
1525  sb_free(&R_temp_kj);
1526 
1527  if ((GNUNET_YES == R_cur_l->null_flag) && (GNUNET_YES == R_cur_r->null_flag))
1528  {
1529  R_cur_ij->null_flag = GNUNET_YES;
1530  return;
1531  }
1532 
1533  if ((GNUNET_YES != R_cur_l->null_flag) && (GNUNET_YES == R_cur_r->null_flag))
1534  {
1535  struct StringBuffer tmp;
1536 
1537  tmp = *R_cur_ij;
1538  *R_cur_ij = *R_cur_l;
1539  *R_cur_l = tmp;
1540  return;
1541  }
1542 
1543  if ((GNUNET_YES == R_cur_l->null_flag) && (GNUNET_YES != R_cur_r->null_flag))
1544  {
1545  struct StringBuffer tmp;
1546 
1547  tmp = *R_cur_ij;
1548  *R_cur_ij = *R_cur_r;
1549  *R_cur_r = tmp;
1550  return;
1551  }
1552 
1553  if (0 == sb_nullstrcmp(R_cur_l, R_cur_r))
1554  {
1555  struct StringBuffer tmp;
1556 
1557  tmp = *R_cur_ij;
1558  *R_cur_ij = *R_cur_l;
1559  *R_cur_l = tmp;
1560  return;
1561  }
1562  sb_printf2(R_cur_ij, "(%.*s|%.*s)", 3, R_cur_l, R_cur_r);
1563 }
1564 
1565 
1577 static int
1579 {
1580  unsigned int n = a->state_count;
1581  struct REGEX_INTERNAL_State *states[n];
1582  struct StringBuffer *R_last;
1583  struct StringBuffer *R_cur;
1584  struct StringBuffer R_cur_r;
1585  struct StringBuffer R_cur_l;
1586  struct StringBuffer *R_swap;
1587  struct REGEX_INTERNAL_Transition *t;
1588  struct StringBuffer complete_regex;
1589  unsigned int i;
1590  unsigned int j;
1591  unsigned int k;
1592 
1593  R_last = GNUNET_malloc_large(sizeof(struct StringBuffer) * n * n);
1594  R_cur = GNUNET_malloc_large(sizeof(struct StringBuffer) * n * n);
1595  if ((NULL == R_last) || (NULL == R_cur))
1596  {
1598  GNUNET_free_non_null(R_cur);
1599  GNUNET_free_non_null(R_last);
1600  return GNUNET_SYSERR;
1601  }
1602 
1603  /* create depth-first numbering of the states, initializes 'state' */
1605  a->start,
1606  NULL,
1607  NULL,
1608  &number_states,
1609  states);
1610 
1611  for (i = 0; i < n; i++)
1612  GNUNET_assert(NULL != states[i]);
1613  for (i = 0; i < n; i++)
1614  for (j = 0; j < n; j++)
1615  R_last[i * n + j].null_flag = GNUNET_YES;
1616 
1617  /* Compute regular expressions of length "1" between each pair of states */
1618  for (i = 0; i < n; i++)
1619  {
1620  for (t = states[i]->transitions_head; NULL != t; t = t->next)
1621  {
1622  j = t->to_state->dfs_id;
1623  if (GNUNET_YES == R_last[i * n + j].null_flag)
1624  {
1625  sb_strdup_cstr(&R_last[i * n + j], t->label);
1626  }
1627  else
1628  {
1629  sb_append_cstr(&R_last[i * n + j], "|");
1630  sb_append_cstr(&R_last[i * n + j], t->label);
1631  }
1632  }
1633  /* add self-loop: i is reachable from i via epsilon-transition */
1634  if (GNUNET_YES == R_last[i * n + i].null_flag)
1635  {
1636  R_last[i * n + i].slen = 0;
1637  R_last[i * n + i].null_flag = GNUNET_NO;
1638  }
1639  else
1640  {
1641  sb_wrap(&R_last[i * n + i], "(|%.*s)", 3);
1642  }
1643  }
1644  for (i = 0; i < n; i++)
1645  for (j = 0; j < n; j++)
1646  if (needs_parentheses(&R_last[i * n + j]))
1647  sb_wrap(&R_last[i * n + j], "(%.*s)", 2);
1648  /* Compute regular expressions of length "k" between each pair of states per
1649  * induction */
1650  memset(&R_cur_l, 0, sizeof(struct StringBuffer));
1651  memset(&R_cur_r, 0, sizeof(struct StringBuffer));
1652  for (k = 0; k < n; k++)
1653  {
1654  for (i = 0; i < n; i++)
1655  {
1656  for (j = 0; j < n; j++)
1657  {
1658  /* Basis for the recursion:
1659  * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1660  * R_last == R^{(k-1)}, R_cur == R^{(k)}
1661  */
1662 
1663  /* Create R_cur[i][j] and simplify the expression */
1664  automaton_create_proofs_simplify(&R_last[i * n + j],
1665  &R_last[i * n + k],
1666  &R_last[k * n + k],
1667  &R_last[k * n + j],
1668  &R_cur[i * n + j],
1669  &R_cur_l,
1670  &R_cur_r);
1671  }
1672  }
1673  /* set R_last = R_cur */
1674  R_swap = R_last;
1675  R_last = R_cur;
1676  R_cur = R_swap;
1677  /* clear 'R_cur' for next iteration */
1678  for (i = 0; i < n; i++)
1679  for (j = 0; j < n; j++)
1680  R_cur[i * n + j].null_flag = GNUNET_YES;
1681  }
1682  sb_free(&R_cur_l);
1683  sb_free(&R_cur_r);
1684  /* assign proofs and hashes */
1685  for (i = 0; i < n; i++)
1686  {
1687  if (GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag)
1688  {
1689  states[i]->proof = GNUNET_strndup(R_last[a->start->dfs_id * n + i].sbuf,
1690  R_last[a->start->dfs_id * n + i].slen);
1691  GNUNET_CRYPTO_hash(states[i]->proof,
1692  strlen(states[i]->proof),
1693  &states[i]->hash);
1694  }
1695  }
1696 
1697  /* complete regex for whole DFA: union of all pairs (start state/accepting
1698  * state(s)). */
1699  sb_init(&complete_regex, 16 * n);
1700  for (i = 0; i < n; i++)
1701  {
1702  if (states[i]->accepting)
1703  {
1704  if ((0 == complete_regex.slen) &&
1705  (0 < R_last[a->start->dfs_id * n + i].slen))
1706  {
1707  sb_append(&complete_regex, &R_last[a->start->dfs_id * n + i]);
1708  }
1709  else if ((GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag) &&
1710  (0 < R_last[a->start->dfs_id * n + i].slen))
1711  {
1712  sb_append_cstr(&complete_regex, "|");
1713  sb_append(&complete_regex, &R_last[a->start->dfs_id * n + i]);
1714  }
1715  }
1716  }
1717  a->canonical_regex =
1718  GNUNET_strndup(complete_regex.sbuf, complete_regex.slen);
1719 
1720  /* cleanup */
1721  sb_free(&complete_regex);
1722  for (i = 0; i < n; i++)
1723  for (j = 0; j < n; j++)
1724  {
1725  sb_free(&R_cur[i * n + j]);
1726  sb_free(&R_last[i * n + j]);
1727  }
1728  GNUNET_free(R_cur);
1729  GNUNET_free(R_last);
1730  return GNUNET_OK;
1731 }
1732 
1733 
1743 static struct REGEX_INTERNAL_State *
1745  struct REGEX_INTERNAL_StateSet *nfa_states)
1746 {
1747  struct REGEX_INTERNAL_State *s;
1748  char *pos;
1749  size_t len;
1750  struct REGEX_INTERNAL_State *cstate;
1751  struct REGEX_INTERNAL_Transition *ctran;
1752  unsigned int i;
1753 
1754  s = GNUNET_new(struct REGEX_INTERNAL_State);
1755  s->id = ctx->state_id++;
1756  s->index = -1;
1757  s->lowlink = -1;
1758 
1759  if (NULL == nfa_states)
1760  {
1761  GNUNET_asprintf(&s->name, "s%i", s->id);
1762  return s;
1763  }
1764 
1765  s->nfa_set = *nfa_states;
1766 
1767  if (nfa_states->off < 1)
1768  return s;
1769 
1770  /* Create a name based on 'nfa_states' */
1771  len = nfa_states->off * 14 + 4;
1772  s->name = GNUNET_malloc(len);
1773  strcat(s->name, "{");
1774  pos = s->name + 1;
1775 
1776  for (i = 0; i < nfa_states->off; i++)
1777  {
1778  cstate = nfa_states->states[i];
1779  GNUNET_snprintf(pos, pos - s->name + len, "%i,", cstate->id);
1780  pos += strlen(pos);
1781 
1782  /* Add a transition for each distinct label to NULL state */
1783  for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next)
1784  if (NULL != ctran->label)
1785  state_add_transition(ctx, s, ctran->label, NULL);
1786 
1787  /* If the nfa_states contain an accepting state, the new dfa state is also
1788  * accepting. */
1789  if (cstate->accepting)
1790  s->accepting = 1;
1791  }
1792  pos[-1] = '}';
1793  s->name = GNUNET_realloc(s->name, strlen(s->name) + 1);
1794 
1795  memset(nfa_states, 0, sizeof(struct REGEX_INTERNAL_StateSet));
1796  return s;
1797 }
1798 
1799 
1813 static unsigned int
1814 dfa_move(struct REGEX_INTERNAL_State **s, const char *str)
1815 {
1816  struct REGEX_INTERNAL_Transition *t;
1817  struct REGEX_INTERNAL_State *new_s;
1818  unsigned int len;
1819  unsigned int max_len;
1820 
1821  if (NULL == s)
1822  return 0;
1823 
1824  new_s = NULL;
1825  max_len = 0;
1826  for (t = (*s)->transitions_head; NULL != t; t = t->next)
1827  {
1828  len = strlen(t->label);
1829 
1830  if (0 == strncmp(t->label, str, len))
1831  {
1832  if (len >= max_len)
1833  {
1834  max_len = len;
1835  new_s = t->to_state;
1836  }
1837  }
1838  }
1839 
1840  *s = new_s;
1841  return max_len;
1842 }
1843 
1844 
1854 static void
1855 mark_states(void *cls,
1856  const unsigned int count,
1857  struct REGEX_INTERNAL_State *s)
1858 {
1859  s->marked = GNUNET_YES;
1860 }
1861 
1862 
1869 static void
1871 {
1872  struct REGEX_INTERNAL_State *s;
1873  struct REGEX_INTERNAL_State *s_next;
1874 
1875  /* 1. unmark all states */
1876  for (s = a->states_head; NULL != s; s = s->next)
1877  s->marked = GNUNET_NO;
1878 
1879  /* 2. traverse dfa from start state and mark all visited states */
1881  a->start,
1882  NULL,
1883  NULL,
1884  &mark_states,
1885  NULL);
1886 
1887  /* 3. delete all states that were not visited */
1888  for (s = a->states_head; NULL != s; s = s_next)
1889  {
1890  s_next = s->next;
1891  if (GNUNET_NO == s->marked)
1892  automaton_remove_state(a, s);
1893  }
1894 }
1895 
1896 
1903 static void
1905 {
1906  struct REGEX_INTERNAL_State *s;
1907  struct REGEX_INTERNAL_State *s_next;
1908  struct REGEX_INTERNAL_Transition *t;
1909  int dead;
1910 
1911  GNUNET_assert(DFA == a->type);
1912 
1913  for (s = a->states_head; NULL != s; s = s_next)
1914  {
1915  s_next = s->next;
1916 
1917  if (s->accepting)
1918  continue;
1919 
1920  dead = 1;
1921  for (t = s->transitions_head; NULL != t; t = t->next)
1922  {
1923  if (NULL != t->to_state && t->to_state != s)
1924  {
1925  dead = 0;
1926  break;
1927  }
1928  }
1929 
1930  if (0 == dead)
1931  continue;
1932 
1933  /* state s is dead, remove it */
1934  automaton_remove_state(a, s);
1935  }
1936 }
1937 
1938 
1946 static int
1948  struct REGEX_INTERNAL_Automaton *a)
1949 {
1950  uint32_t *table;
1951  struct REGEX_INTERNAL_State *s1;
1952  struct REGEX_INTERNAL_State *s2;
1953  struct REGEX_INTERNAL_Transition *t1;
1954  struct REGEX_INTERNAL_Transition *t2;
1955  struct REGEX_INTERNAL_State *s1_next;
1956  struct REGEX_INTERNAL_State *s2_next;
1957  int change;
1958  unsigned int num_equal_edges;
1959  unsigned int i;
1960  unsigned int state_cnt;
1961  unsigned long long idx;
1962  unsigned long long idx1;
1963 
1964  if ((NULL == a) || (0 == a->state_count))
1965  {
1967  "Could not merge nondistinguishable states, automaton was NULL.\n");
1968  return GNUNET_SYSERR;
1969  }
1970 
1971  state_cnt = a->state_count;
1972  table = GNUNET_malloc_large(
1973  (sizeof(uint32_t) * state_cnt * state_cnt / 32) + sizeof(uint32_t));
1974  if (NULL == table)
1975  {
1977  return GNUNET_SYSERR;
1978  }
1979 
1980  for (i = 0, s1 = a->states_head; NULL != s1; s1 = s1->next)
1981  s1->marked = i++;
1982 
1983  /* Mark all pairs of accepting/!accepting states */
1984  for (s1 = a->states_head; NULL != s1; s1 = s1->next)
1985  for (s2 = a->states_head; NULL != s2; s2 = s2->next)
1986  if ((s1->accepting && !s2->accepting) ||
1987  (!s1->accepting && s2->accepting))
1988  {
1989  idx = (unsigned long long)s1->marked * state_cnt + s2->marked;
1990  table[idx / 32] |= (1U << (idx % 32));
1991  }
1992 
1993  /* Find all equal states */
1994  change = 1;
1995  while (0 != change)
1996  {
1997  change = 0;
1998  for (s1 = a->states_head; NULL != s1; s1 = s1->next)
1999  {
2000  for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2->next)
2001  {
2002  idx = (unsigned long long)s1->marked * state_cnt + s2->marked;
2003  if (0 != (table[idx / 32] & (1U << (idx % 32))))
2004  continue;
2005  num_equal_edges = 0;
2006  for (t1 = s1->transitions_head; NULL != t1; t1 = t1->next)
2007  {
2008  for (t2 = s2->transitions_head; NULL != t2; t2 = t2->next)
2009  {
2010  if (0 == strcmp(t1->label, t2->label))
2011  {
2012  num_equal_edges++;
2013  /* same edge, but targets definitively different, so we're different
2014  as well */
2015  if (t1->to_state->marked > t2->to_state->marked)
2016  idx1 = (unsigned long long)t1->to_state->marked * state_cnt +
2017  t2->to_state->marked;
2018  else
2019  idx1 = (unsigned long long)t2->to_state->marked * state_cnt +
2020  t1->to_state->marked;
2021  if (0 != (table[idx1 / 32] & (1U << (idx1 % 32))))
2022  {
2023  table[idx / 32] |= (1U << (idx % 32));
2024  change = 1; /* changed a marker, need to run again */
2025  }
2026  }
2027  }
2028  }
2029  if ((num_equal_edges != s1->transition_count) ||
2030  (num_equal_edges != s2->transition_count))
2031  {
2032  /* Make sure ALL edges of possible equal states are the same */
2033  table[idx / 32] |= (1U << (idx % 32));
2034  change = 1; /* changed a marker, need to run again */
2035  }
2036  }
2037  }
2038  }
2039 
2040  /* Merge states that are equal */
2041  for (s1 = a->states_head; NULL != s1; s1 = s1_next)
2042  {
2043  s1_next = s1->next;
2044  for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2_next)
2045  {
2046  s2_next = s2->next;
2047  idx = (unsigned long long)s1->marked * state_cnt + s2->marked;
2048  if (0 == (table[idx / 32] & (1U << (idx % 32))))
2049  automaton_merge_states(ctx, a, s1, s2);
2050  }
2051  }
2052 
2053  GNUNET_free(table);
2054  return GNUNET_OK;
2055 }
2056 
2057 
2066 static int
2068  struct REGEX_INTERNAL_Automaton *a)
2069 {
2070  if (NULL == a)
2071  return GNUNET_SYSERR;
2072 
2073  GNUNET_assert(DFA == a->type);
2074 
2075  /* 1. remove unreachable states */
2077 
2078  /* 2. remove dead states */
2080 
2081  /* 3. Merge nondistinguishable states */
2083  return GNUNET_SYSERR;
2084  return GNUNET_OK;
2085 }
2086 
2087 
2095  const unsigned int stride;
2096 
2102 
2107 };
2108 
2109 
2120 static void
2122  const unsigned int depth,
2123  char *label,
2124  struct REGEX_INTERNAL_State *start,
2125  struct REGEX_INTERNAL_State *s)
2126 {
2127  struct REGEX_INTERNAL_Strided_Context *ctx = cls;
2128  struct REGEX_INTERNAL_Transition *t;
2129  char *new_label;
2130 
2131  if (depth == ctx->stride)
2132  {
2134  t->label = GNUNET_strdup(label);
2135  t->to_state = s;
2136  t->from_state = start;
2138  ctx->transitions_tail,
2139  t);
2140  }
2141  else
2142  {
2143  for (t = s->transitions_head; NULL != t; t = t->next)
2144  {
2145  /* Do not consider self-loops, because it end's up in too many
2146  * transitions */
2147  if (t->to_state == t->from_state)
2148  continue;
2149 
2150  if (NULL != label)
2151  {
2152  GNUNET_asprintf(&new_label, "%s%s", label, t->label);
2153  }
2154  else
2155  new_label = GNUNET_strdup(t->label);
2156 
2158  (depth + 1),
2159  new_label,
2160  start,
2161  t->to_state);
2162  }
2163  }
2164  GNUNET_free_non_null(label);
2165 }
2166 
2167 
2176 static void
2178  const unsigned int count,
2179  struct REGEX_INTERNAL_State *s)
2180 {
2181  dfa_add_multi_strides_helper(cls, 0, NULL, s, s);
2182 }
2183 
2184 
2192 void
2194  struct REGEX_INTERNAL_Automaton *dfa,
2195  const unsigned int stride_len)
2196 {
2197  struct REGEX_INTERNAL_Strided_Context ctx = { stride_len, NULL, NULL };
2198  struct REGEX_INTERNAL_Transition *t;
2199  struct REGEX_INTERNAL_Transition *t_next;
2200 
2201  if (1 > stride_len || GNUNET_YES == dfa->is_multistrided)
2202  return;
2203 
2204  /* Compute the new transitions of given stride_len */
2206  dfa->start,
2207  NULL,
2208  NULL,
2210  &ctx);
2211 
2212  /* Add all the new transitions to the automaton. */
2213  for (t = ctx.transitions_head; NULL != t; t = t_next)
2214  {
2215  t_next = t->next;
2216  state_add_transition(regex_ctx, t->from_state, t->label, t->to_state);
2219  GNUNET_free(t);
2220  }
2221 
2222  /* Mark this automaton as multistrided */
2223  dfa->is_multistrided = GNUNET_YES;
2224 }
2225 
2239 void
2241  struct REGEX_INTERNAL_State *start,
2242  struct REGEX_INTERNAL_State *cur,
2243  char *label,
2244  unsigned int max_len,
2245  struct REGEX_INTERNAL_Transition **transitions_head,
2246  struct REGEX_INTERNAL_Transition **transitions_tail)
2247 {
2248  struct REGEX_INTERNAL_Transition *t;
2249  char *new_label;
2250 
2251 
2252  if (NULL != label &&
2253  ((cur->incoming_transition_count > 1 || GNUNET_YES == cur->accepting ||
2254  GNUNET_YES == cur->marked) ||
2255  (start != dfa->start && max_len > 0 && max_len == strlen(label)) ||
2256  (start == dfa->start && GNUNET_REGEX_INITIAL_BYTES == strlen(label))))
2257  {
2259  t->label = GNUNET_strdup(label);
2260  t->to_state = cur;
2261  t->from_state = start;
2262  GNUNET_CONTAINER_DLL_insert(*transitions_head, *transitions_tail, t);
2263 
2264  if (GNUNET_NO == cur->marked)
2265  {
2267  cur,
2268  cur,
2269  NULL,
2270  max_len,
2271  transitions_head,
2272  transitions_tail);
2273  }
2274  return;
2275  }
2276  else if (cur != start)
2277  cur->contained = GNUNET_YES;
2278 
2279  if (GNUNET_YES == cur->marked && cur != start)
2280  return;
2281 
2282  cur->marked = GNUNET_YES;
2283 
2284 
2285  for (t = cur->transitions_head; NULL != t; t = t->next)
2286  {
2287  if (NULL != label)
2288  GNUNET_asprintf(&new_label, "%s%s", label, t->label);
2289  else
2290  new_label = GNUNET_strdup(t->label);
2291 
2292  if (t->to_state != cur)
2293  {
2295  start,
2296  t->to_state,
2297  new_label,
2298  max_len,
2299  transitions_head,
2300  transitions_tail);
2301  }
2302  GNUNET_free(new_label);
2303  }
2304 }
2305 
2306 
2315 static void
2317  struct REGEX_INTERNAL_Automaton *dfa,
2318  unsigned int max_len)
2319 {
2320  struct REGEX_INTERNAL_State *s;
2321  struct REGEX_INTERNAL_State *s_next;
2322  struct REGEX_INTERNAL_Transition *t;
2323  struct REGEX_INTERNAL_Transition *t_next;
2324  struct REGEX_INTERNAL_Transition *transitions_head = NULL;
2325  struct REGEX_INTERNAL_Transition *transitions_tail = NULL;
2326 
2327  if (NULL == dfa)
2328  return;
2329 
2330  /* Count the incoming transitions on each state. */
2331  for (s = dfa->states_head; NULL != s; s = s->next)
2332  {
2333  for (t = s->transitions_head; NULL != t; t = t->next)
2334  {
2335  if (NULL != t->to_state)
2337  }
2338  }
2339 
2340  /* Unmark all states. */
2341  for (s = dfa->states_head; NULL != s; s = s->next)
2342  {
2343  s->marked = GNUNET_NO;
2344  s->contained = GNUNET_NO;
2345  }
2346 
2347  /* Add strides and mark states that can be deleted. */
2349  dfa->start,
2350  dfa->start,
2351  NULL,
2352  max_len,
2353  &transitions_head,
2354  &transitions_tail);
2355 
2356  /* Add all the new transitions to the automaton. */
2357  for (t = transitions_head; NULL != t; t = t_next)
2358  {
2359  t_next = t->next;
2360  state_add_transition(regex_ctx, t->from_state, t->label, t->to_state);
2361  GNUNET_CONTAINER_DLL_remove(transitions_head, transitions_tail, t);
2363  GNUNET_free(t);
2364  }
2365 
2366  /* Remove marked states (including their incoming and outgoing transitions). */
2367  for (s = dfa->states_head; NULL != s; s = s_next)
2368  {
2369  s_next = s->next;
2370  if (GNUNET_YES == s->contained)
2371  automaton_remove_state(dfa, s);
2372  }
2373 }
2374 
2375 
2385 static struct REGEX_INTERNAL_Automaton *
2387  struct REGEX_INTERNAL_State *end)
2388 {
2389  struct REGEX_INTERNAL_Automaton *n;
2390 
2392 
2393  n->type = NFA;
2394  n->start = NULL;
2395  n->end = NULL;
2396  n->state_count = 0;
2397 
2398  if (NULL == start || NULL == end)
2399  return n;
2400 
2401  automaton_add_state(n, end);
2402  automaton_add_state(n, start);
2403 
2404  n->state_count = 2;
2405 
2406  n->start = start;
2407  n->end = end;
2408 
2409  return n;
2410 }
2411 
2412 
2420 static void
2424 {
2425  struct REGEX_INTERNAL_State *s;
2426 
2427  if (NULL == n || NULL == states_head)
2428  {
2429  GNUNET_log(GNUNET_ERROR_TYPE_ERROR, "Could not add states\n");
2430  return;
2431  }
2432 
2433  if (NULL == n->states_head)
2434  {
2435  n->states_head = states_head;
2436  n->states_tail = states_tail;
2437  return;
2438  }
2439 
2440  if (NULL != states_head)
2441  {
2442  n->states_tail->next = states_head;
2443  n->states_tail = states_tail;
2444  }
2445 
2446  for (s = states_head; NULL != s; s = s->next)
2447  n->state_count++;
2448 }
2449 
2450 
2459 static struct REGEX_INTERNAL_State *
2461 {
2462  struct REGEX_INTERNAL_State *s;
2463 
2464  s = GNUNET_new(struct REGEX_INTERNAL_State);
2465  s->id = ctx->state_id++;
2466  s->accepting = accepting;
2467  s->marked = GNUNET_NO;
2468  s->contained = 0;
2469  s->index = -1;
2470  s->lowlink = -1;
2471  s->scc_id = 0;
2472  s->name = NULL;
2473  GNUNET_asprintf(&s->name, "s%i", s->id);
2474 
2475  return s;
2476 }
2477 
2478 
2488 static void
2490  struct REGEX_INTERNAL_Automaton *nfa,
2491  struct REGEX_INTERNAL_StateSet *states,
2492  const char *label)
2493 {
2494  struct REGEX_INTERNAL_State *s;
2495  unsigned int i;
2496  struct REGEX_INTERNAL_StateSet_MDLL cls_stack;
2497  struct REGEX_INTERNAL_State *clsstate;
2498  struct REGEX_INTERNAL_State *currentstate;
2499  struct REGEX_INTERNAL_Transition *ctran;
2500 
2501  memset(ret, 0, sizeof(struct REGEX_INTERNAL_StateSet));
2502  if (NULL == states)
2503  return;
2504 
2505  for (i = 0; i < states->off; i++)
2506  {
2507  s = states->states[i];
2508 
2509  /* Add start state to closure only for epsilon closure */
2510  if (NULL == label)
2511  state_set_append(ret, s);
2512 
2513  /* initialize work stack */
2514  cls_stack.head = NULL;
2515  cls_stack.tail = NULL;
2516  GNUNET_CONTAINER_MDLL_insert(ST, cls_stack.head, cls_stack.tail, s);
2517  cls_stack.len = 1;
2518 
2519  while (NULL != (currentstate = cls_stack.tail))
2520  {
2522  cls_stack.head,
2523  cls_stack.tail,
2524  currentstate);
2525  cls_stack.len--;
2526  for (ctran = currentstate->transitions_head; NULL != ctran;
2527  ctran = ctran->next)
2528  {
2529  if (NULL == (clsstate = ctran->to_state))
2530  continue;
2531  if (0 != clsstate->contained)
2532  continue;
2533  if (0 != nullstrcmp(label, ctran->label))
2534  continue;
2535  state_set_append(ret, clsstate);
2537  cls_stack.head,
2538  cls_stack.tail,
2539  clsstate);
2540  cls_stack.len++;
2541  clsstate->contained = 1;
2542  }
2543  }
2544  }
2545  for (i = 0; i < ret->off; i++)
2546  ret->states[i]->contained = 0;
2547 
2548  if (ret->off > 1)
2549  qsort(ret->states,
2550  ret->off,
2551  sizeof(struct REGEX_INTERNAL_State *),
2552  &state_compare);
2553 }
2554 
2555 
2561 static void
2563 {
2564  struct REGEX_INTERNAL_Automaton *a;
2565  struct REGEX_INTERNAL_Automaton *b;
2566  struct REGEX_INTERNAL_Automaton *new_nfa;
2567 
2568  b = ctx->stack_tail;
2569  GNUNET_assert(NULL != b);
2571  a = ctx->stack_tail;
2572  GNUNET_assert(NULL != a);
2574 
2575  state_add_transition(ctx, a->end, NULL, b->start);
2576  a->end->accepting = 0;
2577  b->end->accepting = 1;
2578 
2579  new_nfa = nfa_fragment_create(NULL, NULL);
2580  nfa_add_states(new_nfa, a->states_head, a->states_tail);
2581  nfa_add_states(new_nfa, b->states_head, b->states_tail);
2582  new_nfa->start = a->start;
2583  new_nfa->end = b->end;
2584  new_nfa->state_count += a->state_count + b->state_count;
2587 
2589 }
2590 
2591 
2597 static void
2599 {
2600  struct REGEX_INTERNAL_Automaton *a;
2601  struct REGEX_INTERNAL_Automaton *new_nfa;
2602  struct REGEX_INTERNAL_State *start;
2603  struct REGEX_INTERNAL_State *end;
2604 
2605  a = ctx->stack_tail;
2606 
2607  if (NULL == a)
2608  {
2609  GNUNET_log(
2611  "nfa_add_star_op failed, because there was no element on the stack");
2612  return;
2613  }
2614 
2616 
2617  start = nfa_state_create(ctx, 0);
2618  end = nfa_state_create(ctx, 1);
2619 
2620  state_add_transition(ctx, start, NULL, a->start);
2621  state_add_transition(ctx, start, NULL, end);
2622  state_add_transition(ctx, a->end, NULL, a->start);
2623  state_add_transition(ctx, a->end, NULL, end);
2624 
2625  a->end->accepting = 0;
2626  end->accepting = 1;
2627 
2628  new_nfa = nfa_fragment_create(start, end);
2629  nfa_add_states(new_nfa, a->states_head, a->states_tail);
2631 
2633 }
2634 
2635 
2641 static void
2643 {
2644  struct REGEX_INTERNAL_Automaton *a;
2645 
2646  a = ctx->stack_tail;
2647 
2648  if (NULL == a)
2649  {
2650  GNUNET_log(
2652  "nfa_add_plus_op failed, because there was no element on the stack");
2653  return;
2654  }
2655 
2657 
2658  state_add_transition(ctx, a->end, NULL, a->start);
2659 
2661 }
2662 
2663 
2669 static void
2671 {
2672  struct REGEX_INTERNAL_Automaton *a;
2673  struct REGEX_INTERNAL_Automaton *new_nfa;
2674  struct REGEX_INTERNAL_State *start;
2675  struct REGEX_INTERNAL_State *end;
2676 
2677  a = ctx->stack_tail;
2678  if (NULL == a)
2679  {
2680  GNUNET_log(
2682  "nfa_add_question_op failed, because there was no element on the stack");
2683  return;
2684  }
2685 
2687 
2688  start = nfa_state_create(ctx, 0);
2689  end = nfa_state_create(ctx, 1);
2690 
2691  state_add_transition(ctx, start, NULL, a->start);
2692  state_add_transition(ctx, start, NULL, end);
2693  state_add_transition(ctx, a->end, NULL, end);
2694 
2695  a->end->accepting = 0;
2696 
2697  new_nfa = nfa_fragment_create(start, end);
2698  nfa_add_states(new_nfa, a->states_head, a->states_tail);
2701 }
2702 
2703 
2710 static void
2712 {
2713  struct REGEX_INTERNAL_Automaton *a;
2714  struct REGEX_INTERNAL_Automaton *b;
2715  struct REGEX_INTERNAL_Automaton *new_nfa;
2716  struct REGEX_INTERNAL_State *start;
2717  struct REGEX_INTERNAL_State *end;
2718 
2719  b = ctx->stack_tail;
2720  GNUNET_assert(NULL != b);
2722  a = ctx->stack_tail;
2723  GNUNET_assert(NULL != a);
2725 
2726  start = nfa_state_create(ctx, 0);
2727  end = nfa_state_create(ctx, 1);
2728  state_add_transition(ctx, start, NULL, a->start);
2729  state_add_transition(ctx, start, NULL, b->start);
2730 
2731  state_add_transition(ctx, a->end, NULL, end);
2732  state_add_transition(ctx, b->end, NULL, end);
2733 
2734  a->end->accepting = 0;
2735  b->end->accepting = 0;
2736  end->accepting = 1;
2737 
2738  new_nfa = nfa_fragment_create(start, end);
2739  nfa_add_states(new_nfa, a->states_head, a->states_tail);
2740  nfa_add_states(new_nfa, b->states_head, b->states_tail);
2743 
2745 }
2746 
2747 
2754 static void
2755 nfa_add_label(struct REGEX_INTERNAL_Context *ctx, const char *label)
2756 {
2757  struct REGEX_INTERNAL_Automaton *n;
2758  struct REGEX_INTERNAL_State *start;
2759  struct REGEX_INTERNAL_State *end;
2760 
2761  GNUNET_assert(NULL != ctx);
2762 
2763  start = nfa_state_create(ctx, 0);
2764  end = nfa_state_create(ctx, 1);
2765  state_add_transition(ctx, start, label, end);
2766  n = nfa_fragment_create(start, end);
2767  GNUNET_assert(NULL != n);
2769 }
2770 
2771 
2777 static void
2779 {
2780  if (NULL == ctx)
2781  {
2782  GNUNET_log(GNUNET_ERROR_TYPE_ERROR, "Context was NULL!");
2783  return;
2784  }
2785  ctx->state_id = 0;
2786  ctx->transition_id = 0;
2787  ctx->stack_head = NULL;
2788  ctx->stack_tail = NULL;
2789 }
2790 
2791 
2800 struct REGEX_INTERNAL_Automaton *
2801 REGEX_INTERNAL_construct_nfa(const char *regex, const size_t len)
2802 {
2803  struct REGEX_INTERNAL_Context ctx;
2804  struct REGEX_INTERNAL_Automaton *nfa;
2805  const char *regexp;
2806  char curlabel[2];
2807  char *error_msg;
2808  unsigned int count;
2809  unsigned int altcount;
2810  unsigned int atomcount;
2811  unsigned int poff;
2812  unsigned int psize;
2813 
2814  struct {
2815  int altcount;
2816  int atomcount;
2817  } * p;
2818 
2819  if (NULL == regex || 0 == strlen(regex) || 0 == len)
2820  {
2822  "Could not parse regex. Empty regex string provided.\n");
2823 
2824  return NULL;
2825  }
2827 
2828  regexp = regex;
2829  curlabel[1] = '\0';
2830  p = NULL;
2831  error_msg = NULL;
2832  altcount = 0;
2833  atomcount = 0;
2834  poff = 0;
2835  psize = 0;
2836 
2837  for (count = 0; count < len && *regexp; count++, regexp++)
2838  {
2839  switch (*regexp)
2840  {
2841  case '(':
2842  if (atomcount > 1)
2843  {
2844  --atomcount;
2845  nfa_add_concatenation(&ctx);
2846  }
2847  if (poff == psize)
2848  GNUNET_array_grow(p, psize, psize * 2 + 4); /* FIXME why *2 +4? */
2849  p[poff].altcount = altcount;
2850  p[poff].atomcount = atomcount;
2851  poff++;
2852  altcount = 0;
2853  atomcount = 0;
2854  break;
2855 
2856  case '|':
2857  if (0 == atomcount)
2858  {
2859  error_msg = "Cannot append '|' to nothing";
2860  goto error;
2861  }
2862  while (--atomcount > 0)
2863  nfa_add_concatenation(&ctx);
2864  altcount++;
2865  break;
2866 
2867  case ')':
2868  if (0 == poff)
2869  {
2870  error_msg = "Missing opening '('";
2871  goto error;
2872  }
2873  if (0 == atomcount)
2874  {
2875  /* Ignore this: "()" */
2876  poff--;
2877  altcount = p[poff].altcount;
2878  atomcount = p[poff].atomcount;
2879  break;
2880  }
2881  while (--atomcount > 0)
2882  nfa_add_concatenation(&ctx);
2883  for (; altcount > 0; altcount--)
2884  nfa_add_alternation(&ctx);
2885  poff--;
2886  altcount = p[poff].altcount;
2887  atomcount = p[poff].atomcount;
2888  atomcount++;
2889  break;
2890 
2891  case '*':
2892  if (atomcount == 0)
2893  {
2894  error_msg = "Cannot append '*' to nothing";
2895  goto error;
2896  }
2897  nfa_add_star_op(&ctx);
2898  break;
2899 
2900  case '+':
2901  if (atomcount == 0)
2902  {
2903  error_msg = "Cannot append '+' to nothing";
2904  goto error;
2905  }
2906  nfa_add_plus_op(&ctx);
2907  break;
2908 
2909  case '?':
2910  if (atomcount == 0)
2911  {
2912  error_msg = "Cannot append '?' to nothing";
2913  goto error;
2914  }
2915  nfa_add_question_op(&ctx);
2916  break;
2917 
2918  default:
2919  if (atomcount > 1)
2920  {
2921  --atomcount;
2922  nfa_add_concatenation(&ctx);
2923  }
2924  curlabel[0] = *regexp;
2925  nfa_add_label(&ctx, curlabel);
2926  atomcount++;
2927  break;
2928  }
2929  }
2930  if (0 != poff)
2931  {
2932  error_msg = "Unbalanced parenthesis";
2933  goto error;
2934  }
2935  while (--atomcount > 0)
2936  nfa_add_concatenation(&ctx);
2937  for (; altcount > 0; altcount--)
2938  nfa_add_alternation(&ctx);
2939 
2940  GNUNET_array_grow(p, psize, 0);
2941 
2942  nfa = ctx.stack_tail;
2944 
2945  if (NULL != ctx.stack_head)
2946  {
2947  error_msg = "Creating the NFA failed. NFA stack was not empty!";
2948  goto error;
2949  }
2950 
2951  /* Remember the regex that was used to generate this NFA */
2952  nfa->regex = GNUNET_strdup(regex);
2953 
2954  /* create depth-first numbering of the states for pretty printing */
2956  NULL,
2957  NULL,
2958  NULL,
2959  &number_states,
2960  NULL);
2961 
2962  /* No multistriding added so far */
2963  nfa->is_multistrided = GNUNET_NO;
2964 
2965  return nfa;
2966 
2967 error:
2968  GNUNET_log(GNUNET_ERROR_TYPE_ERROR, "Could not parse regex: `%s'\n", regex);
2969  if (NULL != error_msg)
2970  GNUNET_log(GNUNET_ERROR_TYPE_ERROR, "%s\n", error_msg);
2971 
2973 
2974  while (NULL != (nfa = ctx.stack_head))
2975  {
2978  }
2979 
2980  return NULL;
2981 }
2982 
2983 
2993 static void
2995  struct REGEX_INTERNAL_Automaton *nfa,
2996  struct REGEX_INTERNAL_Automaton *dfa,
2997  struct REGEX_INTERNAL_State *dfa_state)
2998 {
2999  struct REGEX_INTERNAL_Transition *ctran;
3000  struct REGEX_INTERNAL_State *new_dfa_state;
3001  struct REGEX_INTERNAL_State *state_contains;
3002  struct REGEX_INTERNAL_State *state_iter;
3003  struct REGEX_INTERNAL_StateSet tmp;
3004  struct REGEX_INTERNAL_StateSet nfa_set;
3005 
3006  for (ctran = dfa_state->transitions_head; NULL != ctran; ctran = ctran->next)
3007  {
3008  if (NULL == ctran->label || NULL != ctran->to_state)
3009  continue;
3010 
3011  nfa_closure_set_create(&tmp, nfa, &dfa_state->nfa_set, ctran->label);
3012  nfa_closure_set_create(&nfa_set, nfa, &tmp, NULL);
3013  state_set_clear(&tmp);
3014 
3015  state_contains = NULL;
3016  for (state_iter = dfa->states_head; NULL != state_iter;
3017  state_iter = state_iter->next)
3018  {
3019  if (0 == state_set_compare(&state_iter->nfa_set, &nfa_set))
3020  {
3021  state_contains = state_iter;
3022  break;
3023  }
3024  }
3025  if (NULL == state_contains)
3026  {
3027  new_dfa_state = dfa_state_create(ctx, &nfa_set);
3028  automaton_add_state(dfa, new_dfa_state);
3029  ctran->to_state = new_dfa_state;
3030  construct_dfa_states(ctx, nfa, dfa, new_dfa_state);
3031  }
3032  else
3033  {
3034  ctran->to_state = state_contains;
3035  state_set_clear(&nfa_set);
3036  }
3037  }
3038 }
3039 
3040 
3058 struct REGEX_INTERNAL_Automaton *
3060  const size_t len,
3061  unsigned int max_path_len)
3062 {
3063  struct REGEX_INTERNAL_Context ctx;
3064  struct REGEX_INTERNAL_Automaton *dfa;
3065  struct REGEX_INTERNAL_Automaton *nfa;
3066  struct REGEX_INTERNAL_StateSet nfa_start_eps_cls;
3067  struct REGEX_INTERNAL_StateSet singleton_set;
3068 
3070 
3071  /* Create NFA */
3072  nfa = REGEX_INTERNAL_construct_nfa(regex, len);
3073 
3074  if (NULL == nfa)
3075  {
3077  "Could not create DFA, because NFA creation failed\n");
3078  return NULL;
3079  }
3080 
3081  dfa = GNUNET_new(struct REGEX_INTERNAL_Automaton);
3082  dfa->type = DFA;
3083  dfa->regex = GNUNET_strdup(regex);
3084 
3085  /* Create DFA start state from epsilon closure */
3086  memset(&singleton_set, 0, sizeof(struct REGEX_INTERNAL_StateSet));
3087  state_set_append(&singleton_set, nfa->start);
3088  nfa_closure_set_create(&nfa_start_eps_cls, nfa, &singleton_set, NULL);
3089  state_set_clear(&singleton_set);
3090  dfa->start = dfa_state_create(&ctx, &nfa_start_eps_cls);
3091  automaton_add_state(dfa, dfa->start);
3092 
3093  construct_dfa_states(&ctx, nfa, dfa, dfa->start);
3095 
3096  /* Minimize DFA */
3097  if (GNUNET_OK != dfa_minimize(&ctx, dfa))
3098  {
3100  return NULL;
3101  }
3102 
3103  /* Create proofs and hashes for all states */
3104  if (GNUNET_OK != automaton_create_proofs(dfa))
3105  {
3107  return NULL;
3108  }
3109 
3110  /* Compress linear DFA paths */
3111  if (1 != max_path_len)
3112  dfa_compress_paths(&ctx, dfa, max_path_len);
3113 
3114  return dfa;
3115 }
3116 
3117 
3124 void
3126 {
3127  struct REGEX_INTERNAL_State *s;
3128  struct REGEX_INTERNAL_State *next_state;
3129 
3130  if (NULL == a)
3131  return;
3132 
3135 
3136  for (s = a->states_head; NULL != s; s = next_state)
3137  {
3138  next_state = s->next;
3141  }
3142 
3143  GNUNET_free(a);
3144 }
3145 
3146 
3155 static int
3156 evaluate_dfa(struct REGEX_INTERNAL_Automaton *a, const char *string)
3157 {
3158  const char *strp;
3159  struct REGEX_INTERNAL_State *s;
3160  unsigned int step_len;
3161 
3162  if (DFA != a->type)
3163  {
3165  "Tried to evaluate DFA, but NFA automaton given");
3166  return -1;
3167  }
3168 
3169  s = a->start;
3170 
3171  /* If the string is empty but the starting state is accepting, we accept. */
3172  if ((NULL == string || 0 == strlen(string)) && s->accepting)
3173  return 0;
3174 
3175  for (strp = string; NULL != strp && *strp; strp += step_len)
3176  {
3177  step_len = dfa_move(&s, strp);
3178 
3179  if (NULL == s)
3180  break;
3181  }
3182 
3183  if (NULL != s && s->accepting)
3184  return 0;
3185 
3186  return 1;
3187 }
3188 
3189 
3197 static int
3198 evaluate_nfa(struct REGEX_INTERNAL_Automaton *a, const char *string)
3199 {
3200  const char *strp;
3201  char str[2];
3202  struct REGEX_INTERNAL_State *s;
3203  struct REGEX_INTERNAL_StateSet sset;
3204  struct REGEX_INTERNAL_StateSet new_sset;
3205  struct REGEX_INTERNAL_StateSet singleton_set;
3206  unsigned int i;
3207  int result;
3208 
3209  if (NFA != a->type)
3210  {
3212  "Tried to evaluate NFA, but DFA automaton given");
3213  return -1;
3214  }
3215 
3216  /* If the string is empty but the starting state is accepting, we accept. */
3217  if ((NULL == string || 0 == strlen(string)) && a->start->accepting)
3218  return 0;
3219 
3220  result = 1;
3221  memset(&singleton_set, 0, sizeof(struct REGEX_INTERNAL_StateSet));
3222  state_set_append(&singleton_set, a->start);
3223  nfa_closure_set_create(&sset, a, &singleton_set, NULL);
3224  state_set_clear(&singleton_set);
3225 
3226  str[1] = '\0';
3227  for (strp = string; NULL != strp && *strp; strp++)
3228  {
3229  str[0] = *strp;
3230  nfa_closure_set_create(&new_sset, a, &sset, str);
3231  state_set_clear(&sset);
3232  nfa_closure_set_create(&sset, a, &new_sset, 0);
3233  state_set_clear(&new_sset);
3234  }
3235 
3236  for (i = 0; i < sset.off; i++)
3237  {
3238  s = sset.states[i];
3239  if ((NULL != s) && (s->accepting))
3240  {
3241  result = 0;
3242  break;
3243  }
3244  }
3245 
3246  state_set_clear(&sset);
3247  return result;
3248 }
3249 
3250 
3258 int
3259 REGEX_INTERNAL_eval(struct REGEX_INTERNAL_Automaton *a, const char *string)
3260 {
3261  int result;
3262 
3263  switch (a->type)
3264  {
3265  case DFA:
3266  result = evaluate_dfa(a, string);
3267  break;
3268 
3269  case NFA:
3270  result = evaluate_nfa(a, string);
3271  break;
3272 
3273  default:
3275  "Evaluating regex failed, automaton has no type!\n");
3276  result = GNUNET_SYSERR;
3277  break;
3278  }
3279 
3280  return result;
3281 }
3282 
3283 
3295 const char *
3297 {
3298  if (NULL == a)
3299  return NULL;
3300 
3301  return a->canonical_regex;
3302 }
3303 
3304 
3312 unsigned int
3314 {
3315  unsigned int t_count;
3316  struct REGEX_INTERNAL_State *s;
3317 
3318  if (NULL == a)
3319  return 0;
3320 
3321  t_count = 0;
3322  for (s = a->states_head; NULL != s; s = s->next)
3323  t_count += s->transition_count;
3324 
3325  return t_count;
3326 }
3327 
3328 
3339 size_t
3340 REGEX_INTERNAL_get_first_key(const char *input_string,
3341  size_t string_len,
3342  struct GNUNET_HashCode *key)
3343 {
3344  size_t size;
3345 
3346  size = string_len < GNUNET_REGEX_INITIAL_BYTES ? string_len
3348  if (NULL == input_string)
3349  {
3350  GNUNET_log(GNUNET_ERROR_TYPE_ERROR, "Given input string was NULL!\n");
3351  return 0;
3352  }
3353  GNUNET_CRYPTO_hash(input_string, size, key);
3354 
3355  return size;
3356 }
3357 
3358 
3369 static void
3370 iterate_initial_edge(unsigned int min_len,
3371  unsigned int max_len,
3372  char *consumed_string,
3373  struct REGEX_INTERNAL_State *state,
3375  void *iterator_cls)
3376 {
3377  char *temp;
3378  struct REGEX_INTERNAL_Transition *t;
3379  unsigned int num_edges = state->transition_count;
3380  struct REGEX_BLOCK_Edge edges[num_edges];
3381  struct REGEX_BLOCK_Edge edge[1];
3382  struct GNUNET_HashCode hash;
3383  struct GNUNET_HashCode hash_new;
3384  unsigned int cur_len;
3385 
3386  if (NULL != consumed_string)
3387  cur_len = strlen(consumed_string);
3388  else
3389  cur_len = 0;
3390 
3391  if (((cur_len >= min_len) || (GNUNET_YES == state->accepting)) &&
3392  (cur_len > 0) && (NULL != consumed_string))
3393  {
3394  if (cur_len <= max_len)
3395  {
3396  if ((NULL != state->proof) &&
3397  (0 != strcmp(consumed_string, state->proof)))
3398  {
3399  (void)state_get_edges(state, edges);
3400  GNUNET_CRYPTO_hash(consumed_string, strlen(consumed_string), &hash);
3402  "Start state for string `%s' is %s\n",
3403  consumed_string,
3404  GNUNET_h2s(&hash));
3405  iterator(iterator_cls,
3406  &hash,
3407  consumed_string,
3408  state->accepting,
3409  num_edges,
3410  edges);
3411  }
3412 
3413  if ((GNUNET_YES == state->accepting) && (cur_len > 1) &&
3414  (state->transition_count < 1) && (cur_len < max_len))
3415  {
3416  /* Special case for regex consisting of just a string that is shorter than
3417  * max_len */
3418  edge[0].label = &consumed_string[cur_len - 1];
3419  edge[0].destination = state->hash;
3420  temp = GNUNET_strdup(consumed_string);
3421  temp[cur_len - 1] = '\0';
3422  GNUNET_CRYPTO_hash(temp, cur_len - 1, &hash_new);
3424  "Start state for short string `%s' is %s\n",
3425  temp,
3426  GNUNET_h2s(&hash_new));
3427  iterator(iterator_cls, &hash_new, temp, GNUNET_NO, 1, edge);
3428  GNUNET_free(temp);
3429  }
3430  }
3431  else /* cur_len > max_len */
3432  {
3433  /* Case where the concatenated labels are longer than max_len, then split. */
3434  edge[0].label = &consumed_string[max_len];
3435  edge[0].destination = state->hash;
3436  temp = GNUNET_strdup(consumed_string);
3437  temp[max_len] = '\0';
3438  GNUNET_CRYPTO_hash(temp, max_len, &hash);
3440  "Start state at split edge `%s'-`%s` is %s\n",
3441  temp,
3442  edge[0].label,
3443  GNUNET_h2s(&hash_new));
3444  iterator(iterator_cls, &hash, temp, GNUNET_NO, 1, edge);
3445  GNUNET_free(temp);
3446  }
3447  }
3448 
3449  if (cur_len < max_len)
3450  {
3451  for (t = state->transitions_head; NULL != t; t = t->next)
3452  {
3453  if (NULL != strchr(t->label, (int)'.'))
3454  {
3455  /* Wildcards not allowed during starting states */
3456  GNUNET_break(0);
3457  continue;
3458  }
3459  if (NULL != consumed_string)
3460  GNUNET_asprintf(&temp, "%s%s", consumed_string, t->label);
3461  else
3462  GNUNET_asprintf(&temp, "%s", t->label);
3463  iterate_initial_edge(min_len,
3464  max_len,
3465  temp,
3466  t->to_state,
3467  iterator,
3468  iterator_cls);
3469  GNUNET_free(temp);
3470  }
3471  }
3472 }
3473 
3474 
3483 void
3486  void *iterator_cls)
3487 {
3488  struct REGEX_INTERNAL_State *s;
3489 
3490  GNUNET_log(GNUNET_ERROR_TYPE_DEBUG, "Iterating over starting edges\n");
3493  NULL,
3494  a->start,
3495  iterator,
3496  iterator_cls);
3497  GNUNET_log(GNUNET_ERROR_TYPE_DEBUG, "Iterating over DFA edges\n");
3498  for (s = a->states_head; NULL != s; s = s->next)
3499  {
3500  struct REGEX_BLOCK_Edge edges[s->transition_count];
3501  unsigned int num_edges;
3502 
3503  num_edges = state_get_edges(s, edges);
3504  if (((NULL != s->proof) && (0 < strlen(s->proof))) || s->accepting)
3505  {
3507  "Creating DFA edges at `%s' under key %s\n",
3508  s->proof,
3509  GNUNET_h2s(&s->hash));
3510  iterator(iterator_cls,
3511  &s->hash,
3512  s->proof,
3513  s->accepting,
3514  num_edges,
3515  edges);
3516  }
3517  s->marked = GNUNET_NO;
3518  }
3519 }
3520 
3521 
3530  char *proof;
3534 };
3535 
3536 
3543 };
3544 
3545 
3557 static void
3559  const struct GNUNET_HashCode *key,
3560  const char *proof,
3561  int accepting,
3562  unsigned int num_edges,
3563  const struct REGEX_BLOCK_Edge *edges)
3564 {
3565  struct GNUNET_CONTAINER_MultiHashMap *hm = cls;
3566  struct temporal_state_store *tmp;
3567  size_t edges_size;
3568 
3569  tmp = GNUNET_new(struct temporal_state_store);
3570  tmp->reachable = GNUNET_NO;
3571  tmp->proof = GNUNET_strdup(proof);
3572  tmp->accepting = accepting;
3573  tmp->num_edges = num_edges;
3574  edges_size = sizeof(struct REGEX_BLOCK_Edge) * num_edges;
3575  tmp->edges = GNUNET_malloc(edges_size);
3576  GNUNET_memcpy(tmp->edges, edges, edges_size);
3579  hm,
3580  key,
3581  tmp,
3583 }
3584 
3585 
3594 static void
3596  struct GNUNET_CONTAINER_MultiHashMap *hm)
3597 {
3598  struct temporal_state_store *child;
3599  unsigned int i;
3600 
3601  if (GNUNET_YES == state->reachable)
3602  /* visited */
3603  return;
3604 
3605  state->reachable = GNUNET_YES;
3606  for (i = 0; i < state->num_edges; i++)
3607  {
3608  child =
3610  if (NULL == child)
3611  {
3612  GNUNET_break(0);
3613  continue;
3614  }
3615  mark_as_reachable(child, hm);
3616  }
3617 }
3618 
3619 
3629 static int
3631  const struct GNUNET_HashCode *key,
3632  void *value)
3633 {
3634  struct GNUNET_CONTAINER_MultiHashMap *hm = cls;
3635  struct temporal_state_store *state = value;
3636 
3637  if (GNUNET_YES == state->reachable)
3638  /* already visited and marked */
3639  return GNUNET_YES;
3640 
3641  if (GNUNET_REGEX_INITIAL_BYTES > strlen(state->proof) &&
3642  GNUNET_NO == state->accepting)
3643  /* not directly reachable */
3644  return GNUNET_YES;
3645 
3646  mark_as_reachable(state, hm);
3647  return GNUNET_YES;
3648 }
3649 
3650 
3661 static int
3662 iterate_reachables(void *cls, const struct GNUNET_HashCode *key, void *value)
3663 {
3664  struct client_iterator *ci = cls;
3665  struct temporal_state_store *state = value;
3666 
3667  if (GNUNET_YES == state->reachable)
3668  {
3669  ci->iterator(ci->iterator_cls,
3670  key,
3671  state->proof,
3672  state->accepting,
3673  state->num_edges,
3674  state->edges);
3675  }
3676  GNUNET_free(state->edges);
3677  GNUNET_free(state->proof);
3678  GNUNET_free(state);
3679  return GNUNET_YES;
3680 }
3681 
3692 void
3695  void *iterator_cls)
3696 {
3697  struct GNUNET_CONTAINER_MultiHashMap *hm;
3698  struct client_iterator ci;
3699 
3701  ci.iterator = iterator;
3703 
3707 
3709 }
3710 
3711 
3712 /* end of regex_internal.c */
#define GNUNET_CONTAINER_DLL_remove(head, tail, element)
Remove an element from a DLL.
unsigned int state_count
Number of states in the automaton.
static int iterator(void *cls, const struct GNUNET_PeerIdentity *key, void *value)
Iterator over hash map entries.
static void store_all_states(void *cls, const struct GNUNET_HashCode *key, const char *proof, int accepting, unsigned int num_edges, const struct REGEX_BLOCK_Edge *edges)
Iterator over all edges of a dfa.
static int automaton_create_proofs(struct REGEX_INTERNAL_Automaton *a)
Create proofs for all states in the given automaton.
unsigned int scc_id
Marking the state as part of an SCC (Strongly Connected Component).
static int iterate_reachables(void *cls, const struct GNUNET_HashCode *key, void *value)
Iterator over hash map entries.
unsigned int incoming_transition_count
Number of incoming transitions.
struct REGEX_INTERNAL_State * states_head
DLL of states.
struct REGEX_INTERNAL_State * end
End state of the partial NFA.
struct GNUNET_HashCode destination
Destionation of the edge.
static void remove_epsilon(const struct StringBuffer *str, struct StringBuffer *ret)
Remove an epsilon from the string str.
int index
Used for SCC detection.
size_t REGEX_INTERNAL_get_first_key(const char *input_string, size_t string_len, struct GNUNET_HashCode *key)
Get the first key for the given input_string.
static void nfa_closure_set_create(struct REGEX_INTERNAL_StateSet *ret, struct REGEX_INTERNAL_Automaton *nfa, struct REGEX_INTERNAL_StateSet *states, const char *label)
Calculates the closure set for the given set of states.
unsigned int traversal_id
Unique state id that is used for traversing the automaton.
static int end
Set if we are to shutdown all services (including ARM).
Definition: gnunet-arm.c:34
static void dfa_remove_unreachable_states(struct REGEX_INTERNAL_Automaton *a)
Remove all unreachable states from DFA &#39;a&#39;.
#define GNUNET_CONTAINER_DLL_insert(head, tail, element)
Insert an element at the head of a DLL.
struct REGEX_INTERNAL_Transition * next
This is a linked list.
unsigned int transition_id
Unique transition id.
static void nfa_add_plus_op(struct REGEX_INTERNAL_Context *ctx)
Pops an NFA fragment (a) from the stack and adds a new fragment (a+)
struct GNUNET_HashCode hash
Hash of the state.
unsigned int REGEX_INTERNAL_get_transition_count(struct REGEX_INTERNAL_Automaton *a)
Get the number of transitions that are contained in the given automaton.
int accepting
If this is an accepting state or not.
static int state_set_compare(struct REGEX_INTERNAL_StateSet *sset1, struct REGEX_INTERNAL_StateSet *sset2)
Compare to state sets by comparing the id&#39;s of the states that are contained in each set...
int GNUNET_snprintf(char *buf, size_t size, const char *format,...)
Like snprintf, just aborts if the buffer is of insufficient size.
unsigned int state_id
Unique state id.
struct REGEX_INTERNAL_Transition * transitions_tail
Strided transitions DLL.
static int start
Set if we are to start default services (including ARM).
Definition: gnunet-arm.c:39
#define GNUNET_assert(cond)
Use this for fatal errors that cannot be handled.
enum REGEX_INTERNAL_AutomatonType type
Type of the automaton.
#define GNUNET_CONTAINER_DLL_insert_before(head, tail, other, element)
Insert an element into a DLL before the given other element.
static int evaluate_nfa(struct REGEX_INTERNAL_Automaton *a, const char *string)
Evaluates the given string using the given NFA automaton.
struct REGEX_INTERNAL_Transition * transitions_head
DLL of transitions.
struct REGEX_INTERNAL_Automaton * REGEX_INTERNAL_construct_nfa(const char *regex, const size_t len)
Construct an NFA by parsing the regex string of length &#39;len&#39;.
struct REGEX_INTERNAL_Automaton * stack_tail
DLL of REGEX_INTERNAL_Automaton&#39;s used as a stack.
static void automaton_add_state(struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s)
Add a state to the automaton &#39;a&#39;, always use this function to alter the states DLL of the automaton...
unsigned int blen
Number of bytes allocated for sbuf.
#define GNUNET_memcpy(dst, src, n)
Call memcpy() but check for n being 0 first.
int contained
Marking the state as contained.
static void sb_printf1(struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg)
Format a string buffer.
static void state_set_clear(struct REGEX_INTERNAL_StateSet *set)
Clears the given StateSet &#39;set&#39;.
#define GNUNET_NO
Definition: gnunet_common.h:78
struct REGEX_INTERNAL_StateSet nfa_set
Set of states on which this state is based on.
static void nfa_add_star_op(struct REGEX_INTERNAL_Context *ctx)
Pops a NFA fragment from the stack (a) and adds a new fragment (a*)
const char * GNUNET_h2s(const struct GNUNET_HashCode *hc)
Convert a hash value to a string (for printing debug messages).
#define GNUNET_OK
Named constants for return values.
Definition: gnunet_common.h:75
#define GNUNET_free_non_null(ptr)
Free the memory pointed to by ptr if ptr is not NULL.
#define GNUNET_new(type)
Allocate a struct or union of the given type.
Automaton representation.
static void automaton_create_proofs_simplify(const struct StringBuffer *R_last_ij, const struct StringBuffer *R_last_ik, const struct StringBuffer *R_last_kk, const struct StringBuffer *R_last_kj, struct StringBuffer *R_cur_ij, struct StringBuffer *R_cur_l, struct StringBuffer *R_cur_r)
Construct the regular expression given the inductive step, $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}...
struct REGEX_INTERNAL_State * from_state
State from which this transition origins.
static struct GNUNET_SCHEDULER_Task * t
Main task.
static int ret
Final status code.
Definition: gnunet-arm.c:89
static void automaton_destroy_state(struct REGEX_INTERNAL_State *s)
Frees the memory used by State s.
#define GNUNET_malloc_large(size)
Wrapper around malloc.
#define GNUNET_strdup(a)
Wrapper around GNUNET_xstrdup_.
char * proof
Proof for this state.
int marked
Marking of the state.
Internal representation of the hash map.
static unsigned int dfa_move(struct REGEX_INTERNAL_State **s, const char *str)
Move from the given state &#39;s&#39; to the next state on transition &#39;str&#39;.
#define GNUNET_break(cond)
Use this for internal assertion violations that are not fatal (can be handled) but should not occur...
Edge representation.
static struct GNUNET_DNSSTUB_Context * ctx
Context for DNS resolution.
static void dfa_add_multi_strides(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Function called for each state in the DFA.
static void mark_as_reachable(struct temporal_state_store *state, struct GNUNET_CONTAINER_MultiHashMap *hm)
Mark state as reachable and call recursively on all its edges.
static void nfa_add_alternation(struct REGEX_INTERNAL_Context *ctx)
Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment that alternates between a an...
struct REGEX_INTERNAL_State * states_tail
DLL of states.
void * GNUNET_CONTAINER_multihashmap_get(const struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key)
Given a key find a value in the map matching the key.
struct REGEX_INTERNAL_State * head
MDLL of states.
Set of states using MDLL API.
void REGEX_INTERNAL_automaton_traverse(const struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *start, REGEX_INTERNAL_traverse_check check, void *check_cls, REGEX_INTERNAL_traverse_action action, void *action_cls)
Traverses the given automaton using depth-first-search (DFS) from it&#39;s start state, visiting all reachable states and calling &#39;action&#39; on each one of them.
#define GNUNET_log_strerror(level, cmd)
Log an error message at log-level &#39;level&#39; that indicates a failure of the command &#39;cmd&#39; with the mess...
int GNUNET_asprintf(char **buf, const char *format,...)
Like asprintf, just portable.
static void sb_strdup(struct StringBuffer *out, const struct StringBuffer *in)
Copy the given string buffer from &#39;in&#39; to &#39;out&#39;.
Context that contains an id counter for states and transitions as well as a DLL of automatons used as...
#define GNUNET_CONTAINER_MDLL_insert(mdll, head, tail, element)
Insert an element at the head of a MDLL.
#define GNUNET_CONTAINER_MDLL_insert_tail(mdll, head, tail, element)
Insert an element at the tail of a MDLL.
static void sb_append(struct StringBuffer *ret, const struct StringBuffer *sarg)
Append a string.
#define GNUNET_array_grow(arr, size, tsize)
Grow a well-typed (!) array.
enum State state
current state of profiling
static struct GNUNET_OS_Process * p
Helper process we started.
Definition: gnunet-qr.c:59
, &#39; bother checking if a value already exists (faster than GNUNET_CONTAINER_MULTIHASHMAPOPTION_...
Store regex iterator and cls in one place to pass to the hashmap iterator.
static char * value
Value of the record to add/remove.
const unsigned int stride
Length of the strides.
library to parse regular expressions into dfa
char * name
Human readable name of the state.
static void sb_printf2(struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg1, const struct StringBuffer *sarg2)
Format a string buffer.
static uint64_t proof
Definition: gnunet-scrypt.c:41
unsigned int transition_count
Number of transitions from this state to other states.
static int state_compare(const void *a, const void *b)
Compare two states.
static void sb_strdup_cstr(struct StringBuffer *out, const char *cstr)
Copy the given string buffer from &#39;in&#39; to &#39;out&#39;.
#define GNUNET_realloc(ptr, size)
Wrapper around realloc.
void GNUNET_CRYPTO_hash(const void *block, size_t size, struct GNUNET_HashCode *ret)
Compute hash of a given block.
Definition: crypto_hash.c:44
static void sb_wrap(struct StringBuffer *ret, const char *format, size_t extra_chars)
Wrap a string buffer, that is, set ret to the format string which contains an "%s" which is to be rep...
static int nullstrcmp(const char *str1, const char *str2)
Compare two strings for equality.
void GNUNET_CONTAINER_multihashmap_destroy(struct GNUNET_CONTAINER_MultiHashMap *map)
Destroy a hash map.
static struct PeerEntry ** table
Table with our interned peer IDs.
Definition: peer.c:54
#define GNUNET_CONTAINER_MDLL_remove(mdll, head, tail, element)
Remove an element from a MDLL.
#define GNUNET_MAX(a, b)
Definition: gnunet_common.h:82
int16_t null_flag
Buffer currently represents "NULL" (not the empty string!)
static void dfa_remove_dead_states(struct REGEX_INTERNAL_Automaton *a)
Remove all dead states from the DFA &#39;a&#39;.
void(* REGEX_INTERNAL_KeyIterator)(void *cls, const struct GNUNET_HashCode *key, const char *proof, int accepting, unsigned int num_edges, const struct REGEX_BLOCK_Edge *edges)
Iterator callback function.
void REGEX_INTERNAL_dfa_add_multi_strides(struct REGEX_INTERNAL_Context *regex_ctx, struct REGEX_INTERNAL_Automaton *dfa, const unsigned int stride_len)
Adds multi-strided transitions to the given &#39;dfa&#39;.
static void iterate_initial_edge(unsigned int min_len, unsigned int max_len, char *consumed_string, struct REGEX_INTERNAL_State *state, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
Recursive function that calls the iterator for each synthetic start state.
struct REGEX_INTERNAL_State ** states
Array of states.
static int result
Global testing status.
static void REGEX_INTERNAL_context_init(struct REGEX_INTERNAL_Context *ctx)
Initialize a new context.
Transition between two states.
static void sb_init(struct StringBuffer *sb, size_t n)
Initialize string buffer for storing strings of up to n characters.
static void automaton_merge_states(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s1, struct REGEX_INTERNAL_State *s2)
Merge two states into one.
Struct to hold all the relevant state information in the HashMap.
#define GNUNET_REGEX_INITIAL_BYTES
Constant for how many bytes the initial string regex should have.
int16_t synced
If this entry is part of the last/current generation array, this flag is GNUNET_YES if the last and c...
A 512-bit hashcode.
static struct REGEX_INTERNAL_State * dfa_state_create(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_StateSet *nfa_states)
Creates a new DFA state based on a set of NFA states.
struct REGEX_INTERNAL_State * start
First state of the automaton.
static int evaluate_dfa(struct REGEX_INTERNAL_Automaton *a, const char *string)
Evaluates the given string using the given DFA automaton.
static int reachability_iterator(void *cls, const struct GNUNET_HashCode *key, void *value)
Iterator over hash map entries to mark the ones that are reachable.
static int sb_nullstrcmp(const struct StringBuffer *s1, const struct StringBuffer *s2)
Compare two strings for equality.
static void state_set_append(struct REGEX_INTERNAL_StateSet *set, struct REGEX_INTERNAL_State *state)
Append state to the given StateSet.
static void sb_printf3(struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg1, const struct StringBuffer *sarg2, const struct StringBuffer *sarg3)
Format a string buffer.
struct GNUNET_HashCode key
The key used in the DHT.
#define GNUNET_SYSERR
Definition: gnunet_common.h:76
static unsigned int size
Size of the "table".
Definition: peer.c:66
static void state_add_transition(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_State *from_state, const char *label, struct REGEX_INTERNAL_State *to_state)
Adds a transition from one state to another on label.
Context for adding strided transitions to a DFA.
static int sb_strncmp(const struct StringBuffer *str1, const struct StringBuffer *str2, size_t n)
Compare n bytes of &#39;str1&#39; and &#39;str2&#39;.
static void construct_dfa_states(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *nfa, struct REGEX_INTERNAL_Automaton *dfa, struct REGEX_INTERNAL_State *dfa_state)
Create DFA states based on given &#39;nfa&#39; and starting with &#39;dfa_state&#39;.
static void nfa_add_question_op(struct REGEX_INTERNAL_Context *ctx)
Pops an NFA fragment (a) from the stack and adds a new fragment (a?)
static int dfa_minimize(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a)
Minimize the given DFA &#39;a&#39; by removing all unreachable states, removing all dead states and merging a...
#define GNUNET_CONTAINER_DLL_insert_tail(head, tail, element)
Insert an element at the tail of a DLL.
void REGEX_INTERNAL_automaton_destroy(struct REGEX_INTERNAL_Automaton *a)
Free the memory allocated by constructing the REGEX_INTERNAL_Automaton data structure.
unsigned int off
Number of entries in use in the &#39;states&#39; array.
void REGEX_INTERNAL_iterate_all_edges(struct REGEX_INTERNAL_Automaton *a, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
Iterate over all edges starting from start state of automaton &#39;a&#39;.
int GNUNET_CONTAINER_multihashmap_put(struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key, void *value, enum GNUNET_CONTAINER_MultiHashMapOption opt)
Store a key-value pair in the map.
static int has_epsilon(const struct StringBuffer *str)
Check if the string &#39;str&#39; starts with an epsilon (empty string).
struct REGEX_BLOCK_Edge * edges
const char * label
Label of the edge.
static int sb_strncmp_cstr(const struct StringBuffer *str1, const char *str2, size_t n)
Compare n bytes of &#39;str1&#39; and &#39;str2&#39;.
static void automaton_remove_state(struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s)
Remove a state from the given automaton &#39;a&#39;.
void(* REGEX_INTERNAL_traverse_action)(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Function that is called with each state, when traversing an automaton.
#define GNUNET_strndup(a, length)
Wrapper around GNUNET_xstrndup_.
unsigned int id
Unique id of this transition.
const char * REGEX_INTERNAL_get_canonical_regex(struct REGEX_INTERNAL_Automaton *a)
Get the canonical regex of the given automaton.
static unsigned int state_get_edges(struct REGEX_INTERNAL_State *s, struct REGEX_BLOCK_Edge *edges)
Get all edges leaving state s.
struct REGEX_INTERNAL_State * next
This is a linked list to keep states in an automaton.
int is_multistrided
GNUNET_YES, if multi strides have been added to the Automaton.
int REGEX_INTERNAL_eval(struct REGEX_INTERNAL_Automaton *a, const char *string)
Evaluates the given string against the given compiled regex a.
static struct GNUNET_OS_Process * child
The child process we spawn.
static void state_remove_transition(struct REGEX_INTERNAL_State *state, struct REGEX_INTERNAL_Transition *transition)
Remove a &#39;transition&#39; from &#39;state&#39;.
static void nfa_add_states(struct REGEX_INTERNAL_Automaton *n, struct REGEX_INTERNAL_State *states_head, struct REGEX_INTERNAL_State *states_tail)
Adds a list of states to the given automaton &#39;n&#39;.
static int sb_strcmp(const struct StringBuffer *s1, const struct StringBuffer *s2)
Compare two strings for equality.
unsigned int dfs_id
Linear state ID accquired by depth-first-search.
static struct REGEX_INTERNAL_Automaton * nfa_fragment_create(struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *end)
Creates a new NFA fragment.
struct REGEX_INTERNAL_State * to_state
State to which this transition leads.
struct REGEX_INTERNAL_Automaton * REGEX_INTERNAL_construct_dfa(const char *regex, const size_t len, unsigned int max_path_len)
Construct DFA for the given &#39;regex&#39; of length &#39;len&#39;.
char * canonical_regex
Canonical regex (result of RX->NFA->DFA->RX)
#define GNUNET_log(kind,...)
static void nfa_add_concatenation(struct REGEX_INTERNAL_Context *ctx)
Pops two NFA fragments (a, b) from the stack and concatenates them (ab)
REGEX_INTERNAL_KeyIterator iterator
static void automaton_state_traverse(struct REGEX_INTERNAL_State *s, int *marks, unsigned int *count, REGEX_INTERNAL_traverse_check check, void *check_cls, REGEX_INTERNAL_traverse_action action, void *action_cls)
Depth-first traversal (DFS) of all states that are reachable from state &#39;s&#39;.
static void mark_states(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Set the given state &#39;marked&#39; to GNUNET_YES.
struct GNUNET_CONTAINER_MultiHashMap * GNUNET_CONTAINER_multihashmap_create(unsigned int len, int do_not_copy_keys)
Create a multi hash map.
char * label
Label for this transition.
int(* REGEX_INTERNAL_traverse_check)(void *cls, struct REGEX_INTERNAL_State *s, struct REGEX_INTERNAL_Transition *t)
Function that get&#39;s passed to automaton traversal and is called before each next traversal from state...
static void automaton_fragment_clear(struct REGEX_INTERNAL_Automaton *a)
Clears an automaton fragment.
static void sb_free(struct StringBuffer *sb)
Free resources of the given string buffer.
#define GNUNET_YES
Definition: gnunet_common.h:77
void REGEX_INTERNAL_iterate_reachable_edges(struct REGEX_INTERNAL_Automaton *a, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
Iterate over all edges of automaton &#39;a&#39; that are reachable from a state with a proof of at least GNUN...
size_t slen
Length of the string in the buffer.
String container for faster string operations.
static struct GNUNET_ARM_Operation * op
Current operation.
Definition: gnunet-arm.c:139
common internal definitions for regex library.
void dfa_compress_paths_helper(struct REGEX_INTERNAL_Automaton *dfa, struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *cur, char *label, unsigned int max_len, struct REGEX_INTERNAL_Transition **transitions_head, struct REGEX_INTERNAL_Transition **transitions_tail)
Recursive Helper function for DFA path compression.
char * abuf
Allocated buffer.
unsigned int len
Length of the MDLL.
static void sb_realloc(struct StringBuffer *ret, size_t nlen)
Reallocate the buffer of &#39;ret&#39; to fit &#39;nlen&#39; characters; move the existing string to the beginning of...
static void number_states(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Helper function used as &#39;action&#39; in &#39;REGEX_INTERNAL_automaton_traverse&#39; function to create the depth-...
int GNUNET_CONTAINER_multihashmap_iterate(struct GNUNET_CONTAINER_MultiHashMap *map, GNUNET_CONTAINER_MulitHashMapIteratorCallback it, void *it_cls)
Iterate over all entries in the map.
int lowlink
Used for SCC detection.
static void remove_parentheses(struct StringBuffer *str)
Remove parentheses surrounding string str.
static int dfa_merge_nondistinguishable_states(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a)
Merge all non distinguishable states in the DFA &#39;a&#39;.
unsigned int id
Unique state id.
char * sbuf
Buffer holding the string (may start in the middle!); NOT 0-terminated!
static void sb_append_cstr(struct StringBuffer *ret, const char *cstr)
Append a C string.
static int sb_strkcmp(const struct StringBuffer *str1, const struct StringBuffer *str2, size_t k)
Compare &#39;str1&#39;, starting from position &#39;k&#39;, with whole &#39;str2&#39;.
#define GNUNET_malloc(size)
Wrapper around malloc.
static void dfa_compress_paths(struct REGEX_INTERNAL_Context *regex_ctx, struct REGEX_INTERNAL_Automaton *dfa, unsigned int max_len)
Compress paths in the given &#39;dfa&#39;.
static int needs_parentheses(const struct StringBuffer *str)
Check if the given string str needs parentheses around it when using it to generate a regex...
#define GNUNET_free(ptr)
Wrapper around free.
struct REGEX_INTERNAL_Transition * transitions_tail
DLL of transitions.
static struct REGEX_INTERNAL_State * nfa_state_create(struct REGEX_INTERNAL_Context *ctx, int accepting)
Creates a new NFA state.
static void dfa_add_multi_strides_helper(void *cls, const unsigned int depth, char *label, struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *s)
Recursive helper function to add strides to a DFA.
struct REGEX_INTERNAL_Automaton * stack_head
DLL of REGEX_INTERNAL_Automaton&#39;s used as a stack.
struct REGEX_INTERNAL_Transition * transitions_head
Strided transitions DLL.
static void nfa_add_label(struct REGEX_INTERNAL_Context *ctx, const char *label)
Adds a new nfa fragment to the stack.
struct REGEX_INTERNAL_State * tail
MDLL of states.