GNUnet  0.10.x
regex_internal.c
Go to the documentation of this file.
1 /*
2  This file is part of GNUnet
3  Copyright (C) 2012 GNUnet e.V.
4 
5  GNUnet is free software: you can redistribute it and/or modify it
6  under the terms of the GNU Affero General Public License as published
7  by the Free Software Foundation, either version 3 of the License,
8  or (at your option) any later version.
9 
10  GNUnet is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  Affero General Public License for more details.
14 
15  You should have received a copy of the GNU Affero General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  SPDX-License-Identifier: AGPL3.0-or-later
19 */
26 #include "platform.h"
27 #include "gnunet_util_lib.h"
28 #include "gnunet_regex_service.h"
29 #include "regex_internal_lib.h"
30 #include "regex_internal.h"
31 
32 
37 #define REGEX_DEBUG_DFA GNUNET_NO
38 
43 {
48 
53 
57  unsigned int len;
58 };
59 
60 
67 static void
70 {
71  if (set->off == set->size)
72  GNUNET_array_grow (set->states, set->size, set->size * 2 + 4);
73  set->states[set->off++] = state;
74 }
75 
76 
85 static int
86 nullstrcmp (const char *str1, const char *str2)
87 {
88  if ((NULL == str1) != (NULL == str2))
89  return -1;
90  if ((NULL == str1) && (NULL == str2))
91  return 0;
92 
93  return strcmp (str1, str2);
94 }
95 
96 
106 static void
108  struct REGEX_INTERNAL_State *from_state,
109  const char *label,
110  struct REGEX_INTERNAL_State *to_state)
111 {
113  struct REGEX_INTERNAL_Transition *oth;
114 
115  if (NULL == from_state)
116  {
117  GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not create Transition.\n");
118  return;
119  }
120 
121  /* Do not add duplicate state transitions */
122  for (t = from_state->transitions_head; NULL != t; t = t->next)
123  {
124  if (t->to_state == to_state && 0 == nullstrcmp (t->label, label) &&
125  t->from_state == from_state)
126  return;
127  }
128 
129  /* sort transitions by label */
130  for (oth = from_state->transitions_head; NULL != oth; oth = oth->next)
131  {
132  if (0 < nullstrcmp (oth->label, label))
133  break;
134  }
135 
137  if (NULL != ctx)
138  t->id = ctx->transition_id++;
139  if (NULL != label)
140  t->label = GNUNET_strdup (label);
141  else
142  t->label = NULL;
143  t->to_state = to_state;
144  t->from_state = from_state;
145 
146  /* Add outgoing transition to 'from_state' */
147  from_state->transition_count++;
149  from_state->transitions_tail,
150  oth,
151  t);
152 }
153 
154 
161 static void
163  struct REGEX_INTERNAL_Transition *transition)
164 {
165  if (NULL == state || NULL == transition)
166  return;
167 
168  if (transition->from_state != state)
169  return;
170 
171  GNUNET_free_non_null (transition->label);
172 
173  state->transition_count--;
175  state->transitions_tail,
176  transition);
177 
178  GNUNET_free (transition);
179 }
180 
181 
192 static int
193 state_compare (const void *a, const void *b)
194 {
195  struct REGEX_INTERNAL_State **s1 = (struct REGEX_INTERNAL_State **) a;
196  struct REGEX_INTERNAL_State **s2 = (struct REGEX_INTERNAL_State **) b;
197 
198  return (*s1)->id - (*s2)->id;
199 }
200 
201 
211 static unsigned int
213 {
215  unsigned int count;
216 
217  if (NULL == s)
218  return 0;
219 
220  count = 0;
221 
222  for (t = s->transitions_head; NULL != t; t = t->next)
223  {
224  if (NULL != t->to_state)
225  {
226  edges[count].label = t->label;
227  edges[count].destination = t->to_state->hash;
228  count++;
229  }
230  }
231  return count;
232 }
233 
234 
243 static int
245  struct REGEX_INTERNAL_StateSet *sset2)
246 {
247  int result;
248  unsigned int i;
249 
250  if (NULL == sset1 || NULL == sset2)
251  return 1;
252 
253  result = sset1->off - sset2->off;
254  if (result < 0)
255  return -1;
256  if (result > 0)
257  return 1;
258  for (i = 0; i < sset1->off; i++)
259  if (0 != (result = state_compare (&sset1->states[i], &sset2->states[i])))
260  break;
261  return result;
262 }
263 
264 
270 static void
272 {
273  GNUNET_array_grow (set->states, set->size, 0);
274  set->off = 0;
275 }
276 
277 
284 static void
286 {
287  if (NULL == a)
288  return;
289 
290  a->start = NULL;
291  a->end = NULL;
292  a->states_head = NULL;
293  a->states_tail = NULL;
294  a->state_count = 0;
295  GNUNET_free (a);
296 }
297 
298 
304 static void
306 {
308  struct REGEX_INTERNAL_Transition *next_t;
309 
310  if (NULL == s)
311  return;
312 
315  state_set_clear (&s->nfa_set);
316  for (t = s->transitions_head; NULL != t; t = next_t)
317  {
318  next_t = t->next;
320  }
321 
322  GNUNET_free (s);
323 }
324 
325 
334 static void
336  struct REGEX_INTERNAL_State *s)
337 {
338  struct REGEX_INTERNAL_State *s_check;
339  struct REGEX_INTERNAL_Transition *t_check;
340  struct REGEX_INTERNAL_Transition *t_check_next;
341 
342  if (NULL == a || NULL == s)
343  return;
344 
345  /* remove all transitions leading to this state */
346  for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
347  {
348  for (t_check = s_check->transitions_head; NULL != t_check;
349  t_check = t_check_next)
350  {
351  t_check_next = t_check->next;
352  if (t_check->to_state == s)
353  state_remove_transition (s_check, t_check);
354  }
355  }
356 
357  /* remove state */
359  a->state_count--;
360 
362 }
363 
364 
374 static void
376  struct REGEX_INTERNAL_Automaton *a,
377  struct REGEX_INTERNAL_State *s1,
378  struct REGEX_INTERNAL_State *s2)
379 {
380  struct REGEX_INTERNAL_State *s_check;
381  struct REGEX_INTERNAL_Transition *t_check;
383  struct REGEX_INTERNAL_Transition *t_next;
384  int is_dup;
385 
386  if (s1 == s2)
387  return;
388 
389  /* 1. Make all transitions pointing to s2 point to s1, unless this transition
390  * does not already exists, if it already exists remove transition. */
391  for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
392  {
393  for (t_check = s_check->transitions_head; NULL != t_check; t_check = t_next)
394  {
395  t_next = t_check->next;
396 
397  if (s2 == t_check->to_state)
398  {
399  is_dup = GNUNET_NO;
400  for (t = t_check->from_state->transitions_head; NULL != t; t = t->next)
401  {
402  if (t->to_state == s1 && 0 == strcmp (t_check->label, t->label))
403  is_dup = GNUNET_YES;
404  }
405  if (GNUNET_NO == is_dup)
406  t_check->to_state = s1;
407  else
408  state_remove_transition (t_check->from_state, t_check);
409  }
410  }
411  }
412 
413  /* 2. Add all transitions from s2 to sX to s1 */
414  for (t_check = s2->transitions_head; NULL != t_check; t_check = t_check->next)
415  {
416  if (t_check->to_state != s1)
417  state_add_transition (ctx, s1, t_check->label, t_check->to_state);
418  }
419 
420  /* 3. Rename s1 to {s1,s2} */
421 #if REGEX_DEBUG_DFA
422  char *new_name;
423 
424  new_name = s1->name;
425  GNUNET_asprintf (&s1->name, "{%s,%s}", new_name, s2->name);
426  GNUNET_free (new_name);
427 #endif
428 
429  /* remove state */
431  a->state_count--;
433 }
434 
435 
443 static void
445  struct REGEX_INTERNAL_State *s)
446 {
448  a->state_count++;
449 }
450 
451 
466 static void
468  int *marks,
469  unsigned int *count,
471  void *check_cls,
473  void *action_cls)
474 {
476 
477  if (GNUNET_YES == marks[s->traversal_id])
478  return;
479 
480  marks[s->traversal_id] = GNUNET_YES;
481 
482  if (NULL != action)
483  action (action_cls, *count, s);
484 
485  (*count)++;
486 
487  for (t = s->transitions_head; NULL != t; t = t->next)
488  {
489  if (NULL == check ||
490  (NULL != check && GNUNET_YES == check (check_cls, s, t)))
491  {
493  marks,
494  count,
495  check,
496  check_cls,
497  action,
498  action_cls);
499  }
500  }
501 }
502 
503 
517 void
519  struct REGEX_INTERNAL_State *start,
521  void *check_cls,
523  void *action_cls)
524 {
525  unsigned int count;
526  struct REGEX_INTERNAL_State *s;
527 
528  if (NULL == a || 0 == a->state_count)
529  return;
530 
531  int marks[a->state_count];
532 
533  for (count = 0, s = a->states_head; NULL != s && count < a->state_count;
534  s = s->next, count++)
535  {
536  s->traversal_id = count;
537  marks[s->traversal_id] = GNUNET_NO;
538  }
539 
540  count = 0;
541 
542  if (NULL == start)
543  s = a->start;
544  else
545  s = start;
546 
548  marks,
549  &count,
550  check,
551  check_cls,
552  action,
553  action_cls);
554 }
555 
556 
561 {
566  char *sbuf;
567 
571  char *abuf;
572 
576  size_t slen;
577 
581  unsigned int blen;
582 
586  int16_t null_flag;
587 
597  int16_t synced;
598 };
599 
600 
609 static int
610 sb_nullstrcmp (const struct StringBuffer *s1, const struct StringBuffer *s2)
611 {
612  if ((GNUNET_YES == s1->null_flag) && (GNUNET_YES == s2->null_flag))
613  return 0;
614  if ((GNUNET_YES == s1->null_flag) || (GNUNET_YES == s2->null_flag))
615  return -1;
616  if (s1->slen != s2->slen)
617  return -1;
618  if (0 == s1->slen)
619  return 0;
620  return memcmp (s1->sbuf, s2->sbuf, s1->slen);
621 }
622 
623 
632 static int
633 sb_strcmp (const struct StringBuffer *s1, const struct StringBuffer *s2)
634 {
635  if (s1->slen != s2->slen)
636  return -1;
637  if (0 == s1->slen)
638  return 0;
639  return memcmp (s1->sbuf, s2->sbuf, s1->slen);
640 }
641 
642 
650 static void
651 sb_realloc (struct StringBuffer *ret, size_t nlen)
652 {
653  char *old;
654 
655  GNUNET_assert (nlen >= ret->slen);
656  old = ret->abuf;
657  ret->abuf = GNUNET_malloc (nlen);
658  ret->blen = nlen;
659  GNUNET_memcpy (ret->abuf, ret->sbuf, ret->slen);
660  ret->sbuf = ret->abuf;
661  GNUNET_free_non_null (old);
662 }
663 
664 
671 static void
672 sb_append (struct StringBuffer *ret, const struct StringBuffer *sarg)
673 {
674  if (GNUNET_YES == ret->null_flag)
675  ret->slen = 0;
676  ret->null_flag = GNUNET_NO;
677  if (ret->blen < sarg->slen + ret->slen)
678  sb_realloc (ret, ret->blen + sarg->slen + 128);
679  GNUNET_memcpy (&ret->sbuf[ret->slen], sarg->sbuf, sarg->slen);
680  ret->slen += sarg->slen;
681 }
682 
683 
690 static void
691 sb_append_cstr (struct StringBuffer *ret, const char *cstr)
692 {
693  size_t cstr_len = strlen (cstr);
694 
695  if (GNUNET_YES == ret->null_flag)
696  ret->slen = 0;
697  ret->null_flag = GNUNET_NO;
698  if (ret->blen < cstr_len + ret->slen)
699  sb_realloc (ret, ret->blen + cstr_len + 128);
700  GNUNET_memcpy (&ret->sbuf[ret->slen], cstr, cstr_len);
701  ret->slen += cstr_len;
702 }
703 
704 
715 static void
716 sb_wrap (struct StringBuffer *ret, const char *format, size_t extra_chars)
717 {
718  char *temp;
719 
720  if (GNUNET_YES == ret->null_flag)
721  ret->slen = 0;
722  ret->null_flag = GNUNET_NO;
723  temp = GNUNET_malloc (ret->slen + extra_chars + 1);
724  GNUNET_snprintf (temp,
725  ret->slen + extra_chars + 1,
726  format,
727  (int) ret->slen,
728  ret->sbuf);
729  GNUNET_free_non_null (ret->abuf);
730  ret->abuf = temp;
731  ret->sbuf = temp;
732  ret->blen = ret->slen + extra_chars + 1;
733  ret->slen = ret->slen + extra_chars;
734 }
735 
736 
746 static void
748  const char *format,
749  size_t extra_chars,
750  const struct StringBuffer *sarg)
751 {
752  if (ret->blen < sarg->slen + extra_chars + 1)
753  sb_realloc (ret, sarg->slen + extra_chars + 1);
754  ret->null_flag = GNUNET_NO;
755  ret->sbuf = ret->abuf;
756  ret->slen = sarg->slen + extra_chars;
757  GNUNET_snprintf (ret->sbuf, ret->blen, format, (int) sarg->slen, sarg->sbuf);
758 }
759 
760 
770 static void
772  const char *format,
773  size_t extra_chars,
774  const struct StringBuffer *sarg1,
775  const struct StringBuffer *sarg2)
776 {
777  if (ret->blen < sarg1->slen + sarg2->slen + extra_chars + 1)
778  sb_realloc (ret, sarg1->slen + sarg2->slen + extra_chars + 1);
779  ret->null_flag = GNUNET_NO;
780  ret->slen = sarg1->slen + sarg2->slen + extra_chars;
781  ret->sbuf = ret->abuf;
782  GNUNET_snprintf (ret->sbuf,
783  ret->blen,
784  format,
785  (int) sarg1->slen,
786  sarg1->sbuf,
787  (int) sarg2->slen,
788  sarg2->sbuf);
789 }
790 
791 
803 static void
805  const char *format,
806  size_t extra_chars,
807  const struct StringBuffer *sarg1,
808  const struct StringBuffer *sarg2,
809  const struct StringBuffer *sarg3)
810 {
811  if (ret->blen < sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1)
812  sb_realloc (ret, sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1);
813  ret->null_flag = GNUNET_NO;
814  ret->slen = sarg1->slen + sarg2->slen + sarg3->slen + extra_chars;
815  ret->sbuf = ret->abuf;
816  GNUNET_snprintf (ret->sbuf,
817  ret->blen,
818  format,
819  (int) sarg1->slen,
820  sarg1->sbuf,
821  (int) sarg2->slen,
822  sarg2->sbuf,
823  (int) sarg3->slen,
824  sarg3->sbuf);
825 }
826 
827 
834 static void
835 sb_free (struct StringBuffer *sb)
836 {
837  GNUNET_array_grow (sb->abuf, sb->blen, 0);
838  sb->slen = 0;
839  sb->sbuf = NULL;
840  sb->null_flag = GNUNET_YES;
841 }
842 
843 
850 static void
851 sb_strdup (struct StringBuffer *out, const struct StringBuffer *in)
852 
853 {
854  out->null_flag = in->null_flag;
855  if (GNUNET_YES == out->null_flag)
856  return;
857  if (out->blen < in->slen)
858  {
859  GNUNET_array_grow (out->abuf, out->blen, in->slen);
860  }
861  out->sbuf = out->abuf;
862  out->slen = in->slen;
863  GNUNET_memcpy (out->sbuf, in->sbuf, out->slen);
864 }
865 
866 
873 static void
874 sb_strdup_cstr (struct StringBuffer *out, const char *cstr)
875 {
876  if (NULL == cstr)
877  {
878  out->null_flag = GNUNET_YES;
879  return;
880  }
881  out->null_flag = GNUNET_NO;
882  out->slen = strlen (cstr);
883  if (out->blen < out->slen)
884  {
885  GNUNET_array_grow (out->abuf, out->blen, out->slen);
886  }
887  out->sbuf = out->abuf;
888  GNUNET_memcpy (out->sbuf, cstr, out->slen);
889 }
890 
891 
900 static int
901 needs_parentheses (const struct StringBuffer *str)
902 {
903  size_t slen;
904  const char *op;
905  const char *cl;
906  const char *pos;
907  const char *end;
908  unsigned int cnt;
909 
910  if ((GNUNET_YES == str->null_flag) || ((slen = str->slen) < 2))
911  return GNUNET_NO;
912  pos = str->sbuf;
913  if ('(' != pos[0])
914  return GNUNET_YES;
915  end = str->sbuf + slen;
916  cnt = 1;
917  pos++;
918  while (cnt > 0)
919  {
920  cl = memchr (pos, ')', end - pos);
921  if (NULL == cl)
922  {
923  GNUNET_break (0);
924  return GNUNET_YES;
925  }
926  /* while '(' before ')', count opening parens */
927  while ((NULL != (op = memchr (pos, '(', end - pos))) && (op < cl))
928  {
929  cnt++;
930  pos = op + 1;
931  }
932  /* got ')' first */
933  cnt--;
934  pos = cl + 1;
935  }
936  return (*pos == '\0') ? GNUNET_NO : GNUNET_YES;
937 }
938 
939 
949 static void
951 {
952  size_t slen;
953  const char *pos;
954  const char *end;
955  const char *sbuf;
956  const char *op;
957  const char *cp;
958  unsigned int cnt;
959 
960  if (0)
961  return;
962  sbuf = str->sbuf;
963  if ((GNUNET_YES == str->null_flag) || (1 >= (slen = str->slen)) ||
964  ('(' != str->sbuf[0]) || (')' != str->sbuf[slen - 1]))
965  return;
966  cnt = 0;
967  pos = &sbuf[1];
968  end = &sbuf[slen - 1];
969  op = memchr (pos, '(', end - pos);
970  cp = memchr (pos, ')', end - pos);
971  while (NULL != cp)
972  {
973  while ((NULL != op) && (op < cp))
974  {
975  cnt++;
976  pos = op + 1;
977  op = memchr (pos, '(', end - pos);
978  }
979  while ((NULL != cp) && ((NULL == op) || (cp < op)))
980  {
981  if (0 == cnt)
982  return; /* can't strip parens */
983  cnt--;
984  pos = cp + 1;
985  cp = memchr (pos, ')', end - pos);
986  }
987  }
988  if (0 != cnt)
989  {
990  GNUNET_break (0);
991  return;
992  }
993  str->sbuf++;
994  str->slen -= 2;
995 }
996 
997 
1006 static int
1007 has_epsilon (const struct StringBuffer *str)
1008 {
1009  return (GNUNET_YES != str->null_flag) && (0 < str->slen) &&
1010  ('(' == str->sbuf[0]) && ('|' == str->sbuf[1]) &&
1011  (')' == str->sbuf[str->slen - 1]);
1012 }
1013 
1014 
1024 static void
1025 remove_epsilon (const struct StringBuffer *str, struct StringBuffer *ret)
1026 {
1027  if (GNUNET_YES == str->null_flag)
1028  {
1029  ret->null_flag = GNUNET_YES;
1030  return;
1031  }
1032  if ((str->slen > 1) && ('(' == str->sbuf[0]) && ('|' == str->sbuf[1]) &&
1033  (')' == str->sbuf[str->slen - 1]))
1034  {
1035  /* remove epsilon */
1036  if (ret->blen < str->slen - 3)
1037  {
1038  GNUNET_array_grow (ret->abuf, ret->blen, str->slen - 3);
1039  }
1040  ret->sbuf = ret->abuf;
1041  ret->slen = str->slen - 3;
1042  GNUNET_memcpy (ret->sbuf, &str->sbuf[2], ret->slen);
1043  return;
1044  }
1045  sb_strdup (ret, str);
1046 }
1047 
1048 
1058 static int
1059 sb_strncmp (const struct StringBuffer *str1,
1060  const struct StringBuffer *str2,
1061  size_t n)
1062 {
1063  size_t max;
1064 
1065  if ((str1->slen != str2->slen) && ((str1->slen < n) || (str2->slen < n)))
1066  return -1;
1067  max = GNUNET_MAX (str1->slen, str2->slen);
1068  if (max > n)
1069  max = n;
1070  return memcmp (str1->sbuf, str2->sbuf, max);
1071 }
1072 
1073 
1083 static int
1084 sb_strncmp_cstr (const struct StringBuffer *str1, const char *str2, size_t n)
1085 {
1086  if (str1->slen < n)
1087  return -1;
1088  return memcmp (str1->sbuf, str2, n);
1089 }
1090 
1091 
1099 static void
1100 sb_init (struct StringBuffer *sb, size_t n)
1101 {
1102  sb->null_flag = GNUNET_NO;
1103  sb->abuf = sb->sbuf = (0 == n) ? NULL : GNUNET_malloc (n);
1104  sb->blen = n;
1105  sb->slen = 0;
1106 }
1107 
1108 
1118 static int
1119 sb_strkcmp (const struct StringBuffer *str1,
1120  const struct StringBuffer *str2,
1121  size_t k)
1122 {
1123  if ((GNUNET_YES == str1->null_flag) || (GNUNET_YES == str2->null_flag) ||
1124  (k > str1->slen) || (str1->slen - k != str2->slen))
1125  return -1;
1126  return memcmp (&str1->sbuf[k], str2->sbuf, str2->slen);
1127 }
1128 
1129 
1138 static void
1139 number_states (void *cls,
1140  const unsigned int count,
1141  struct REGEX_INTERNAL_State *s)
1142 {
1143  struct REGEX_INTERNAL_State **states = cls;
1144 
1145  s->dfs_id = count;
1146  if (NULL != states)
1147  states[count] = s;
1148 }
1149 
1150 
1151 #define PRIS(a) \
1152  ((GNUNET_YES == a.null_flag) ? 6 : (int) a.slen), \
1153  ((GNUNET_YES == a.null_flag) ? "(null)" : a.sbuf)
1154 
1155 
1170 static void
1172  const struct StringBuffer *R_last_ik,
1173  const struct StringBuffer *R_last_kk,
1174  const struct StringBuffer *R_last_kj,
1175  struct StringBuffer *R_cur_ij,
1176  struct StringBuffer *R_cur_l,
1177  struct StringBuffer *R_cur_r)
1178 {
1179  struct StringBuffer R_temp_ij;
1180  struct StringBuffer R_temp_ik;
1181  struct StringBuffer R_temp_kj;
1182  struct StringBuffer R_temp_kk;
1183  int eps_check;
1184  int ij_ik_cmp;
1185  int ij_kj_cmp;
1186  int ik_kk_cmp;
1187  int kk_kj_cmp;
1188  int clean_ik_kk_cmp;
1189  int clean_kk_kj_cmp;
1190  size_t length;
1191  size_t length_l;
1192  size_t length_r;
1193 
1194  /*
1195  * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1196  * R_last == R^{(k-1)}, R_cur == R^{(k)}
1197  * R_cur_ij = R_cur_l | R_cur_r
1198  * R_cur_l == R^{(k-1)}_{ij}
1199  * R_cur_r == R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1200  */
1201 
1202  if ((GNUNET_YES == R_last_ij->null_flag) &&
1203  ((GNUNET_YES == R_last_ik->null_flag) ||
1204  (GNUNET_YES == R_last_kj->null_flag)))
1205  {
1206  /* R^{(k)}_{ij} = N | N */
1207  R_cur_ij->null_flag = GNUNET_YES;
1208  R_cur_ij->synced = GNUNET_NO;
1209  return;
1210  }
1211 
1212  if ((GNUNET_YES == R_last_ik->null_flag) ||
1213  (GNUNET_YES == R_last_kj->null_flag))
1214  {
1215  /* R^{(k)}_{ij} = R^{(k-1)}_{ij} | N */
1216  if (GNUNET_YES == R_last_ij->synced)
1217  {
1218  R_cur_ij->synced = GNUNET_YES;
1219  R_cur_ij->null_flag = GNUNET_NO;
1220  return;
1221  }
1222  R_cur_ij->synced = GNUNET_YES;
1223  sb_strdup (R_cur_ij, R_last_ij);
1224  return;
1225  }
1226  R_cur_ij->synced = GNUNET_NO;
1227 
1228  /* $R^{(k)}_{ij} = N | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} OR
1229  * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} */
1230 
1231  R_cur_r->null_flag = GNUNET_YES;
1232  R_cur_r->slen = 0;
1233  R_cur_l->null_flag = GNUNET_YES;
1234  R_cur_l->slen = 0;
1235 
1236  /* cache results from strcmp, we might need these many times */
1237  ij_kj_cmp = sb_nullstrcmp (R_last_ij, R_last_kj);
1238  ij_ik_cmp = sb_nullstrcmp (R_last_ij, R_last_ik);
1239  ik_kk_cmp = sb_nullstrcmp (R_last_ik, R_last_kk);
1240  kk_kj_cmp = sb_nullstrcmp (R_last_kk, R_last_kj);
1241 
1242  /* Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well
1243  * as parentheses, so we can better compare the contents */
1244 
1245  memset (&R_temp_ij, 0, sizeof (struct StringBuffer));
1246  memset (&R_temp_ik, 0, sizeof (struct StringBuffer));
1247  memset (&R_temp_kk, 0, sizeof (struct StringBuffer));
1248  memset (&R_temp_kj, 0, sizeof (struct StringBuffer));
1249  remove_epsilon (R_last_ik, &R_temp_ik);
1250  remove_epsilon (R_last_kk, &R_temp_kk);
1251  remove_epsilon (R_last_kj, &R_temp_kj);
1252  remove_parentheses (&R_temp_ik);
1253  remove_parentheses (&R_temp_kk);
1254  remove_parentheses (&R_temp_kj);
1255  clean_ik_kk_cmp = sb_nullstrcmp (R_last_ik, &R_temp_kk);
1256  clean_kk_kj_cmp = sb_nullstrcmp (&R_temp_kk, R_last_kj);
1257 
1258  /* construct R_cur_l (and, if necessary R_cur_r) */
1259  if (GNUNET_YES != R_last_ij->null_flag)
1260  {
1261  /* Assign R_temp_ij to R_last_ij and remove epsilon as well
1262  * as parentheses, so we can better compare the contents */
1263  remove_epsilon (R_last_ij, &R_temp_ij);
1264  remove_parentheses (&R_temp_ij);
1265 
1266  if ((0 == sb_strcmp (&R_temp_ij, &R_temp_ik)) &&
1267  (0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) &&
1268  (0 == sb_strcmp (&R_temp_kk, &R_temp_kj)))
1269  {
1270  if (0 == R_temp_ij.slen)
1271  {
1272  R_cur_r->null_flag = GNUNET_NO;
1273  }
1274  else if ((0 == sb_strncmp_cstr (R_last_ij, "(|", 2)) ||
1275  (0 == sb_strncmp_cstr (R_last_ik, "(|", 2) &&
1276  0 == sb_strncmp_cstr (R_last_kj, "(|", 2)))
1277  {
1278  /*
1279  * a|(e|a)a*(e|a) = a*
1280  * a|(e|a)(e|a)*(e|a) = a*
1281  * (e|a)|aa*a = a*
1282  * (e|a)|aa*(e|a) = a*
1283  * (e|a)|(e|a)a*a = a*
1284  * (e|a)|(e|a)a*(e|a) = a*
1285  * (e|a)|(e|a)(e|a)*(e|a) = a*
1286  */
1287  if (GNUNET_YES == needs_parentheses (&R_temp_ij))
1288  sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_ij);
1289  else
1290  sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_ij);
1291  }
1292  else
1293  {
1294  /*
1295  * a|aa*a = a+
1296  * a|(e|a)a*a = a+
1297  * a|aa*(e|a) = a+
1298  * a|(e|a)(e|a)*a = a+
1299  * a|a(e|a)*(e|a) = a+
1300  */
1301  if (GNUNET_YES == needs_parentheses (&R_temp_ij))
1302  sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_ij);
1303  else
1304  sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_ij);
1305  }
1306  }
1307  else if ((0 == ij_ik_cmp) && (0 == clean_kk_kj_cmp) &&
1308  (0 != clean_ik_kk_cmp))
1309  {
1310  /* a|ab*b = ab* */
1311  if (0 == R_last_kk->slen)
1312  sb_strdup (R_cur_r, R_last_ij);
1313  else if (GNUNET_YES == needs_parentheses (&R_temp_kk))
1314  sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
1315  else
1316  sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, R_last_kk);
1317  R_cur_l->null_flag = GNUNET_YES;
1318  }
1319  else if ((0 == ij_kj_cmp) && (0 == clean_ik_kk_cmp) &&
1320  (0 != clean_kk_kj_cmp))
1321  {
1322  /* a|bb*a = b*a */
1323  if (R_last_kk->slen < 1)
1324  {
1325  sb_strdup (R_cur_r, R_last_kj);
1326  }
1327  else if (GNUNET_YES == needs_parentheses (&R_temp_kk))
1328  sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
1329  else
1330  sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
1331 
1332  R_cur_l->null_flag = GNUNET_YES;
1333  }
1334  else if ((0 == ij_ik_cmp) && (0 == kk_kj_cmp) &&
1335  (! has_epsilon (R_last_ij)) && has_epsilon (R_last_kk))
1336  {
1337  /* a|a(e|b)*(e|b) = a|ab* = a|a|ab|abb|abbb|... = ab* */
1338  if (needs_parentheses (&R_temp_kk))
1339  sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
1340  else
1341  sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, &R_temp_kk);
1342  R_cur_l->null_flag = GNUNET_YES;
1343  }
1344  else if ((0 == ij_kj_cmp) && (0 == ik_kk_cmp) &&
1345  (! has_epsilon (R_last_ij)) && has_epsilon (R_last_kk))
1346  {
1347  /* a|(e|b)(e|b)*a = a|b*a = a|a|ba|bba|bbba|... = b*a */
1348  if (needs_parentheses (&R_temp_kk))
1349  sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_ij);
1350  else
1351  sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_ij);
1352  R_cur_l->null_flag = GNUNET_YES;
1353  }
1354  else
1355  {
1356  sb_strdup (R_cur_l, R_last_ij);
1357  remove_parentheses (R_cur_l);
1358  }
1359  }
1360  else
1361  {
1362  /* we have no left side */
1363  R_cur_l->null_flag = GNUNET_YES;
1364  }
1365 
1366  /* construct R_cur_r, if not already constructed */
1367  if (GNUNET_YES == R_cur_r->null_flag)
1368  {
1369  length = R_temp_kk.slen - R_last_ik->slen;
1370 
1371  /* a(ba)*bx = (ab)+x */
1372  if ((length > 0) && (GNUNET_YES != R_last_kk->null_flag) &&
1373  (0 < R_last_kk->slen) && (GNUNET_YES != R_last_kj->null_flag) &&
1374  (0 < R_last_kj->slen) && (GNUNET_YES != R_last_ik->null_flag) &&
1375  (0 < R_last_ik->slen) &&
1376  (0 == sb_strkcmp (&R_temp_kk, R_last_ik, length)) &&
1377  (0 == sb_strncmp (&R_temp_kk, R_last_kj, length)))
1378  {
1379  struct StringBuffer temp_a;
1380  struct StringBuffer temp_b;
1381 
1382  sb_init (&temp_a, length);
1383  sb_init (&temp_b, R_last_kj->slen - length);
1384 
1385  length_l = length;
1386  temp_a.sbuf = temp_a.abuf;
1387  GNUNET_memcpy (temp_a.sbuf, R_last_kj->sbuf, length_l);
1388  temp_a.slen = length_l;
1389 
1390  length_r = R_last_kj->slen - length;
1391  temp_b.sbuf = temp_b.abuf;
1392  GNUNET_memcpy (temp_b.sbuf, &R_last_kj->sbuf[length], length_r);
1393  temp_b.slen = length_r;
1394 
1395  /* e|(ab)+ = (ab)* */
1396  if ((GNUNET_YES != R_cur_l->null_flag) && (0 == R_cur_l->slen) &&
1397  (0 == temp_b.slen))
1398  {
1399  sb_printf2 (R_cur_r, "(%.*s%.*s)*", 3, R_last_ik, &temp_a);
1400  sb_free (R_cur_l);
1401  R_cur_l->null_flag = GNUNET_YES;
1402  }
1403  else
1404  {
1405  sb_printf3 (R_cur_r, "(%.*s%.*s)+%.*s", 3, R_last_ik, &temp_a, &temp_b);
1406  }
1407  sb_free (&temp_a);
1408  sb_free (&temp_b);
1409  }
1410  else if (0 == sb_strcmp (&R_temp_ik, &R_temp_kk) &&
1411  0 == sb_strcmp (&R_temp_kk, &R_temp_kj))
1412  {
1413  /*
1414  * (e|a)a*(e|a) = a*
1415  * (e|a)(e|a)*(e|a) = a*
1416  */
1417  if (has_epsilon (R_last_ik) && has_epsilon (R_last_kj))
1418  {
1419  if (needs_parentheses (&R_temp_kk))
1420  sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_kk);
1421  else
1422  sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_kk);
1423  }
1424  /* aa*a = a+a */
1425  else if ((0 == clean_ik_kk_cmp) && (0 == clean_kk_kj_cmp) &&
1426  (! has_epsilon (R_last_ik)))
1427  {
1428  if (needs_parentheses (&R_temp_kk))
1429  sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, &R_temp_kk);
1430  else
1431  sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, &R_temp_kk);
1432  }
1433  /*
1434  * (e|a)a*a = a+
1435  * aa*(e|a) = a+
1436  * a(e|a)*(e|a) = a+
1437  * (e|a)a*a = a+
1438  */
1439  else
1440  {
1441  eps_check = (has_epsilon (R_last_ik) + has_epsilon (R_last_kk) +
1442  has_epsilon (R_last_kj));
1443 
1444  if (1 == eps_check)
1445  {
1446  if (needs_parentheses (&R_temp_kk))
1447  sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_kk);
1448  else
1449  sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_kk);
1450  }
1451  }
1452  }
1453  /*
1454  * aa*b = a+b
1455  * (e|a)(e|a)*b = a*b
1456  */
1457  else if (0 == sb_strcmp (&R_temp_ik, &R_temp_kk))
1458  {
1459  if (has_epsilon (R_last_ik))
1460  {
1461  if (needs_parentheses (&R_temp_kk))
1462  sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
1463  else
1464  sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
1465  }
1466  else
1467  {
1468  if (needs_parentheses (&R_temp_kk))
1469  sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, R_last_kj);
1470  else
1471  sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, R_last_kj);
1472  }
1473  }
1474  /*
1475  * ba*a = ba+
1476  * b(e|a)*(e|a) = ba*
1477  */
1478  else if (0 == sb_strcmp (&R_temp_kk, &R_temp_kj))
1479  {
1480  if (has_epsilon (R_last_kj))
1481  {
1482  if (needs_parentheses (&R_temp_kk))
1483  sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ik, &R_temp_kk);
1484  else
1485  sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ik, &R_temp_kk);
1486  }
1487  else
1488  {
1489  if (needs_parentheses (&R_temp_kk))
1490  sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, R_last_ik, &R_temp_kk);
1491  else
1492  sb_printf2 (R_cur_r, "%.*s+%.*s", 1, R_last_ik, &R_temp_kk);
1493  }
1494  }
1495  else
1496  {
1497  if (0 < R_temp_kk.slen)
1498  {
1499  if (needs_parentheses (&R_temp_kk))
1500  {
1501  sb_printf3 (R_cur_r,
1502  "%.*s(%.*s)*%.*s",
1503  3,
1504  R_last_ik,
1505  &R_temp_kk,
1506  R_last_kj);
1507  }
1508  else
1509  {
1510  sb_printf3 (R_cur_r,
1511  "%.*s%.*s*%.*s",
1512  1,
1513  R_last_ik,
1514  &R_temp_kk,
1515  R_last_kj);
1516  }
1517  }
1518  else
1519  {
1520  sb_printf2 (R_cur_r, "%.*s%.*s", 0, R_last_ik, R_last_kj);
1521  }
1522  }
1523  }
1524  sb_free (&R_temp_ij);
1525  sb_free (&R_temp_ik);
1526  sb_free (&R_temp_kk);
1527  sb_free (&R_temp_kj);
1528 
1529  if ((GNUNET_YES == R_cur_l->null_flag) && (GNUNET_YES == R_cur_r->null_flag))
1530  {
1531  R_cur_ij->null_flag = GNUNET_YES;
1532  return;
1533  }
1534 
1535  if ((GNUNET_YES != R_cur_l->null_flag) && (GNUNET_YES == R_cur_r->null_flag))
1536  {
1537  struct StringBuffer tmp;
1538 
1539  tmp = *R_cur_ij;
1540  *R_cur_ij = *R_cur_l;
1541  *R_cur_l = tmp;
1542  return;
1543  }
1544 
1545  if ((GNUNET_YES == R_cur_l->null_flag) && (GNUNET_YES != R_cur_r->null_flag))
1546  {
1547  struct StringBuffer tmp;
1548 
1549  tmp = *R_cur_ij;
1550  *R_cur_ij = *R_cur_r;
1551  *R_cur_r = tmp;
1552  return;
1553  }
1554 
1555  if (0 == sb_nullstrcmp (R_cur_l, R_cur_r))
1556  {
1557  struct StringBuffer tmp;
1558 
1559  tmp = *R_cur_ij;
1560  *R_cur_ij = *R_cur_l;
1561  *R_cur_l = tmp;
1562  return;
1563  }
1564  sb_printf2 (R_cur_ij, "(%.*s|%.*s)", 3, R_cur_l, R_cur_r);
1565 }
1566 
1567 
1579 static int
1581 {
1582  unsigned int n = a->state_count;
1583  struct REGEX_INTERNAL_State *states[n];
1584  struct StringBuffer *R_last;
1585  struct StringBuffer *R_cur;
1586  struct StringBuffer R_cur_r;
1587  struct StringBuffer R_cur_l;
1588  struct StringBuffer *R_swap;
1589  struct REGEX_INTERNAL_Transition *t;
1590  struct StringBuffer complete_regex;
1591  unsigned int i;
1592  unsigned int j;
1593  unsigned int k;
1594 
1595  R_last = GNUNET_malloc_large (sizeof (struct StringBuffer) * n * n);
1596  R_cur = GNUNET_malloc_large (sizeof (struct StringBuffer) * n * n);
1597  if ((NULL == R_last) || (NULL == R_cur))
1598  {
1600  GNUNET_free_non_null (R_cur);
1601  GNUNET_free_non_null (R_last);
1602  return GNUNET_SYSERR;
1603  }
1604 
1605  /* create depth-first numbering of the states, initializes 'state' */
1607  a->start,
1608  NULL,
1609  NULL,
1610  &number_states,
1611  states);
1612 
1613  for (i = 0; i < n; i++)
1614  GNUNET_assert (NULL != states[i]);
1615  for (i = 0; i < n; i++)
1616  for (j = 0; j < n; j++)
1617  R_last[i * n + j].null_flag = GNUNET_YES;
1618 
1619  /* Compute regular expressions of length "1" between each pair of states */
1620  for (i = 0; i < n; i++)
1621  {
1622  for (t = states[i]->transitions_head; NULL != t; t = t->next)
1623  {
1624  j = t->to_state->dfs_id;
1625  if (GNUNET_YES == R_last[i * n + j].null_flag)
1626  {
1627  sb_strdup_cstr (&R_last[i * n + j], t->label);
1628  }
1629  else
1630  {
1631  sb_append_cstr (&R_last[i * n + j], "|");
1632  sb_append_cstr (&R_last[i * n + j], t->label);
1633  }
1634  }
1635  /* add self-loop: i is reachable from i via epsilon-transition */
1636  if (GNUNET_YES == R_last[i * n + i].null_flag)
1637  {
1638  R_last[i * n + i].slen = 0;
1639  R_last[i * n + i].null_flag = GNUNET_NO;
1640  }
1641  else
1642  {
1643  sb_wrap (&R_last[i * n + i], "(|%.*s)", 3);
1644  }
1645  }
1646  for (i = 0; i < n; i++)
1647  for (j = 0; j < n; j++)
1648  if (needs_parentheses (&R_last[i * n + j]))
1649  sb_wrap (&R_last[i * n + j], "(%.*s)", 2);
1650  /* Compute regular expressions of length "k" between each pair of states per
1651  * induction */
1652  memset (&R_cur_l, 0, sizeof (struct StringBuffer));
1653  memset (&R_cur_r, 0, sizeof (struct StringBuffer));
1654  for (k = 0; k < n; k++)
1655  {
1656  for (i = 0; i < n; i++)
1657  {
1658  for (j = 0; j < n; j++)
1659  {
1660  /* Basis for the recursion:
1661  * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1662  * R_last == R^{(k-1)}, R_cur == R^{(k)}
1663  */
1664 
1665  /* Create R_cur[i][j] and simplify the expression */
1666  automaton_create_proofs_simplify (&R_last[i * n + j],
1667  &R_last[i * n + k],
1668  &R_last[k * n + k],
1669  &R_last[k * n + j],
1670  &R_cur[i * n + j],
1671  &R_cur_l,
1672  &R_cur_r);
1673  }
1674  }
1675  /* set R_last = R_cur */
1676  R_swap = R_last;
1677  R_last = R_cur;
1678  R_cur = R_swap;
1679  /* clear 'R_cur' for next iteration */
1680  for (i = 0; i < n; i++)
1681  for (j = 0; j < n; j++)
1682  R_cur[i * n + j].null_flag = GNUNET_YES;
1683  }
1684  sb_free (&R_cur_l);
1685  sb_free (&R_cur_r);
1686  /* assign proofs and hashes */
1687  for (i = 0; i < n; i++)
1688  {
1689  if (GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag)
1690  {
1691  states[i]->proof = GNUNET_strndup (R_last[a->start->dfs_id * n + i].sbuf,
1692  R_last[a->start->dfs_id * n + i].slen);
1693  GNUNET_CRYPTO_hash (states[i]->proof,
1694  strlen (states[i]->proof),
1695  &states[i]->hash);
1696  }
1697  }
1698 
1699  /* complete regex for whole DFA: union of all pairs (start state/accepting
1700  * state(s)). */
1701  sb_init (&complete_regex, 16 * n);
1702  for (i = 0; i < n; i++)
1703  {
1704  if (states[i]->accepting)
1705  {
1706  if ((0 == complete_regex.slen) &&
1707  (0 < R_last[a->start->dfs_id * n + i].slen))
1708  {
1709  sb_append (&complete_regex, &R_last[a->start->dfs_id * n + i]);
1710  }
1711  else if ((GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag) &&
1712  (0 < R_last[a->start->dfs_id * n + i].slen))
1713  {
1714  sb_append_cstr (&complete_regex, "|");
1715  sb_append (&complete_regex, &R_last[a->start->dfs_id * n + i]);
1716  }
1717  }
1718  }
1719  a->canonical_regex =
1720  GNUNET_strndup (complete_regex.sbuf, complete_regex.slen);
1721 
1722  /* cleanup */
1723  sb_free (&complete_regex);
1724  for (i = 0; i < n; i++)
1725  for (j = 0; j < n; j++)
1726  {
1727  sb_free (&R_cur[i * n + j]);
1728  sb_free (&R_last[i * n + j]);
1729  }
1730  GNUNET_free (R_cur);
1731  GNUNET_free (R_last);
1732  return GNUNET_OK;
1733 }
1734 
1735 
1745 static struct REGEX_INTERNAL_State *
1747  struct REGEX_INTERNAL_StateSet *nfa_states)
1748 {
1749  struct REGEX_INTERNAL_State *s;
1750  char *pos;
1751  size_t len;
1752  struct REGEX_INTERNAL_State *cstate;
1753  struct REGEX_INTERNAL_Transition *ctran;
1754  unsigned int i;
1755 
1756  s = GNUNET_new (struct REGEX_INTERNAL_State);
1757  s->id = ctx->state_id++;
1758  s->index = -1;
1759  s->lowlink = -1;
1760 
1761  if (NULL == nfa_states)
1762  {
1763  GNUNET_asprintf (&s->name, "s%i", s->id);
1764  return s;
1765  }
1766 
1767  s->nfa_set = *nfa_states;
1768 
1769  if (nfa_states->off < 1)
1770  return s;
1771 
1772  /* Create a name based on 'nfa_states' */
1773  len = nfa_states->off * 14 + 4;
1774  s->name = GNUNET_malloc (len);
1775  strcat (s->name, "{");
1776  pos = s->name + 1;
1777 
1778  for (i = 0; i < nfa_states->off; i++)
1779  {
1780  cstate = nfa_states->states[i];
1781  GNUNET_snprintf (pos, pos - s->name + len, "%i,", cstate->id);
1782  pos += strlen (pos);
1783 
1784  /* Add a transition for each distinct label to NULL state */
1785  for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next)
1786  if (NULL != ctran->label)
1787  state_add_transition (ctx, s, ctran->label, NULL);
1788 
1789  /* If the nfa_states contain an accepting state, the new dfa state is also
1790  * accepting. */
1791  if (cstate->accepting)
1792  s->accepting = 1;
1793  }
1794  pos[-1] = '}';
1795  s->name = GNUNET_realloc (s->name, strlen (s->name) + 1);
1796 
1797  memset (nfa_states, 0, sizeof (struct REGEX_INTERNAL_StateSet));
1798  return s;
1799 }
1800 
1801 
1815 static unsigned int
1816 dfa_move (struct REGEX_INTERNAL_State **s, const char *str)
1817 {
1818  struct REGEX_INTERNAL_Transition *t;
1819  struct REGEX_INTERNAL_State *new_s;
1820  unsigned int len;
1821  unsigned int max_len;
1822 
1823  if (NULL == s)
1824  return 0;
1825 
1826  new_s = NULL;
1827  max_len = 0;
1828  for (t = (*s)->transitions_head; NULL != t; t = t->next)
1829  {
1830  len = strlen (t->label);
1831 
1832  if (0 == strncmp (t->label, str, len))
1833  {
1834  if (len >= max_len)
1835  {
1836  max_len = len;
1837  new_s = t->to_state;
1838  }
1839  }
1840  }
1841 
1842  *s = new_s;
1843  return max_len;
1844 }
1845 
1846 
1856 static void
1857 mark_states (void *cls,
1858  const unsigned int count,
1859  struct REGEX_INTERNAL_State *s)
1860 {
1861  s->marked = GNUNET_YES;
1862 }
1863 
1864 
1871 static void
1873 {
1874  struct REGEX_INTERNAL_State *s;
1875  struct REGEX_INTERNAL_State *s_next;
1876 
1877  /* 1. unmark all states */
1878  for (s = a->states_head; NULL != s; s = s->next)
1879  s->marked = GNUNET_NO;
1880 
1881  /* 2. traverse dfa from start state and mark all visited states */
1883  a->start,
1884  NULL,
1885  NULL,
1886  &mark_states,
1887  NULL);
1888 
1889  /* 3. delete all states that were not visited */
1890  for (s = a->states_head; NULL != s; s = s_next)
1891  {
1892  s_next = s->next;
1893  if (GNUNET_NO == s->marked)
1894  automaton_remove_state (a, s);
1895  }
1896 }
1897 
1898 
1905 static void
1907 {
1908  struct REGEX_INTERNAL_State *s;
1909  struct REGEX_INTERNAL_State *s_next;
1910  struct REGEX_INTERNAL_Transition *t;
1911  int dead;
1912 
1913  GNUNET_assert (DFA == a->type);
1914 
1915  for (s = a->states_head; NULL != s; s = s_next)
1916  {
1917  s_next = s->next;
1918 
1919  if (s->accepting)
1920  continue;
1921 
1922  dead = 1;
1923  for (t = s->transitions_head; NULL != t; t = t->next)
1924  {
1925  if (NULL != t->to_state && t->to_state != s)
1926  {
1927  dead = 0;
1928  break;
1929  }
1930  }
1931 
1932  if (0 == dead)
1933  continue;
1934 
1935  /* state s is dead, remove it */
1936  automaton_remove_state (a, s);
1937  }
1938 }
1939 
1940 
1948 static int
1950  struct REGEX_INTERNAL_Automaton *a)
1951 {
1952  uint32_t *table;
1953  struct REGEX_INTERNAL_State *s1;
1954  struct REGEX_INTERNAL_State *s2;
1955  struct REGEX_INTERNAL_Transition *t1;
1956  struct REGEX_INTERNAL_Transition *t2;
1957  struct REGEX_INTERNAL_State *s1_next;
1958  struct REGEX_INTERNAL_State *s2_next;
1959  int change;
1960  unsigned int num_equal_edges;
1961  unsigned int i;
1962  unsigned int state_cnt;
1963  unsigned long long idx;
1964  unsigned long long idx1;
1965 
1966  if ((NULL == a) || (0 == a->state_count))
1967  {
1969  "Could not merge nondistinguishable states, automaton was NULL.\n");
1970  return GNUNET_SYSERR;
1971  }
1972 
1973  state_cnt = a->state_count;
1974  table = GNUNET_malloc_large (
1975  (sizeof (uint32_t) * state_cnt * state_cnt / 32) + sizeof (uint32_t));
1976  if (NULL == table)
1977  {
1979  return GNUNET_SYSERR;
1980  }
1981 
1982  for (i = 0, s1 = a->states_head; NULL != s1; s1 = s1->next)
1983  s1->marked = i++;
1984 
1985  /* Mark all pairs of accepting/!accepting states */
1986  for (s1 = a->states_head; NULL != s1; s1 = s1->next)
1987  for (s2 = a->states_head; NULL != s2; s2 = s2->next)
1988  if ((s1->accepting && ! s2->accepting) ||
1989  (! s1->accepting && s2->accepting))
1990  {
1991  idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
1992  table[idx / 32] |= (1U << (idx % 32));
1993  }
1994 
1995  /* Find all equal states */
1996  change = 1;
1997  while (0 != change)
1998  {
1999  change = 0;
2000  for (s1 = a->states_head; NULL != s1; s1 = s1->next)
2001  {
2002  for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2->next)
2003  {
2004  idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
2005  if (0 != (table[idx / 32] & (1U << (idx % 32))))
2006  continue;
2007  num_equal_edges = 0;
2008  for (t1 = s1->transitions_head; NULL != t1; t1 = t1->next)
2009  {
2010  for (t2 = s2->transitions_head; NULL != t2; t2 = t2->next)
2011  {
2012  if (0 == strcmp (t1->label, t2->label))
2013  {
2014  num_equal_edges++;
2015  /* same edge, but targets definitively different, so we're different
2016  as well */
2017  if (t1->to_state->marked > t2->to_state->marked)
2018  idx1 = (unsigned long long) t1->to_state->marked * state_cnt +
2019  t2->to_state->marked;
2020  else
2021  idx1 = (unsigned long long) t2->to_state->marked * state_cnt +
2022  t1->to_state->marked;
2023  if (0 != (table[idx1 / 32] & (1U << (idx1 % 32))))
2024  {
2025  table[idx / 32] |= (1U << (idx % 32));
2026  change = 1; /* changed a marker, need to run again */
2027  }
2028  }
2029  }
2030  }
2031  if ((num_equal_edges != s1->transition_count) ||
2032  (num_equal_edges != s2->transition_count))
2033  {
2034  /* Make sure ALL edges of possible equal states are the same */
2035  table[idx / 32] |= (1U << (idx % 32));
2036  change = 1; /* changed a marker, need to run again */
2037  }
2038  }
2039  }
2040  }
2041 
2042  /* Merge states that are equal */
2043  for (s1 = a->states_head; NULL != s1; s1 = s1_next)
2044  {
2045  s1_next = s1->next;
2046  for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2_next)
2047  {
2048  s2_next = s2->next;
2049  idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
2050  if (0 == (table[idx / 32] & (1U << (idx % 32))))
2051  automaton_merge_states (ctx, a, s1, s2);
2052  }
2053  }
2054 
2055  GNUNET_free (table);
2056  return GNUNET_OK;
2057 }
2058 
2059 
2068 static int
2070  struct REGEX_INTERNAL_Automaton *a)
2071 {
2072  if (NULL == a)
2073  return GNUNET_SYSERR;
2074 
2075  GNUNET_assert (DFA == a->type);
2076 
2077  /* 1. remove unreachable states */
2079 
2080  /* 2. remove dead states */
2082 
2083  /* 3. Merge nondistinguishable states */
2085  return GNUNET_SYSERR;
2086  return GNUNET_OK;
2087 }
2088 
2089 
2094 {
2098  const unsigned int stride;
2099 
2105 
2110 };
2111 
2112 
2123 static void
2125  const unsigned int depth,
2126  char *label,
2127  struct REGEX_INTERNAL_State *start,
2128  struct REGEX_INTERNAL_State *s)
2129 {
2130  struct REGEX_INTERNAL_Strided_Context *ctx = cls;
2131  struct REGEX_INTERNAL_Transition *t;
2132  char *new_label;
2133 
2134  if (depth == ctx->stride)
2135  {
2137  t->label = GNUNET_strdup (label);
2138  t->to_state = s;
2139  t->from_state = start;
2141  ctx->transitions_tail,
2142  t);
2143  }
2144  else
2145  {
2146  for (t = s->transitions_head; NULL != t; t = t->next)
2147  {
2148  /* Do not consider self-loops, because it end's up in too many
2149  * transitions */
2150  if (t->to_state == t->from_state)
2151  continue;
2152 
2153  if (NULL != label)
2154  {
2155  GNUNET_asprintf (&new_label, "%s%s", label, t->label);
2156  }
2157  else
2158  new_label = GNUNET_strdup (t->label);
2159 
2161  (depth + 1),
2162  new_label,
2163  start,
2164  t->to_state);
2165  }
2166  }
2167  GNUNET_free_non_null (label);
2168 }
2169 
2170 
2179 static void
2181  const unsigned int count,
2182  struct REGEX_INTERNAL_State *s)
2183 {
2184  dfa_add_multi_strides_helper (cls, 0, NULL, s, s);
2185 }
2186 
2187 
2195 void
2197  struct REGEX_INTERNAL_Automaton *dfa,
2198  const unsigned int stride_len)
2199 {
2200  struct REGEX_INTERNAL_Strided_Context ctx = {stride_len, NULL, NULL};
2201  struct REGEX_INTERNAL_Transition *t;
2202  struct REGEX_INTERNAL_Transition *t_next;
2203 
2204  if (1 > stride_len || GNUNET_YES == dfa->is_multistrided)
2205  return;
2206 
2207  /* Compute the new transitions of given stride_len */
2209  dfa->start,
2210  NULL,
2211  NULL,
2213  &ctx);
2214 
2215  /* Add all the new transitions to the automaton. */
2216  for (t = ctx.transitions_head; NULL != t; t = t_next)
2217  {
2218  t_next = t->next;
2219  state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
2222  GNUNET_free (t);
2223  }
2224 
2225  /* Mark this automaton as multistrided */
2226  dfa->is_multistrided = GNUNET_YES;
2227 }
2228 
2242 void
2244  struct REGEX_INTERNAL_State *start,
2245  struct REGEX_INTERNAL_State *cur,
2246  char *label,
2247  unsigned int max_len,
2248  struct REGEX_INTERNAL_Transition **transitions_head,
2249  struct REGEX_INTERNAL_Transition **transitions_tail)
2250 {
2251  struct REGEX_INTERNAL_Transition *t;
2252  char *new_label;
2253 
2254 
2255  if (NULL != label &&
2256  ((cur->incoming_transition_count > 1 || GNUNET_YES == cur->accepting ||
2257  GNUNET_YES == cur->marked) ||
2258  (start != dfa->start && max_len > 0 && max_len == strlen (label)) ||
2259  (start == dfa->start && GNUNET_REGEX_INITIAL_BYTES == strlen (label))))
2260  {
2262  t->label = GNUNET_strdup (label);
2263  t->to_state = cur;
2264  t->from_state = start;
2265  GNUNET_CONTAINER_DLL_insert (*transitions_head, *transitions_tail, t);
2266 
2267  if (GNUNET_NO == cur->marked)
2268  {
2270  cur,
2271  cur,
2272  NULL,
2273  max_len,
2274  transitions_head,
2275  transitions_tail);
2276  }
2277  return;
2278  }
2279  else if (cur != start)
2280  cur->contained = GNUNET_YES;
2281 
2282  if (GNUNET_YES == cur->marked && cur != start)
2283  return;
2284 
2285  cur->marked = GNUNET_YES;
2286 
2287 
2288  for (t = cur->transitions_head; NULL != t; t = t->next)
2289  {
2290  if (NULL != label)
2291  GNUNET_asprintf (&new_label, "%s%s", label, t->label);
2292  else
2293  new_label = GNUNET_strdup (t->label);
2294 
2295  if (t->to_state != cur)
2296  {
2298  start,
2299  t->to_state,
2300  new_label,
2301  max_len,
2302  transitions_head,
2303  transitions_tail);
2304  }
2305  GNUNET_free (new_label);
2306  }
2307 }
2308 
2309 
2318 static void
2320  struct REGEX_INTERNAL_Automaton *dfa,
2321  unsigned int max_len)
2322 {
2323  struct REGEX_INTERNAL_State *s;
2324  struct REGEX_INTERNAL_State *s_next;
2325  struct REGEX_INTERNAL_Transition *t;
2326  struct REGEX_INTERNAL_Transition *t_next;
2327  struct REGEX_INTERNAL_Transition *transitions_head = NULL;
2328  struct REGEX_INTERNAL_Transition *transitions_tail = NULL;
2329 
2330  if (NULL == dfa)
2331  return;
2332 
2333  /* Count the incoming transitions on each state. */
2334  for (s = dfa->states_head; NULL != s; s = s->next)
2335  {
2336  for (t = s->transitions_head; NULL != t; t = t->next)
2337  {
2338  if (NULL != t->to_state)
2340  }
2341  }
2342 
2343  /* Unmark all states. */
2344  for (s = dfa->states_head; NULL != s; s = s->next)
2345  {
2346  s->marked = GNUNET_NO;
2347  s->contained = GNUNET_NO;
2348  }
2349 
2350  /* Add strides and mark states that can be deleted. */
2352  dfa->start,
2353  dfa->start,
2354  NULL,
2355  max_len,
2356  &transitions_head,
2357  &transitions_tail);
2358 
2359  /* Add all the new transitions to the automaton. */
2360  for (t = transitions_head; NULL != t; t = t_next)
2361  {
2362  t_next = t->next;
2363  state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
2364  GNUNET_CONTAINER_DLL_remove (transitions_head, transitions_tail, t);
2366  GNUNET_free (t);
2367  }
2368 
2369  /* Remove marked states (including their incoming and outgoing transitions). */
2370  for (s = dfa->states_head; NULL != s; s = s_next)
2371  {
2372  s_next = s->next;
2373  if (GNUNET_YES == s->contained)
2374  automaton_remove_state (dfa, s);
2375  }
2376 }
2377 
2378 
2388 static struct REGEX_INTERNAL_Automaton *
2390  struct REGEX_INTERNAL_State *end)
2391 {
2392  struct REGEX_INTERNAL_Automaton *n;
2393 
2394  n = GNUNET_new (struct REGEX_INTERNAL_Automaton);
2395 
2396  n->type = NFA;
2397  n->start = NULL;
2398  n->end = NULL;
2399  n->state_count = 0;
2400 
2401  if (NULL == start || NULL == end)
2402  return n;
2403 
2404  automaton_add_state (n, end);
2405  automaton_add_state (n, start);
2406 
2407  n->state_count = 2;
2408 
2409  n->start = start;
2410  n->end = end;
2411 
2412  return n;
2413 }
2414 
2415 
2423 static void
2427 {
2428  struct REGEX_INTERNAL_State *s;
2429 
2430  if (NULL == n || NULL == states_head)
2431  {
2432  GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not add states\n");
2433  return;
2434  }
2435 
2436  if (NULL == n->states_head)
2437  {
2438  n->states_head = states_head;
2439  n->states_tail = states_tail;
2440  return;
2441  }
2442 
2443  if (NULL != states_head)
2444  {
2445  n->states_tail->next = states_head;
2446  n->states_tail = states_tail;
2447  }
2448 
2449  for (s = states_head; NULL != s; s = s->next)
2450  n->state_count++;
2451 }
2452 
2453 
2462 static struct REGEX_INTERNAL_State *
2464 {
2465  struct REGEX_INTERNAL_State *s;
2466 
2467  s = GNUNET_new (struct REGEX_INTERNAL_State);
2468  s->id = ctx->state_id++;
2469  s->accepting = accepting;
2470  s->marked = GNUNET_NO;
2471  s->contained = 0;
2472  s->index = -1;
2473  s->lowlink = -1;
2474  s->scc_id = 0;
2475  s->name = NULL;
2476  GNUNET_asprintf (&s->name, "s%i", s->id);
2477 
2478  return s;
2479 }
2480 
2481 
2491 static void
2493  struct REGEX_INTERNAL_Automaton *nfa,
2494  struct REGEX_INTERNAL_StateSet *states,
2495  const char *label)
2496 {
2497  struct REGEX_INTERNAL_State *s;
2498  unsigned int i;
2499  struct REGEX_INTERNAL_StateSet_MDLL cls_stack;
2500  struct REGEX_INTERNAL_State *clsstate;
2501  struct REGEX_INTERNAL_State *currentstate;
2502  struct REGEX_INTERNAL_Transition *ctran;
2503 
2504  memset (ret, 0, sizeof (struct REGEX_INTERNAL_StateSet));
2505  if (NULL == states)
2506  return;
2507 
2508  for (i = 0; i < states->off; i++)
2509  {
2510  s = states->states[i];
2511 
2512  /* Add start state to closure only for epsilon closure */
2513  if (NULL == label)
2514  state_set_append (ret, s);
2515 
2516  /* initialize work stack */
2517  cls_stack.head = NULL;
2518  cls_stack.tail = NULL;
2519  GNUNET_CONTAINER_MDLL_insert (ST, cls_stack.head, cls_stack.tail, s);
2520  cls_stack.len = 1;
2521 
2522  while (NULL != (currentstate = cls_stack.tail))
2523  {
2525  cls_stack.head,
2526  cls_stack.tail,
2527  currentstate);
2528  cls_stack.len--;
2529  for (ctran = currentstate->transitions_head; NULL != ctran;
2530  ctran = ctran->next)
2531  {
2532  if (NULL == (clsstate = ctran->to_state))
2533  continue;
2534  if (0 != clsstate->contained)
2535  continue;
2536  if (0 != nullstrcmp (label, ctran->label))
2537  continue;
2538  state_set_append (ret, clsstate);
2540  cls_stack.head,
2541  cls_stack.tail,
2542  clsstate);
2543  cls_stack.len++;
2544  clsstate->contained = 1;
2545  }
2546  }
2547  }
2548  for (i = 0; i < ret->off; i++)
2549  ret->states[i]->contained = 0;
2550 
2551  if (ret->off > 1)
2552  qsort (ret->states,
2553  ret->off,
2554  sizeof (struct REGEX_INTERNAL_State *),
2555  &state_compare);
2556 }
2557 
2558 
2564 static void
2566 {
2567  struct REGEX_INTERNAL_Automaton *a;
2568  struct REGEX_INTERNAL_Automaton *b;
2569  struct REGEX_INTERNAL_Automaton *new_nfa;
2570 
2571  b = ctx->stack_tail;
2572  GNUNET_assert (NULL != b);
2574  a = ctx->stack_tail;
2575  GNUNET_assert (NULL != a);
2577 
2578  state_add_transition (ctx, a->end, NULL, b->start);
2579  a->end->accepting = 0;
2580  b->end->accepting = 1;
2581 
2582  new_nfa = nfa_fragment_create (NULL, NULL);
2583  nfa_add_states (new_nfa, a->states_head, a->states_tail);
2584  nfa_add_states (new_nfa, b->states_head, b->states_tail);
2585  new_nfa->start = a->start;
2586  new_nfa->end = b->end;
2587  new_nfa->state_count += a->state_count + b->state_count;
2590 
2592 }
2593 
2594 
2600 static void
2602 {
2603  struct REGEX_INTERNAL_Automaton *a;
2604  struct REGEX_INTERNAL_Automaton *new_nfa;
2605  struct REGEX_INTERNAL_State *start;
2606  struct REGEX_INTERNAL_State *end;
2607 
2608  a = ctx->stack_tail;
2609 
2610  if (NULL == a)
2611  {
2612  GNUNET_log (
2614  "nfa_add_star_op failed, because there was no element on the stack");
2615  return;
2616  }
2617 
2619 
2620  start = nfa_state_create (ctx, 0);
2621  end = nfa_state_create (ctx, 1);
2622 
2623  state_add_transition (ctx, start, NULL, a->start);
2624  state_add_transition (ctx, start, NULL, end);
2625  state_add_transition (ctx, a->end, NULL, a->start);
2626  state_add_transition (ctx, a->end, NULL, end);
2627 
2628  a->end->accepting = 0;
2629  end->accepting = 1;
2630 
2631  new_nfa = nfa_fragment_create (start, end);
2632  nfa_add_states (new_nfa, a->states_head, a->states_tail);
2634 
2636 }
2637 
2638 
2644 static void
2646 {
2647  struct REGEX_INTERNAL_Automaton *a;
2648 
2649  a = ctx->stack_tail;
2650 
2651  if (NULL == a)
2652  {
2653  GNUNET_log (
2655  "nfa_add_plus_op failed, because there was no element on the stack");
2656  return;
2657  }
2658 
2660 
2661  state_add_transition (ctx, a->end, NULL, a->start);
2662 
2664 }
2665 
2666 
2672 static void
2674 {
2675  struct REGEX_INTERNAL_Automaton *a;
2676  struct REGEX_INTERNAL_Automaton *new_nfa;
2677  struct REGEX_INTERNAL_State *start;
2678  struct REGEX_INTERNAL_State *end;
2679 
2680  a = ctx->stack_tail;
2681  if (NULL == a)
2682  {
2683  GNUNET_log (
2685  "nfa_add_question_op failed, because there was no element on the stack");
2686  return;
2687  }
2688 
2690 
2691  start = nfa_state_create (ctx, 0);
2692  end = nfa_state_create (ctx, 1);
2693 
2694  state_add_transition (ctx, start, NULL, a->start);
2695  state_add_transition (ctx, start, NULL, end);
2696  state_add_transition (ctx, a->end, NULL, end);
2697 
2698  a->end->accepting = 0;
2699 
2700  new_nfa = nfa_fragment_create (start, end);
2701  nfa_add_states (new_nfa, a->states_head, a->states_tail);
2704 }
2705 
2706 
2713 static void
2715 {
2716  struct REGEX_INTERNAL_Automaton *a;
2717  struct REGEX_INTERNAL_Automaton *b;
2718  struct REGEX_INTERNAL_Automaton *new_nfa;
2719  struct REGEX_INTERNAL_State *start;
2720  struct REGEX_INTERNAL_State *end;
2721 
2722  b = ctx->stack_tail;
2723  GNUNET_assert (NULL != b);
2725  a = ctx->stack_tail;
2726  GNUNET_assert (NULL != a);
2728 
2729  start = nfa_state_create (ctx, 0);
2730  end = nfa_state_create (ctx, 1);
2731  state_add_transition (ctx, start, NULL, a->start);
2732  state_add_transition (ctx, start, NULL, b->start);
2733 
2734  state_add_transition (ctx, a->end, NULL, end);
2735  state_add_transition (ctx, b->end, NULL, end);
2736 
2737  a->end->accepting = 0;
2738  b->end->accepting = 0;
2739  end->accepting = 1;
2740 
2741  new_nfa = nfa_fragment_create (start, end);
2742  nfa_add_states (new_nfa, a->states_head, a->states_tail);
2743  nfa_add_states (new_nfa, b->states_head, b->states_tail);
2746 
2748 }
2749 
2750 
2757 static void
2758 nfa_add_label (struct REGEX_INTERNAL_Context *ctx, const char *label)
2759 {
2760  struct REGEX_INTERNAL_Automaton *n;
2761  struct REGEX_INTERNAL_State *start;
2762  struct REGEX_INTERNAL_State *end;
2763 
2764  GNUNET_assert (NULL != ctx);
2765 
2766  start = nfa_state_create (ctx, 0);
2767  end = nfa_state_create (ctx, 1);
2768  state_add_transition (ctx, start, label, end);
2769  n = nfa_fragment_create (start, end);
2770  GNUNET_assert (NULL != n);
2772 }
2773 
2774 
2780 static void
2782 {
2783  if (NULL == ctx)
2784  {
2785  GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Context was NULL!");
2786  return;
2787  }
2788  ctx->state_id = 0;
2789  ctx->transition_id = 0;
2790  ctx->stack_head = NULL;
2791  ctx->stack_tail = NULL;
2792 }
2793 
2794 
2803 struct REGEX_INTERNAL_Automaton *
2804 REGEX_INTERNAL_construct_nfa (const char *regex, const size_t len)
2805 {
2806  struct REGEX_INTERNAL_Context ctx;
2807  struct REGEX_INTERNAL_Automaton *nfa;
2808  const char *regexp;
2809  char curlabel[2];
2810  char *error_msg;
2811  unsigned int count;
2812  unsigned int altcount;
2813  unsigned int atomcount;
2814  unsigned int poff;
2815  unsigned int psize;
2816  struct
2817  {
2818  int altcount;
2819  int atomcount;
2820  } * p;
2821 
2822  if (NULL == regex || 0 == strlen (regex) || 0 == len)
2823  {
2825  "Could not parse regex. Empty regex string provided.\n");
2826 
2827  return NULL;
2828  }
2830 
2831  regexp = regex;
2832  curlabel[1] = '\0';
2833  p = NULL;
2834  error_msg = NULL;
2835  altcount = 0;
2836  atomcount = 0;
2837  poff = 0;
2838  psize = 0;
2839 
2840  for (count = 0; count < len && *regexp; count++, regexp++)
2841  {
2842  switch (*regexp)
2843  {
2844  case '(':
2845  if (atomcount > 1)
2846  {
2847  --atomcount;
2848  nfa_add_concatenation (&ctx);
2849  }
2850  if (poff == psize)
2851  GNUNET_array_grow (p, psize, psize * 2 + 4); /* FIXME why *2 +4? */
2852  p[poff].altcount = altcount;
2853  p[poff].atomcount = atomcount;
2854  poff++;
2855  altcount = 0;
2856  atomcount = 0;
2857  break;
2858  case '|':
2859  if (0 == atomcount)
2860  {
2861  error_msg = "Cannot append '|' to nothing";
2862  goto error;
2863  }
2864  while (--atomcount > 0)
2865  nfa_add_concatenation (&ctx);
2866  altcount++;
2867  break;
2868  case ')':
2869  if (0 == poff)
2870  {
2871  error_msg = "Missing opening '('";
2872  goto error;
2873  }
2874  if (0 == atomcount)
2875  {
2876  /* Ignore this: "()" */
2877  poff--;
2878  altcount = p[poff].altcount;
2879  atomcount = p[poff].atomcount;
2880  break;
2881  }
2882  while (--atomcount > 0)
2883  nfa_add_concatenation (&ctx);
2884  for (; altcount > 0; altcount--)
2885  nfa_add_alternation (&ctx);
2886  poff--;
2887  altcount = p[poff].altcount;
2888  atomcount = p[poff].atomcount;
2889  atomcount++;
2890  break;
2891  case '*':
2892  if (atomcount == 0)
2893  {
2894  error_msg = "Cannot append '*' to nothing";
2895  goto error;
2896  }
2897  nfa_add_star_op (&ctx);
2898  break;
2899  case '+':
2900  if (atomcount == 0)
2901  {
2902  error_msg = "Cannot append '+' to nothing";
2903  goto error;
2904  }
2905  nfa_add_plus_op (&ctx);
2906  break;
2907  case '?':
2908  if (atomcount == 0)
2909  {
2910  error_msg = "Cannot append '?' to nothing";
2911  goto error;
2912  }
2913  nfa_add_question_op (&ctx);
2914  break;
2915  default:
2916  if (atomcount > 1)
2917  {
2918  --atomcount;
2919  nfa_add_concatenation (&ctx);
2920  }
2921  curlabel[0] = *regexp;
2922  nfa_add_label (&ctx, curlabel);
2923  atomcount++;
2924  break;
2925  }
2926  }
2927  if (0 != poff)
2928  {
2929  error_msg = "Unbalanced parenthesis";
2930  goto error;
2931  }
2932  while (--atomcount > 0)
2933  nfa_add_concatenation (&ctx);
2934  for (; altcount > 0; altcount--)
2935  nfa_add_alternation (&ctx);
2936 
2937  GNUNET_array_grow (p, psize, 0);
2938 
2939  nfa = ctx.stack_tail;
2941 
2942  if (NULL != ctx.stack_head)
2943  {
2944  error_msg = "Creating the NFA failed. NFA stack was not empty!";
2945  goto error;
2946  }
2947 
2948  /* Remember the regex that was used to generate this NFA */
2949  nfa->regex = GNUNET_strdup (regex);
2950 
2951  /* create depth-first numbering of the states for pretty printing */
2953  NULL,
2954  NULL,
2955  NULL,
2956  &number_states,
2957  NULL);
2958 
2959  /* No multistriding added so far */
2960  nfa->is_multistrided = GNUNET_NO;
2961 
2962  return nfa;
2963 
2964 error:
2965  GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not parse regex: `%s'\n", regex);
2966  if (NULL != error_msg)
2967  GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "%s\n", error_msg);
2968 
2970 
2971  while (NULL != (nfa = ctx.stack_head))
2972  {
2975  }
2976 
2977  return NULL;
2978 }
2979 
2980 
2990 static void
2992  struct REGEX_INTERNAL_Automaton *nfa,
2993  struct REGEX_INTERNAL_Automaton *dfa,
2994  struct REGEX_INTERNAL_State *dfa_state)
2995 {
2996  struct REGEX_INTERNAL_Transition *ctran;
2997  struct REGEX_INTERNAL_State *new_dfa_state;
2998  struct REGEX_INTERNAL_State *state_contains;
2999  struct REGEX_INTERNAL_State *state_iter;
3000  struct REGEX_INTERNAL_StateSet tmp;
3001  struct REGEX_INTERNAL_StateSet nfa_set;
3002 
3003  for (ctran = dfa_state->transitions_head; NULL != ctran; ctran = ctran->next)
3004  {
3005  if (NULL == ctran->label || NULL != ctran->to_state)
3006  continue;
3007 
3008  nfa_closure_set_create (&tmp, nfa, &dfa_state->nfa_set, ctran->label);
3009  nfa_closure_set_create (&nfa_set, nfa, &tmp, NULL);
3010  state_set_clear (&tmp);
3011 
3012  state_contains = NULL;
3013  for (state_iter = dfa->states_head; NULL != state_iter;
3014  state_iter = state_iter->next)
3015  {
3016  if (0 == state_set_compare (&state_iter->nfa_set, &nfa_set))
3017  {
3018  state_contains = state_iter;
3019  break;
3020  }
3021  }
3022  if (NULL == state_contains)
3023  {
3024  new_dfa_state = dfa_state_create (ctx, &nfa_set);
3025  automaton_add_state (dfa, new_dfa_state);
3026  ctran->to_state = new_dfa_state;
3027  construct_dfa_states (ctx, nfa, dfa, new_dfa_state);
3028  }
3029  else
3030  {
3031  ctran->to_state = state_contains;
3032  state_set_clear (&nfa_set);
3033  }
3034  }
3035 }
3036 
3037 
3055 struct REGEX_INTERNAL_Automaton *
3057  const size_t len,
3058  unsigned int max_path_len)
3059 {
3060  struct REGEX_INTERNAL_Context ctx;
3061  struct REGEX_INTERNAL_Automaton *dfa;
3062  struct REGEX_INTERNAL_Automaton *nfa;
3063  struct REGEX_INTERNAL_StateSet nfa_start_eps_cls;
3064  struct REGEX_INTERNAL_StateSet singleton_set;
3065 
3067 
3068  /* Create NFA */
3069  nfa = REGEX_INTERNAL_construct_nfa (regex, len);
3070 
3071  if (NULL == nfa)
3072  {
3074  "Could not create DFA, because NFA creation failed\n");
3075  return NULL;
3076  }
3077 
3078  dfa = GNUNET_new (struct REGEX_INTERNAL_Automaton);
3079  dfa->type = DFA;
3080  dfa->regex = GNUNET_strdup (regex);
3081 
3082  /* Create DFA start state from epsilon closure */
3083  memset (&singleton_set, 0, sizeof (struct REGEX_INTERNAL_StateSet));
3084  state_set_append (&singleton_set, nfa->start);
3085  nfa_closure_set_create (&nfa_start_eps_cls, nfa, &singleton_set, NULL);
3086  state_set_clear (&singleton_set);
3087  dfa->start = dfa_state_create (&ctx, &nfa_start_eps_cls);
3088  automaton_add_state (dfa, dfa->start);
3089 
3090  construct_dfa_states (&ctx, nfa, dfa, dfa->start);
3092 
3093  /* Minimize DFA */
3094  if (GNUNET_OK != dfa_minimize (&ctx, dfa))
3095  {
3097  return NULL;
3098  }
3099 
3100  /* Create proofs and hashes for all states */
3101  if (GNUNET_OK != automaton_create_proofs (dfa))
3102  {
3104  return NULL;
3105  }
3106 
3107  /* Compress linear DFA paths */
3108  if (1 != max_path_len)
3109  dfa_compress_paths (&ctx, dfa, max_path_len);
3110 
3111  return dfa;
3112 }
3113 
3114 
3121 void
3123 {
3124  struct REGEX_INTERNAL_State *s;
3125  struct REGEX_INTERNAL_State *next_state;
3126 
3127  if (NULL == a)
3128  return;
3129 
3132 
3133  for (s = a->states_head; NULL != s; s = next_state)
3134  {
3135  next_state = s->next;
3138  }
3139 
3140  GNUNET_free (a);
3141 }
3142 
3143 
3152 static int
3153 evaluate_dfa (struct REGEX_INTERNAL_Automaton *a, const char *string)
3154 {
3155  const char *strp;
3156  struct REGEX_INTERNAL_State *s;
3157  unsigned int step_len;
3158 
3159  if (DFA != a->type)
3160  {
3162  "Tried to evaluate DFA, but NFA automaton given");
3163  return -1;
3164  }
3165 
3166  s = a->start;
3167 
3168  /* If the string is empty but the starting state is accepting, we accept. */
3169  if ((NULL == string || 0 == strlen (string)) && s->accepting)
3170  return 0;
3171 
3172  for (strp = string; NULL != strp && *strp; strp += step_len)
3173  {
3174  step_len = dfa_move (&s, strp);
3175 
3176  if (NULL == s)
3177  break;
3178  }
3179 
3180  if (NULL != s && s->accepting)
3181  return 0;
3182 
3183  return 1;
3184 }
3185 
3186 
3194 static int
3195 evaluate_nfa (struct REGEX_INTERNAL_Automaton *a, const char *string)
3196 {
3197  const char *strp;
3198  char str[2];
3199  struct REGEX_INTERNAL_State *s;
3200  struct REGEX_INTERNAL_StateSet sset;
3201  struct REGEX_INTERNAL_StateSet new_sset;
3202  struct REGEX_INTERNAL_StateSet singleton_set;
3203  unsigned int i;
3204  int result;
3205 
3206  if (NFA != a->type)
3207  {
3209  "Tried to evaluate NFA, but DFA automaton given");
3210  return -1;
3211  }
3212 
3213  /* If the string is empty but the starting state is accepting, we accept. */
3214  if ((NULL == string || 0 == strlen (string)) && a->start->accepting)
3215  return 0;
3216 
3217  result = 1;
3218  memset (&singleton_set, 0, sizeof (struct REGEX_INTERNAL_StateSet));
3219  state_set_append (&singleton_set, a->start);
3220  nfa_closure_set_create (&sset, a, &singleton_set, NULL);
3221  state_set_clear (&singleton_set);
3222 
3223  str[1] = '\0';
3224  for (strp = string; NULL != strp && *strp; strp++)
3225  {
3226  str[0] = *strp;
3227  nfa_closure_set_create (&new_sset, a, &sset, str);
3228  state_set_clear (&sset);
3229  nfa_closure_set_create (&sset, a, &new_sset, 0);
3230  state_set_clear (&new_sset);
3231  }
3232 
3233  for (i = 0; i < sset.off; i++)
3234  {
3235  s = sset.states[i];
3236  if ((NULL != s) && (s->accepting))
3237  {
3238  result = 0;
3239  break;
3240  }
3241  }
3242 
3243  state_set_clear (&sset);
3244  return result;
3245 }
3246 
3247 
3255 int
3256 REGEX_INTERNAL_eval (struct REGEX_INTERNAL_Automaton *a, const char *string)
3257 {
3258  int result;
3259 
3260  switch (a->type)
3261  {
3262  case DFA:
3263  result = evaluate_dfa (a, string);
3264  break;
3265  case NFA:
3266  result = evaluate_nfa (a, string);
3267  break;
3268  default:
3270  "Evaluating regex failed, automaton has no type!\n");
3271  result = GNUNET_SYSERR;
3272  break;
3273  }
3274 
3275  return result;
3276 }
3277 
3278 
3290 const char *
3292 {
3293  if (NULL == a)
3294  return NULL;
3295 
3296  return a->canonical_regex;
3297 }
3298 
3299 
3307 unsigned int
3309 {
3310  unsigned int t_count;
3311  struct REGEX_INTERNAL_State *s;
3312 
3313  if (NULL == a)
3314  return 0;
3315 
3316  t_count = 0;
3317  for (s = a->states_head; NULL != s; s = s->next)
3318  t_count += s->transition_count;
3319 
3320  return t_count;
3321 }
3322 
3323 
3334 size_t
3335 REGEX_INTERNAL_get_first_key (const char *input_string,
3336  size_t string_len,
3337  struct GNUNET_HashCode *key)
3338 {
3339  size_t size;
3340 
3341  size = string_len < GNUNET_REGEX_INITIAL_BYTES ? string_len
3343  if (NULL == input_string)
3344  {
3345  GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Given input string was NULL!\n");
3346  return 0;
3347  }
3348  GNUNET_CRYPTO_hash (input_string, size, key);
3349 
3350  return size;
3351 }
3352 
3353 
3364 static void
3365 iterate_initial_edge (unsigned int min_len,
3366  unsigned int max_len,
3367  char *consumed_string,
3368  struct REGEX_INTERNAL_State *state,
3370  void *iterator_cls)
3371 {
3372  char *temp;
3373  struct REGEX_INTERNAL_Transition *t;
3374  unsigned int num_edges = state->transition_count;
3375  struct REGEX_BLOCK_Edge edges[num_edges];
3376  struct REGEX_BLOCK_Edge edge[1];
3377  struct GNUNET_HashCode hash;
3378  struct GNUNET_HashCode hash_new;
3379  unsigned int cur_len;
3380 
3381  if (NULL != consumed_string)
3382  cur_len = strlen (consumed_string);
3383  else
3384  cur_len = 0;
3385 
3386  if (((cur_len >= min_len) || (GNUNET_YES == state->accepting)) &&
3387  (cur_len > 0) && (NULL != consumed_string))
3388  {
3389  if (cur_len <= max_len)
3390  {
3391  if ((NULL != state->proof) &&
3392  (0 != strcmp (consumed_string, state->proof)))
3393  {
3394  (void) state_get_edges (state, edges);
3395  GNUNET_CRYPTO_hash (consumed_string, strlen (consumed_string), &hash);
3397  "Start state for string `%s' is %s\n",
3398  consumed_string,
3399  GNUNET_h2s (&hash));
3400  iterator (iterator_cls,
3401  &hash,
3402  consumed_string,
3403  state->accepting,
3404  num_edges,
3405  edges);
3406  }
3407 
3408  if ((GNUNET_YES == state->accepting) && (cur_len > 1) &&
3409  (state->transition_count < 1) && (cur_len < max_len))
3410  {
3411  /* Special case for regex consisting of just a string that is shorter than
3412  * max_len */
3413  edge[0].label = &consumed_string[cur_len - 1];
3414  edge[0].destination = state->hash;
3415  temp = GNUNET_strdup (consumed_string);
3416  temp[cur_len - 1] = '\0';
3417  GNUNET_CRYPTO_hash (temp, cur_len - 1, &hash_new);
3419  "Start state for short string `%s' is %s\n",
3420  temp,
3421  GNUNET_h2s (&hash_new));
3422  iterator (iterator_cls, &hash_new, temp, GNUNET_NO, 1, edge);
3423  GNUNET_free (temp);
3424  }
3425  }
3426  else /* cur_len > max_len */
3427  {
3428  /* Case where the concatenated labels are longer than max_len, then split. */
3429  edge[0].label = &consumed_string[max_len];
3430  edge[0].destination = state->hash;
3431  temp = GNUNET_strdup (consumed_string);
3432  temp[max_len] = '\0';
3433  GNUNET_CRYPTO_hash (temp, max_len, &hash);
3435  "Start state at split edge `%s'-`%s` is %s\n",
3436  temp,
3437  edge[0].label,
3438  GNUNET_h2s (&hash_new));
3439  iterator (iterator_cls, &hash, temp, GNUNET_NO, 1, edge);
3440  GNUNET_free (temp);
3441  }
3442  }
3443 
3444  if (cur_len < max_len)
3445  {
3446  for (t = state->transitions_head; NULL != t; t = t->next)
3447  {
3448  if (NULL != strchr (t->label, (int) '.'))
3449  {
3450  /* Wildcards not allowed during starting states */
3451  GNUNET_break (0);
3452  continue;
3453  }
3454  if (NULL != consumed_string)
3455  GNUNET_asprintf (&temp, "%s%s", consumed_string, t->label);
3456  else
3457  GNUNET_asprintf (&temp, "%s", t->label);
3458  iterate_initial_edge (min_len,
3459  max_len,
3460  temp,
3461  t->to_state,
3462  iterator,
3463  iterator_cls);
3464  GNUNET_free (temp);
3465  }
3466  }
3467 }
3468 
3469 
3478 void
3481  void *iterator_cls)
3482 {
3483  struct REGEX_INTERNAL_State *s;
3484 
3485  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating over starting edges\n");
3488  NULL,
3489  a->start,
3490  iterator,
3491  iterator_cls);
3492  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating over DFA edges\n");
3493  for (s = a->states_head; NULL != s; s = s->next)
3494  {
3495  struct REGEX_BLOCK_Edge edges[s->transition_count];
3496  unsigned int num_edges;
3497 
3498  num_edges = state_get_edges (s, edges);
3499  if (((NULL != s->proof) && (0 < strlen (s->proof))) || s->accepting)
3500  {
3502  "Creating DFA edges at `%s' under key %s\n",
3503  s->proof,
3504  GNUNET_h2s (&s->hash));
3505  iterator (iterator_cls,
3506  &s->hash,
3507  s->proof,
3508  s->accepting,
3509  num_edges,
3510  edges);
3511  }
3512  s->marked = GNUNET_NO;
3513  }
3514 }
3515 
3516 
3524 {
3526  char *proof;
3530 };
3531 
3532 
3537 {
3540 };
3541 
3542 
3554 static void
3555 store_all_states (void *cls,
3556  const struct GNUNET_HashCode *key,
3557  const char *proof,
3558  int accepting,
3559  unsigned int num_edges,
3560  const struct REGEX_BLOCK_Edge *edges)
3561 {
3562  struct GNUNET_CONTAINER_MultiHashMap *hm = cls;
3563  struct temporal_state_store *tmp;
3564  size_t edges_size;
3565 
3566  tmp = GNUNET_new (struct temporal_state_store);
3567  tmp->reachable = GNUNET_NO;
3568  tmp->proof = GNUNET_strdup (proof);
3569  tmp->accepting = accepting;
3570  tmp->num_edges = num_edges;
3571  edges_size = sizeof (struct REGEX_BLOCK_Edge) * num_edges;
3572  tmp->edges = GNUNET_malloc (edges_size);
3573  GNUNET_memcpy (tmp->edges, edges, edges_size);
3576  hm,
3577  key,
3578  tmp,
3580 }
3581 
3582 
3591 static void
3593  struct GNUNET_CONTAINER_MultiHashMap *hm)
3594 {
3595  struct temporal_state_store *child;
3596  unsigned int i;
3597 
3598  if (GNUNET_YES == state->reachable)
3599  /* visited */
3600  return;
3601 
3602  state->reachable = GNUNET_YES;
3603  for (i = 0; i < state->num_edges; i++)
3604  {
3605  child =
3607  if (NULL == child)
3608  {
3609  GNUNET_break (0);
3610  continue;
3611  }
3612  mark_as_reachable (child, hm);
3613  }
3614 }
3615 
3616 
3626 static int
3628  const struct GNUNET_HashCode *key,
3629  void *value)
3630 {
3631  struct GNUNET_CONTAINER_MultiHashMap *hm = cls;
3632  struct temporal_state_store *state = value;
3633 
3634  if (GNUNET_YES == state->reachable)
3635  /* already visited and marked */
3636  return GNUNET_YES;
3637 
3638  if (GNUNET_REGEX_INITIAL_BYTES > strlen (state->proof) &&
3639  GNUNET_NO == state->accepting)
3640  /* not directly reachable */
3641  return GNUNET_YES;
3642 
3643  mark_as_reachable (state, hm);
3644  return GNUNET_YES;
3645 }
3646 
3647 
3658 static int
3659 iterate_reachables (void *cls, const struct GNUNET_HashCode *key, void *value)
3660 {
3661  struct client_iterator *ci = cls;
3662  struct temporal_state_store *state = value;
3663 
3664  if (GNUNET_YES == state->reachable)
3665  {
3666  ci->iterator (ci->iterator_cls,
3667  key,
3668  state->proof,
3669  state->accepting,
3670  state->num_edges,
3671  state->edges);
3672  }
3673  GNUNET_free (state->edges);
3674  GNUNET_free (state->proof);
3675  GNUNET_free (state);
3676  return GNUNET_YES;
3677 }
3678 
3689 void
3692  void *iterator_cls)
3693 {
3694  struct GNUNET_CONTAINER_MultiHashMap *hm;
3695  struct client_iterator ci;
3696 
3698  ci.iterator = iterator;
3700 
3704 
3706 }
3707 
3708 
3709 /* end of regex_internal.c */
#define GNUNET_CONTAINER_DLL_remove(head, tail, element)
Remove an element from a DLL.
unsigned int state_count
Number of states in the automaton.
static int iterator(void *cls, const struct GNUNET_PeerIdentity *key, void *value)
Iterator over hash map entries.
static void store_all_states(void *cls, const struct GNUNET_HashCode *key, const char *proof, int accepting, unsigned int num_edges, const struct REGEX_BLOCK_Edge *edges)
Iterator over all edges of a dfa.
static int automaton_create_proofs(struct REGEX_INTERNAL_Automaton *a)
Create proofs for all states in the given automaton.
unsigned int scc_id
Marking the state as part of an SCC (Strongly Connected Component).
static int iterate_reachables(void *cls, const struct GNUNET_HashCode *key, void *value)
Iterator over hash map entries.
unsigned int incoming_transition_count
Number of incoming transitions.
struct REGEX_INTERNAL_State * states_head
DLL of states.
struct REGEX_INTERNAL_State * end
End state of the partial NFA.
struct GNUNET_HashCode destination
Destionation of the edge.
static void remove_epsilon(const struct StringBuffer *str, struct StringBuffer *ret)
Remove an epsilon from the string str.
int index
Used for SCC detection.
size_t REGEX_INTERNAL_get_first_key(const char *input_string, size_t string_len, struct GNUNET_HashCode *key)
Get the first key for the given input_string.
static void nfa_closure_set_create(struct REGEX_INTERNAL_StateSet *ret, struct REGEX_INTERNAL_Automaton *nfa, struct REGEX_INTERNAL_StateSet *states, const char *label)
Calculates the closure set for the given set of states.
unsigned int traversal_id
Unique state id that is used for traversing the automaton.
static int end
Set if we are to shutdown all services (including ARM).
Definition: gnunet-arm.c:34
static void dfa_remove_unreachable_states(struct REGEX_INTERNAL_Automaton *a)
Remove all unreachable states from DFA &#39;a&#39;.
#define GNUNET_CONTAINER_DLL_insert(head, tail, element)
Insert an element at the head of a DLL.
struct REGEX_INTERNAL_Transition * next
This is a linked list.
unsigned int transition_id
Unique transition id.
static void nfa_add_plus_op(struct REGEX_INTERNAL_Context *ctx)
Pops an NFA fragment (a) from the stack and adds a new fragment (a+)
struct GNUNET_HashCode hash
Hash of the state.
unsigned int REGEX_INTERNAL_get_transition_count(struct REGEX_INTERNAL_Automaton *a)
Get the number of transitions that are contained in the given automaton.
int accepting
If this is an accepting state or not.
static int state_set_compare(struct REGEX_INTERNAL_StateSet *sset1, struct REGEX_INTERNAL_StateSet *sset2)
Compare to state sets by comparing the id&#39;s of the states that are contained in each set...
int GNUNET_snprintf(char *buf, size_t size, const char *format,...)
Like snprintf, just aborts if the buffer is of insufficient size.
unsigned int state_id
Unique state id.
struct REGEX_INTERNAL_Transition * transitions_tail
Strided transitions DLL.
static int start
Set if we are to start default services (including ARM).
Definition: gnunet-arm.c:39
#define GNUNET_assert(cond)
Use this for fatal errors that cannot be handled.
enum REGEX_INTERNAL_AutomatonType type
Type of the automaton.
#define GNUNET_CONTAINER_DLL_insert_before(head, tail, other, element)
Insert an element into a DLL before the given other element.
static int evaluate_nfa(struct REGEX_INTERNAL_Automaton *a, const char *string)
Evaluates the given string using the given NFA automaton.
struct REGEX_INTERNAL_Transition * transitions_head
DLL of transitions.
struct REGEX_INTERNAL_Automaton * REGEX_INTERNAL_construct_nfa(const char *regex, const size_t len)
Construct an NFA by parsing the regex string of length &#39;len&#39;.
struct REGEX_INTERNAL_Automaton * stack_tail
DLL of REGEX_INTERNAL_Automaton&#39;s used as a stack.
static void automaton_add_state(struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s)
Add a state to the automaton &#39;a&#39;, always use this function to alter the states DLL of the automaton...
unsigned int blen
Number of bytes allocated for sbuf.
int contained
Marking the state as contained.
static void sb_printf1(struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg)
Format a string buffer.
int GNUNET_CONTAINER_multihashmap_iterate(struct GNUNET_CONTAINER_MultiHashMap *map, GNUNET_CONTAINER_HashMapIterator it, void *it_cls)
Iterate over all entries in the map.
static void state_set_clear(struct REGEX_INTERNAL_StateSet *set)
Clears the given StateSet &#39;set&#39;.
#define GNUNET_NO
Definition: gnunet_common.h:81
struct REGEX_INTERNAL_StateSet nfa_set
Set of states on which this state is based on.
static void nfa_add_star_op(struct REGEX_INTERNAL_Context *ctx)
Pops a NFA fragment from the stack (a) and adds a new fragment (a*)
const char * GNUNET_h2s(const struct GNUNET_HashCode *hc)
Convert a hash value to a string (for printing debug messages).
#define GNUNET_OK
Named constants for return values.
Definition: gnunet_common.h:78
#define GNUNET_free_non_null(ptr)
Free the memory pointed to by ptr if ptr is not NULL.
#define GNUNET_new(type)
Allocate a struct or union of the given type.
Automaton representation.
static void automaton_create_proofs_simplify(const struct StringBuffer *R_last_ij, const struct StringBuffer *R_last_ik, const struct StringBuffer *R_last_kk, const struct StringBuffer *R_last_kj, struct StringBuffer *R_cur_ij, struct StringBuffer *R_cur_l, struct StringBuffer *R_cur_r)
Construct the regular expression given the inductive step, $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}...
struct REGEX_INTERNAL_State * from_state
State from which this transition origins.
static struct GNUNET_SCHEDULER_Task * t
Main task.
static int ret
Final status code.
Definition: gnunet-arm.c:89
static void automaton_destroy_state(struct REGEX_INTERNAL_State *s)
Frees the memory used by State s.
#define GNUNET_malloc_large(size)
Wrapper around malloc.
#define GNUNET_strdup(a)
Wrapper around GNUNET_xstrdup_.
char * proof
Proof for this state.
int marked
Marking of the state.
Internal representation of the hash map.
static unsigned int dfa_move(struct REGEX_INTERNAL_State **s, const char *str)
Move from the given state &#39;s&#39; to the next state on transition &#39;str&#39;.
#define GNUNET_break(cond)
Use this for internal assertion violations that are not fatal (can be handled) but should not occur...
Edge representation.
static struct GNUNET_DNSSTUB_Context * ctx
Context for DNS resolution.
static void dfa_add_multi_strides(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Function called for each state in the DFA.
static void mark_as_reachable(struct temporal_state_store *state, struct GNUNET_CONTAINER_MultiHashMap *hm)
Mark state as reachable and call recursively on all its edges.
static void nfa_add_alternation(struct REGEX_INTERNAL_Context *ctx)
Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment that alternates between a an...
struct REGEX_INTERNAL_State * states_tail
DLL of states.
void * GNUNET_CONTAINER_multihashmap_get(const struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key)
Given a key find a value in the map matching the key.
struct REGEX_INTERNAL_State * head
MDLL of states.
Set of states using MDLL API.
void REGEX_INTERNAL_automaton_traverse(const struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *start, REGEX_INTERNAL_traverse_check check, void *check_cls, REGEX_INTERNAL_traverse_action action, void *action_cls)
Traverses the given automaton using depth-first-search (DFS) from it&#39;s start state, visiting all reachable states and calling &#39;action&#39; on each one of them.
#define GNUNET_log_strerror(level, cmd)
Log an error message at log-level &#39;level&#39; that indicates a failure of the command &#39;cmd&#39; with the mess...
int GNUNET_asprintf(char **buf, const char *format,...)
Like asprintf, just portable.
static void sb_strdup(struct StringBuffer *out, const struct StringBuffer *in)
Copy the given string buffer from &#39;in&#39; to &#39;out&#39;.
Context that contains an id counter for states and transitions as well as a DLL of automatons used as...
#define GNUNET_CONTAINER_MDLL_insert(mdll, head, tail, element)
Insert an element at the head of a MDLL.
#define GNUNET_CONTAINER_MDLL_insert_tail(mdll, head, tail, element)
Insert an element at the tail of a MDLL.
static void sb_append(struct StringBuffer *ret, const struct StringBuffer *sarg)
Append a string.
#define GNUNET_array_grow(arr, size, tsize)
Grow a well-typed (!) array.
enum State state
current state of profiling
#define GNUNET_memcpy(dst, src, n)
static struct GNUNET_OS_Process * p
Helper process we started.
Definition: gnunet-qr.c:59
, &#39; bother checking if a value already exists (faster than GNUNET_CONTAINER_MULTIHASHMAPOPTION_...
Store regex iterator and cls in one place to pass to the hashmap iterator.
static char * value
Value of the record to add/remove.
const unsigned int stride
Length of the strides.
library to parse regular expressions into dfa
char * name
Human readable name of the state.
static void sb_printf2(struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg1, const struct StringBuffer *sarg2)
Format a string buffer.
static uint64_t proof
Definition: gnunet-scrypt.c:41
unsigned int transition_count
Number of transitions from this state to other states.
static int state_compare(const void *a, const void *b)
Compare two states.
static void sb_strdup_cstr(struct StringBuffer *out, const char *cstr)
Copy the given string buffer from &#39;in&#39; to &#39;out&#39;.
#define GNUNET_realloc(ptr, size)
Wrapper around realloc.
void GNUNET_CRYPTO_hash(const void *block, size_t size, struct GNUNET_HashCode *ret)
Compute hash of a given block.
Definition: crypto_hash.c:44
static void sb_wrap(struct StringBuffer *ret, const char *format, size_t extra_chars)
Wrap a string buffer, that is, set ret to the format string which contains an "%s" which is to be rep...
static int nullstrcmp(const char *str1, const char *str2)
Compare two strings for equality.
void GNUNET_CONTAINER_multihashmap_destroy(struct GNUNET_CONTAINER_MultiHashMap *map)
Destroy a hash map.
static struct PeerEntry ** table
Table with our interned peer IDs.
Definition: peer.c:55
#define GNUNET_CONTAINER_MDLL_remove(mdll, head, tail, element)
Remove an element from a MDLL.
#define GNUNET_MAX(a, b)
Definition: gnunet_common.h:85
int16_t null_flag
Buffer currently represents "NULL" (not the empty string!)
static void dfa_remove_dead_states(struct REGEX_INTERNAL_Automaton *a)
Remove all dead states from the DFA &#39;a&#39;.
void(* REGEX_INTERNAL_KeyIterator)(void *cls, const struct GNUNET_HashCode *key, const char *proof, int accepting, unsigned int num_edges, const struct REGEX_BLOCK_Edge *edges)
Iterator callback function.
void REGEX_INTERNAL_dfa_add_multi_strides(struct REGEX_INTERNAL_Context *regex_ctx, struct REGEX_INTERNAL_Automaton *dfa, const unsigned int stride_len)
Adds multi-strided transitions to the given &#39;dfa&#39;.
static void iterate_initial_edge(unsigned int min_len, unsigned int max_len, char *consumed_string, struct REGEX_INTERNAL_State *state, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
Recursive function that calls the iterator for each synthetic start state.
struct REGEX_INTERNAL_State ** states
Array of states.
static int result
Global testing status.
static void REGEX_INTERNAL_context_init(struct REGEX_INTERNAL_Context *ctx)
Initialize a new context.
Transition between two states.
static void sb_init(struct StringBuffer *sb, size_t n)
Initialize string buffer for storing strings of up to n characters.
static void automaton_merge_states(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s1, struct REGEX_INTERNAL_State *s2)
Merge two states into one.
Struct to hold all the relevant state information in the HashMap.
#define GNUNET_REGEX_INITIAL_BYTES
Constant for how many bytes the initial string regex should have.
int16_t synced
If this entry is part of the last/current generation array, this flag is GNUNET_YES if the last and c...
A 512-bit hashcode.
static struct REGEX_INTERNAL_State * dfa_state_create(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_StateSet *nfa_states)
Creates a new DFA state based on a set of NFA states.
struct REGEX_INTERNAL_State * start
First state of the automaton.
static int evaluate_dfa(struct REGEX_INTERNAL_Automaton *a, const char *string)
Evaluates the given string using the given DFA automaton.
static int reachability_iterator(void *cls, const struct GNUNET_HashCode *key, void *value)
Iterator over hash map entries to mark the ones that are reachable.
static int sb_nullstrcmp(const struct StringBuffer *s1, const struct StringBuffer *s2)
Compare two strings for equality.
static void state_set_append(struct REGEX_INTERNAL_StateSet *set, struct REGEX_INTERNAL_State *state)
Append state to the given StateSet.
static void sb_printf3(struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg1, const struct StringBuffer *sarg2, const struct StringBuffer *sarg3)
Format a string buffer.
struct GNUNET_HashCode key
The key used in the DHT.
#define GNUNET_SYSERR
Definition: gnunet_common.h:79
static unsigned int size
Size of the "table".
Definition: peer.c:67
static void state_add_transition(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_State *from_state, const char *label, struct REGEX_INTERNAL_State *to_state)
Adds a transition from one state to another on label.
Context for adding strided transitions to a DFA.
static int sb_strncmp(const struct StringBuffer *str1, const struct StringBuffer *str2, size_t n)
Compare n bytes of &#39;str1&#39; and &#39;str2&#39;.
static void construct_dfa_states(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *nfa, struct REGEX_INTERNAL_Automaton *dfa, struct REGEX_INTERNAL_State *dfa_state)
Create DFA states based on given &#39;nfa&#39; and starting with &#39;dfa_state&#39;.
static void nfa_add_question_op(struct REGEX_INTERNAL_Context *ctx)
Pops an NFA fragment (a) from the stack and adds a new fragment (a?)
static int dfa_minimize(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a)
Minimize the given DFA &#39;a&#39; by removing all unreachable states, removing all dead states and merging a...
#define GNUNET_CONTAINER_DLL_insert_tail(head, tail, element)
Insert an element at the tail of a DLL.
void REGEX_INTERNAL_automaton_destroy(struct REGEX_INTERNAL_Automaton *a)
Free the memory allocated by constructing the REGEX_INTERNAL_Automaton data structure.
unsigned int off
Number of entries in use in the &#39;states&#39; array.
void REGEX_INTERNAL_iterate_all_edges(struct REGEX_INTERNAL_Automaton *a, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
Iterate over all edges starting from start state of automaton &#39;a&#39;.
int GNUNET_CONTAINER_multihashmap_put(struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key, void *value, enum GNUNET_CONTAINER_MultiHashMapOption opt)
Store a key-value pair in the map.
static int has_epsilon(const struct StringBuffer *str)
Check if the string &#39;str&#39; starts with an epsilon (empty string).
struct REGEX_BLOCK_Edge * edges
const char * label
Label of the edge.
static int sb_strncmp_cstr(const struct StringBuffer *str1, const char *str2, size_t n)
Compare n bytes of &#39;str1&#39; and &#39;str2&#39;.
static void automaton_remove_state(struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s)
Remove a state from the given automaton &#39;a&#39;.
void(* REGEX_INTERNAL_traverse_action)(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Function that is called with each state, when traversing an automaton.
#define GNUNET_strndup(a, length)
Wrapper around GNUNET_xstrndup_.
unsigned int id
Unique id of this transition.
const char * REGEX_INTERNAL_get_canonical_regex(struct REGEX_INTERNAL_Automaton *a)
Get the canonical regex of the given automaton.
static unsigned int state_get_edges(struct REGEX_INTERNAL_State *s, struct REGEX_BLOCK_Edge *edges)
Get all edges leaving state s.
struct REGEX_INTERNAL_State * next
This is a linked list to keep states in an automaton.
int is_multistrided
GNUNET_YES, if multi strides have been added to the Automaton.
int REGEX_INTERNAL_eval(struct REGEX_INTERNAL_Automaton *a, const char *string)
Evaluates the given string against the given compiled regex a.
static struct GNUNET_OS_Process * child
The child process we spawn.
static void state_remove_transition(struct REGEX_INTERNAL_State *state, struct REGEX_INTERNAL_Transition *transition)
Remove a &#39;transition&#39; from &#39;state&#39;.
static void nfa_add_states(struct REGEX_INTERNAL_Automaton *n, struct REGEX_INTERNAL_State *states_head, struct REGEX_INTERNAL_State *states_tail)
Adds a list of states to the given automaton &#39;n&#39;.
static int sb_strcmp(const struct StringBuffer *s1, const struct StringBuffer *s2)
Compare two strings for equality.
unsigned int dfs_id
Linear state ID accquired by depth-first-search.
static struct REGEX_INTERNAL_Automaton * nfa_fragment_create(struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *end)
Creates a new NFA fragment.
struct REGEX_INTERNAL_State * to_state
State to which this transition leads.
struct REGEX_INTERNAL_Automaton * REGEX_INTERNAL_construct_dfa(const char *regex, const size_t len, unsigned int max_path_len)
Construct DFA for the given &#39;regex&#39; of length &#39;len&#39;.
char * canonical_regex
Canonical regex (result of RX->NFA->DFA->RX)
#define GNUNET_log(kind,...)
static void nfa_add_concatenation(struct REGEX_INTERNAL_Context *ctx)
Pops two NFA fragments (a, b) from the stack and concatenates them (ab)
REGEX_INTERNAL_KeyIterator iterator
static void automaton_state_traverse(struct REGEX_INTERNAL_State *s, int *marks, unsigned int *count, REGEX_INTERNAL_traverse_check check, void *check_cls, REGEX_INTERNAL_traverse_action action, void *action_cls)
Depth-first traversal (DFS) of all states that are reachable from state &#39;s&#39;.
static void mark_states(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Set the given state &#39;marked&#39; to GNUNET_YES.
struct GNUNET_CONTAINER_MultiHashMap * GNUNET_CONTAINER_multihashmap_create(unsigned int len, int do_not_copy_keys)
Create a multi hash map.
char * label
Label for this transition.
int(* REGEX_INTERNAL_traverse_check)(void *cls, struct REGEX_INTERNAL_State *s, struct REGEX_INTERNAL_Transition *t)
Function that get&#39;s passed to automaton traversal and is called before each next traversal from state...
static void automaton_fragment_clear(struct REGEX_INTERNAL_Automaton *a)
Clears an automaton fragment.
static void sb_free(struct StringBuffer *sb)
Free resources of the given string buffer.
#define GNUNET_YES
Definition: gnunet_common.h:80
void REGEX_INTERNAL_iterate_reachable_edges(struct REGEX_INTERNAL_Automaton *a, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
Iterate over all edges of automaton &#39;a&#39; that are reachable from a state with a proof of at least GNUN...
size_t slen
Length of the string in the buffer.
String container for faster string operations.
static struct GNUNET_ARM_Operation * op
Current operation.
Definition: gnunet-arm.c:139
common internal definitions for regex library.
void dfa_compress_paths_helper(struct REGEX_INTERNAL_Automaton *dfa, struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *cur, char *label, unsigned int max_len, struct REGEX_INTERNAL_Transition **transitions_head, struct REGEX_INTERNAL_Transition **transitions_tail)
Recursive Helper function for DFA path compression.
char * abuf
Allocated buffer.
unsigned int len
Length of the MDLL.
static void sb_realloc(struct StringBuffer *ret, size_t nlen)
Reallocate the buffer of &#39;ret&#39; to fit &#39;nlen&#39; characters; move the existing string to the beginning of...
static void number_states(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Helper function used as &#39;action&#39; in &#39;REGEX_INTERNAL_automaton_traverse&#39; function to create the depth-...
int lowlink
Used for SCC detection.
static void remove_parentheses(struct StringBuffer *str)
Remove parentheses surrounding string str.
static int dfa_merge_nondistinguishable_states(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a)
Merge all non distinguishable states in the DFA &#39;a&#39;.
unsigned int id
Unique state id.
char * sbuf
Buffer holding the string (may start in the middle!); NOT 0-terminated!
static void sb_append_cstr(struct StringBuffer *ret, const char *cstr)
Append a C string.
static int sb_strkcmp(const struct StringBuffer *str1, const struct StringBuffer *str2, size_t k)
Compare &#39;str1&#39;, starting from position &#39;k&#39;, with whole &#39;str2&#39;.
#define GNUNET_malloc(size)
Wrapper around malloc.
static void dfa_compress_paths(struct REGEX_INTERNAL_Context *regex_ctx, struct REGEX_INTERNAL_Automaton *dfa, unsigned int max_len)
Compress paths in the given &#39;dfa&#39;.
static int needs_parentheses(const struct StringBuffer *str)
Check if the given string str needs parentheses around it when using it to generate a regex...
#define GNUNET_free(ptr)
Wrapper around free.
struct REGEX_INTERNAL_Transition * transitions_tail
DLL of transitions.
static struct REGEX_INTERNAL_State * nfa_state_create(struct REGEX_INTERNAL_Context *ctx, int accepting)
Creates a new NFA state.
static void dfa_add_multi_strides_helper(void *cls, const unsigned int depth, char *label, struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *s)
Recursive helper function to add strides to a DFA.
struct REGEX_INTERNAL_Automaton * stack_head
DLL of REGEX_INTERNAL_Automaton&#39;s used as a stack.
struct REGEX_INTERNAL_Transition * transitions_head
Strided transitions DLL.
static void nfa_add_label(struct REGEX_INTERNAL_Context *ctx, const char *label)
Adds a new nfa fragment to the stack.
struct REGEX_INTERNAL_State * tail
MDLL of states.