GNUnet  0.20.0
regex_internal.c
Go to the documentation of this file.
1 /*
2  This file is part of GNUnet
3  Copyright (C) 2012 GNUnet e.V.
4 
5  GNUnet is free software: you can redistribute it and/or modify it
6  under the terms of the GNU Affero General Public License as published
7  by the Free Software Foundation, either version 3 of the License,
8  or (at your option) any later version.
9 
10  GNUnet is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  Affero General Public License for more details.
14 
15  You should have received a copy of the GNU Affero General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  SPDX-License-Identifier: AGPL3.0-or-later
19  */
26 #include "platform.h"
27 #include "gnunet_util_lib.h"
28 #include "gnunet_regex_service.h"
29 #include "regex_internal_lib.h"
30 #include "regex_internal.h"
31 
32 
37 #define REGEX_DEBUG_DFA GNUNET_NO
38 
43 {
48 
53 
57  unsigned int len;
58 };
59 
60 
67 static void
70 {
71  if (set->off == set->size)
72  GNUNET_array_grow (set->states, set->size, set->size * 2 + 4);
73  set->states[set->off++] = state;
74 }
75 
76 
85 static int
86 nullstrcmp (const char *str1, const char *str2)
87 {
88  if ((NULL == str1) != (NULL == str2))
89  return -1;
90  if ((NULL == str1) && (NULL == str2))
91  return 0;
92 
93  return strcmp (str1, str2);
94 }
95 
96 
106 static void
108  struct REGEX_INTERNAL_State *from_state,
109  const char *label,
110  struct REGEX_INTERNAL_State *to_state)
111 {
113  struct REGEX_INTERNAL_Transition *oth;
114 
115  if (NULL == from_state)
116  {
117  GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not create Transition.\n");
118  return;
119  }
120 
121  /* Do not add duplicate state transitions */
122  for (t = from_state->transitions_head; NULL != t; t = t->next)
123  {
124  if ((t->to_state == to_state) && (0 == nullstrcmp (t->label, label)) &&
125  (t->from_state == from_state) )
126  return;
127  }
128 
129  /* sort transitions by label */
130  for (oth = from_state->transitions_head; NULL != oth; oth = oth->next)
131  {
132  if (0 < nullstrcmp (oth->label, label))
133  break;
134  }
135 
137  if (NULL != ctx)
138  t->id = ctx->transition_id++;
139  if (NULL != label)
140  t->label = GNUNET_strdup (label);
141  else
142  t->label = NULL;
143  t->to_state = to_state;
144  t->from_state = from_state;
145 
146  /* Add outgoing transition to 'from_state' */
150  oth,
151  t);
152 }
153 
154 
161 static void
163  struct REGEX_INTERNAL_Transition *transition)
164 {
165  if ((NULL == state) || (NULL == transition))
166  return;
167 
168  if (transition->from_state != state)
169  return;
170 
171  GNUNET_free (transition->label);
172 
173  state->transition_count--;
174  GNUNET_CONTAINER_DLL_remove (state->transitions_head,
175  state->transitions_tail,
176  transition);
177 
178  GNUNET_free (transition);
179 }
180 
181 
192 static int
193 state_compare (const void *a, const void *b)
194 {
195  struct REGEX_INTERNAL_State **s1 = (struct REGEX_INTERNAL_State **) a;
196  struct REGEX_INTERNAL_State **s2 = (struct REGEX_INTERNAL_State **) b;
197 
198  return (*s1)->id - (*s2)->id;
199 }
200 
201 
211 static unsigned int
213 {
215  unsigned int count;
216 
217  if (NULL == s)
218  return 0;
219 
220  count = 0;
221 
222  for (t = s->transitions_head; NULL != t; t = t->next)
223  {
224  if (NULL != t->to_state)
225  {
226  edges[count].label = t->label;
227  edges[count].destination = t->to_state->hash;
228  count++;
229  }
230  }
231  return count;
232 }
233 
234 
243 static int
245  struct REGEX_INTERNAL_StateSet *sset2)
246 {
247  int result;
248  unsigned int i;
249 
250  if ((NULL == sset1) || (NULL == sset2))
251  return 1;
252 
253  result = sset1->off - sset2->off;
254  if (result < 0)
255  return -1;
256  if (result > 0)
257  return 1;
258  for (i = 0; i < sset1->off; i++)
259  if (0 != (result = state_compare (&sset1->states[i], &sset2->states[i])))
260  break;
261  return result;
262 }
263 
264 
270 static void
272 {
273  GNUNET_array_grow (set->states, set->size, 0);
274  set->off = 0;
275 }
276 
277 
284 static void
286 {
287  if (NULL == a)
288  return;
289 
290  a->start = NULL;
291  a->end = NULL;
292  a->states_head = NULL;
293  a->states_tail = NULL;
294  a->state_count = 0;
295  GNUNET_free (a);
296 }
297 
298 
304 static void
306 {
308  struct REGEX_INTERNAL_Transition *next_t;
309 
310  if (NULL == s)
311  return;
312 
313  GNUNET_free (s->name);
314  GNUNET_free (s->proof);
315  state_set_clear (&s->nfa_set);
316  for (t = s->transitions_head; NULL != t; t = next_t)
317  {
318  next_t = t->next;
320  }
321 
322  GNUNET_free (s);
323 }
324 
325 
334 static void
336  struct REGEX_INTERNAL_State *s)
337 {
338  struct REGEX_INTERNAL_State *s_check;
339  struct REGEX_INTERNAL_Transition *t_check;
340  struct REGEX_INTERNAL_Transition *t_check_next;
341 
342  if ((NULL == a) || (NULL == s))
343  return;
344 
345  /* remove all transitions leading to this state */
346  for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
347  {
348  for (t_check = s_check->transitions_head; NULL != t_check;
349  t_check = t_check_next)
350  {
351  t_check_next = t_check->next;
352  if (t_check->to_state == s)
353  state_remove_transition (s_check, t_check);
354  }
355  }
356 
357  /* remove state */
359  a->state_count--;
360 
362 }
363 
364 
374 static void
376  struct REGEX_INTERNAL_Automaton *a,
377  struct REGEX_INTERNAL_State *s1,
378  struct REGEX_INTERNAL_State *s2)
379 {
380  struct REGEX_INTERNAL_State *s_check;
381  struct REGEX_INTERNAL_Transition *t_check;
383  struct REGEX_INTERNAL_Transition *t_next;
384  int is_dup;
385 
386  if (s1 == s2)
387  return;
388 
389  /* 1. Make all transitions pointing to s2 point to s1, unless this transition
390  * does not already exists, if it already exists remove transition. */
391  for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
392  {
393  for (t_check = s_check->transitions_head; NULL != t_check; t_check = t_next)
394  {
395  t_next = t_check->next;
396 
397  if (s2 == t_check->to_state)
398  {
399  is_dup = GNUNET_NO;
400  for (t = t_check->from_state->transitions_head; NULL != t; t = t->next)
401  {
402  if ((t->to_state == s1) && (0 == strcmp (t_check->label, t->label)) )
403  is_dup = GNUNET_YES;
404  }
405  if (GNUNET_NO == is_dup)
406  t_check->to_state = s1;
407  else
408  state_remove_transition (t_check->from_state, t_check);
409  }
410  }
411  }
412 
413  /* 2. Add all transitions from s2 to sX to s1 */
414  for (t_check = s2->transitions_head; NULL != t_check; t_check = t_check->next)
415  {
416  if (t_check->to_state != s1)
417  state_add_transition (ctx, s1, t_check->label, t_check->to_state);
418  }
419 
420  /* 3. Rename s1 to {s1,s2} */
421 #if REGEX_DEBUG_DFA
422  char *new_name;
423 
424  new_name = s1->name;
425  GNUNET_asprintf (&s1->name, "{%s,%s}", new_name, s2->name);
426  GNUNET_free (new_name);
427 #endif
428 
429  /* remove state */
431  a->state_count--;
433 }
434 
435 
443 static void
445  struct REGEX_INTERNAL_State *s)
446 {
448  a->state_count++;
449 }
450 
451 
466 static void
468  int *marks,
469  unsigned int *count,
471  void *check_cls,
473  void *action_cls)
474 {
476 
477  if (GNUNET_YES == marks[s->traversal_id])
478  return;
479 
480  marks[s->traversal_id] = GNUNET_YES;
481 
482  if (NULL != action)
483  action (action_cls, *count, s);
484 
485  (*count)++;
486 
487  for (t = s->transitions_head; NULL != t; t = t->next)
488  {
489  if ((NULL == check) ||
490  ((NULL != check) && (GNUNET_YES == check (check_cls, s, t)) ))
491  {
492  automaton_state_traverse (t->to_state,
493  marks,
494  count,
495  check,
496  check_cls,
497  action,
498  action_cls);
499  }
500  }
501 }
502 
503 
504 void
506  struct REGEX_INTERNAL_State *start,
508  void *check_cls,
510  void *action_cls)
511 {
512  unsigned int count;
513  struct REGEX_INTERNAL_State *s;
514 
515  if ((NULL == a) || (0 == a->state_count))
516  return;
517 
518  int marks[a->state_count];
519 
520  for (count = 0, s = a->states_head; NULL != s && count < a->state_count;
521  s = s->next, count++)
522  {
523  s->traversal_id = count;
524  marks[s->traversal_id] = GNUNET_NO;
525  }
526 
527  count = 0;
528 
529  if (NULL == start)
530  s = a->start;
531  else
532  s = start;
533 
535  marks,
536  &count,
537  check,
538  check_cls,
539  action,
540  action_cls);
541 }
542 
543 
548 {
553  char *sbuf;
554 
558  char *abuf;
559 
563  size_t slen;
564 
568  unsigned int blen;
569 
573  int16_t null_flag;
574 
584  int16_t synced;
585 };
586 
587 
596 static int
597 sb_nullstrcmp (const struct StringBuffer *s1, const struct StringBuffer *s2)
598 {
599  if ((GNUNET_YES == s1->null_flag) && (GNUNET_YES == s2->null_flag))
600  return 0;
601  if ((GNUNET_YES == s1->null_flag) || (GNUNET_YES == s2->null_flag))
602  return -1;
603  if (s1->slen != s2->slen)
604  return -1;
605  if (0 == s1->slen)
606  return 0;
607  return memcmp (s1->sbuf, s2->sbuf, s1->slen);
608 }
609 
610 
619 static int
620 sb_strcmp (const struct StringBuffer *s1, const struct StringBuffer *s2)
621 {
622  if (s1->slen != s2->slen)
623  return -1;
624  if (0 == s1->slen)
625  return 0;
626  return memcmp (s1->sbuf, s2->sbuf, s1->slen);
627 }
628 
629 
637 static void
638 sb_realloc (struct StringBuffer *ret, size_t nlen)
639 {
640  char *old;
641 
642  GNUNET_assert (nlen >= ret->slen);
643  old = ret->abuf;
644  ret->abuf = GNUNET_malloc (nlen);
645  ret->blen = nlen;
646  GNUNET_memcpy (ret->abuf, ret->sbuf, ret->slen);
647  ret->sbuf = ret->abuf;
648  GNUNET_free (old);
649 }
650 
651 
658 static void
659 sb_append (struct StringBuffer *ret, const struct StringBuffer *sarg)
660 {
661  if (GNUNET_YES == ret->null_flag)
662  ret->slen = 0;
663  ret->null_flag = GNUNET_NO;
664  if (ret->blen < sarg->slen + ret->slen)
665  sb_realloc (ret, ret->blen + sarg->slen + 128);
666  GNUNET_memcpy (&ret->sbuf[ret->slen], sarg->sbuf, sarg->slen);
667  ret->slen += sarg->slen;
668 }
669 
670 
677 static void
678 sb_append_cstr (struct StringBuffer *ret, const char *cstr)
679 {
680  size_t cstr_len = strlen (cstr);
681 
682  if (GNUNET_YES == ret->null_flag)
683  ret->slen = 0;
684  ret->null_flag = GNUNET_NO;
685  if (ret->blen < cstr_len + ret->slen)
686  sb_realloc (ret, ret->blen + cstr_len + 128);
687  GNUNET_memcpy (&ret->sbuf[ret->slen], cstr, cstr_len);
688  ret->slen += cstr_len;
689 }
690 
691 
702 static void
703 sb_wrap (struct StringBuffer *ret, const char *format, size_t extra_chars)
704 {
705  char *temp;
706 
707  if (GNUNET_YES == ret->null_flag)
708  ret->slen = 0;
709  ret->null_flag = GNUNET_NO;
710  temp = GNUNET_malloc (ret->slen + extra_chars + 1);
711  GNUNET_snprintf (temp,
712  ret->slen + extra_chars + 1,
713  format,
714  (int) ret->slen,
715  ret->sbuf);
716  GNUNET_free (ret->abuf);
717  ret->abuf = temp;
718  ret->sbuf = temp;
719  ret->blen = ret->slen + extra_chars + 1;
720  ret->slen = ret->slen + extra_chars;
721 }
722 
723 
733 static void
735  const char *format,
736  size_t extra_chars,
737  const struct StringBuffer *sarg)
738 {
739  if (ret->blen < sarg->slen + extra_chars + 1)
740  sb_realloc (ret, sarg->slen + extra_chars + 1);
741  ret->null_flag = GNUNET_NO;
742  ret->sbuf = ret->abuf;
743  ret->slen = sarg->slen + extra_chars;
744  GNUNET_snprintf (ret->sbuf, ret->blen, format, (int) sarg->slen, sarg->sbuf);
745 }
746 
747 
757 static void
759  const char *format,
760  size_t extra_chars,
761  const struct StringBuffer *sarg1,
762  const struct StringBuffer *sarg2)
763 {
764  if (ret->blen < sarg1->slen + sarg2->slen + extra_chars + 1)
765  sb_realloc (ret, sarg1->slen + sarg2->slen + extra_chars + 1);
766  ret->null_flag = GNUNET_NO;
767  ret->slen = sarg1->slen + sarg2->slen + extra_chars;
768  ret->sbuf = ret->abuf;
769  GNUNET_snprintf (ret->sbuf,
770  ret->blen,
771  format,
772  (int) sarg1->slen,
773  sarg1->sbuf,
774  (int) sarg2->slen,
775  sarg2->sbuf);
776 }
777 
778 
790 static void
792  const char *format,
793  size_t extra_chars,
794  const struct StringBuffer *sarg1,
795  const struct StringBuffer *sarg2,
796  const struct StringBuffer *sarg3)
797 {
798  if (ret->blen < sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1)
799  sb_realloc (ret, sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1);
800  ret->null_flag = GNUNET_NO;
801  ret->slen = sarg1->slen + sarg2->slen + sarg3->slen + extra_chars;
802  ret->sbuf = ret->abuf;
803  GNUNET_snprintf (ret->sbuf,
804  ret->blen,
805  format,
806  (int) sarg1->slen,
807  sarg1->sbuf,
808  (int) sarg2->slen,
809  sarg2->sbuf,
810  (int) sarg3->slen,
811  sarg3->sbuf);
812 }
813 
814 
821 static void
822 sb_free (struct StringBuffer *sb)
823 {
824  GNUNET_array_grow (sb->abuf, sb->blen, 0);
825  sb->slen = 0;
826  sb->sbuf = NULL;
827  sb->null_flag = GNUNET_YES;
828 }
829 
830 
837 static void
838 sb_strdup (struct StringBuffer *out, const struct StringBuffer *in)
839 
840 {
841  out->null_flag = in->null_flag;
842  if (GNUNET_YES == out->null_flag)
843  return;
844  if (out->blen < in->slen)
845  {
846  GNUNET_array_grow (out->abuf, out->blen, in->slen);
847  }
848  out->sbuf = out->abuf;
849  out->slen = in->slen;
850  GNUNET_memcpy (out->sbuf, in->sbuf, out->slen);
851 }
852 
853 
860 static void
861 sb_strdup_cstr (struct StringBuffer *out, const char *cstr)
862 {
863  if (NULL == cstr)
864  {
865  out->null_flag = GNUNET_YES;
866  return;
867  }
868  out->null_flag = GNUNET_NO;
869  out->slen = strlen (cstr);
870  if (out->blen < out->slen)
871  {
872  GNUNET_array_grow (out->abuf, out->blen, out->slen);
873  }
874  out->sbuf = out->abuf;
875  GNUNET_memcpy (out->sbuf, cstr, out->slen);
876 }
877 
878 
887 static int
888 needs_parentheses (const struct StringBuffer *str)
889 {
890  size_t slen;
891  const char *op;
892  const char *cl;
893  const char *pos;
894  const char *end;
895  unsigned int cnt;
896 
897  if ((GNUNET_YES == str->null_flag) || ((slen = str->slen) < 2))
898  return GNUNET_NO;
899  pos = str->sbuf;
900  if ('(' != pos[0])
901  return GNUNET_YES;
902  end = str->sbuf + slen;
903  cnt = 1;
904  pos++;
905  while (cnt > 0)
906  {
907  cl = memchr (pos, ')', end - pos);
908  if (NULL == cl)
909  {
910  GNUNET_break (0);
911  return GNUNET_YES;
912  }
913  /* while '(' before ')', count opening parens */
914  while ((NULL != (op = memchr (pos, '(', end - pos))) && (op < cl))
915  {
916  cnt++;
917  pos = op + 1;
918  }
919  /* got ')' first */
920  cnt--;
921  pos = cl + 1;
922  }
923  return (*pos == '\0') ? GNUNET_NO : GNUNET_YES;
924 }
925 
926 
936 static void
938 {
939  size_t slen;
940  const char *pos;
941  const char *end;
942  const char *sbuf;
943  const char *op;
944  const char *cp;
945  unsigned int cnt;
946 
947  if (0)
948  return;
949  sbuf = str->sbuf;
950  if ((GNUNET_YES == str->null_flag) || (1 >= (slen = str->slen)) ||
951  ('(' != str->sbuf[0]) || (')' != str->sbuf[slen - 1]))
952  return;
953  cnt = 0;
954  pos = &sbuf[1];
955  end = &sbuf[slen - 1];
956  op = memchr (pos, '(', end - pos);
957  cp = memchr (pos, ')', end - pos);
958  while (NULL != cp)
959  {
960  while ((NULL != op) && (op < cp))
961  {
962  cnt++;
963  pos = op + 1;
964  op = memchr (pos, '(', end - pos);
965  }
966  while ((NULL != cp) && ((NULL == op) || (cp < op)))
967  {
968  if (0 == cnt)
969  return; /* can't strip parens */
970  cnt--;
971  pos = cp + 1;
972  cp = memchr (pos, ')', end - pos);
973  }
974  }
975  if (0 != cnt)
976  {
977  GNUNET_break (0);
978  return;
979  }
980  str->sbuf++;
981  str->slen -= 2;
982 }
983 
984 
993 static int
994 has_epsilon (const struct StringBuffer *str)
995 {
996  return (GNUNET_YES != str->null_flag) && (0 < str->slen) &&
997  ('(' == str->sbuf[0]) && ('|' == str->sbuf[1]) &&
998  (')' == str->sbuf[str->slen - 1]);
999 }
1000 
1001 
1011 static void
1012 remove_epsilon (const struct StringBuffer *str, struct StringBuffer *ret)
1013 {
1014  if (GNUNET_YES == str->null_flag)
1015  {
1016  ret->null_flag = GNUNET_YES;
1017  return;
1018  }
1019  if ((str->slen > 1) && ('(' == str->sbuf[0]) && ('|' == str->sbuf[1]) &&
1020  (')' == str->sbuf[str->slen - 1]))
1021  {
1022  /* remove epsilon */
1023  if (ret->blen < str->slen - 3)
1024  {
1025  GNUNET_array_grow (ret->abuf, ret->blen, str->slen - 3);
1026  }
1027  ret->sbuf = ret->abuf;
1028  ret->slen = str->slen - 3;
1029  GNUNET_memcpy (ret->sbuf, &str->sbuf[2], ret->slen);
1030  return;
1031  }
1032  sb_strdup (ret, str);
1033 }
1034 
1035 
1045 static int
1046 sb_strncmp (const struct StringBuffer *str1,
1047  const struct StringBuffer *str2,
1048  size_t n)
1049 {
1050  size_t max;
1051 
1052  if ((str1->slen != str2->slen) && ((str1->slen < n) || (str2->slen < n)))
1053  return -1;
1054  max = GNUNET_MAX (str1->slen, str2->slen);
1055  if (max > n)
1056  max = n;
1057  return memcmp (str1->sbuf, str2->sbuf, max);
1058 }
1059 
1060 
1070 static int
1071 sb_strncmp_cstr (const struct StringBuffer *str1, const char *str2, size_t n)
1072 {
1073  if (str1->slen < n)
1074  return -1;
1075  return memcmp (str1->sbuf, str2, n);
1076 }
1077 
1078 
1086 static void
1087 sb_init (struct StringBuffer *sb, size_t n)
1088 {
1089  sb->null_flag = GNUNET_NO;
1090  sb->abuf = sb->sbuf = (0 == n) ? NULL : GNUNET_malloc (n);
1091  sb->blen = n;
1092  sb->slen = 0;
1093 }
1094 
1095 
1105 static int
1106 sb_strkcmp (const struct StringBuffer *str1,
1107  const struct StringBuffer *str2,
1108  size_t k)
1109 {
1110  if ((GNUNET_YES == str1->null_flag) || (GNUNET_YES == str2->null_flag) ||
1111  (k > str1->slen) || (str1->slen - k != str2->slen))
1112  return -1;
1113  return memcmp (&str1->sbuf[k], str2->sbuf, str2->slen);
1114 }
1115 
1116 
1125 static void
1126 number_states (void *cls,
1127  const unsigned int count,
1128  struct REGEX_INTERNAL_State *s)
1129 {
1130  struct REGEX_INTERNAL_State **states = cls;
1131 
1132  s->dfs_id = count;
1133  if (NULL != states)
1134  states[count] = s;
1135 }
1136 
1137 
1138 #define PRIS(a) \
1139  ((GNUNET_YES == a.null_flag) ? 6 : (int) a.slen), \
1140  ((GNUNET_YES == a.null_flag) ? "(null)" : a.sbuf)
1141 
1142 
1157 static void
1159  const struct StringBuffer *R_last_ik,
1160  const struct StringBuffer *R_last_kk,
1161  const struct StringBuffer *R_last_kj,
1162  struct StringBuffer *R_cur_ij,
1163  struct StringBuffer *R_cur_l,
1164  struct StringBuffer *R_cur_r)
1165 {
1166  struct StringBuffer R_temp_ij;
1167  struct StringBuffer R_temp_ik;
1168  struct StringBuffer R_temp_kj;
1169  struct StringBuffer R_temp_kk;
1170  int eps_check;
1171  int ij_ik_cmp;
1172  int ij_kj_cmp;
1173  int ik_kk_cmp;
1174  int kk_kj_cmp;
1175  int clean_ik_kk_cmp;
1176  int clean_kk_kj_cmp;
1177  size_t length;
1178  size_t length_l;
1179  size_t length_r;
1180 
1181  /*
1182  * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1183  * R_last == R^{(k-1)}, R_cur == R^{(k)}
1184  * R_cur_ij = R_cur_l | R_cur_r
1185  * R_cur_l == R^{(k-1)}_{ij}
1186  * R_cur_r == R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1187  */if ((GNUNET_YES == R_last_ij->null_flag) &&
1188  ((GNUNET_YES == R_last_ik->null_flag) ||
1189  (GNUNET_YES == R_last_kj->null_flag)))
1190  {
1191  /* R^{(k)}_{ij} = N | N */
1192  R_cur_ij->null_flag = GNUNET_YES;
1193  R_cur_ij->synced = GNUNET_NO;
1194  return;
1195  }
1196 
1197  if ((GNUNET_YES == R_last_ik->null_flag) ||
1198  (GNUNET_YES == R_last_kj->null_flag))
1199  {
1200  /* R^{(k)}_{ij} = R^{(k-1)}_{ij} | N */
1201  if (GNUNET_YES == R_last_ij->synced)
1202  {
1203  R_cur_ij->synced = GNUNET_YES;
1204  R_cur_ij->null_flag = GNUNET_NO;
1205  return;
1206  }
1207  R_cur_ij->synced = GNUNET_YES;
1208  sb_strdup (R_cur_ij, R_last_ij);
1209  return;
1210  }
1211  R_cur_ij->synced = GNUNET_NO;
1212 
1213  /* $R^{(k)}_{ij} = N | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} OR
1214  * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} */
1215 
1216  R_cur_r->null_flag = GNUNET_YES;
1217  R_cur_r->slen = 0;
1218  R_cur_l->null_flag = GNUNET_YES;
1219  R_cur_l->slen = 0;
1220 
1221  /* cache results from strcmp, we might need these many times */
1222  ij_kj_cmp = sb_nullstrcmp (R_last_ij, R_last_kj);
1223  ij_ik_cmp = sb_nullstrcmp (R_last_ij, R_last_ik);
1224  ik_kk_cmp = sb_nullstrcmp (R_last_ik, R_last_kk);
1225  kk_kj_cmp = sb_nullstrcmp (R_last_kk, R_last_kj);
1226 
1227  /* Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well
1228  * as parentheses, so we can better compare the contents */
1229 
1230  memset (&R_temp_ij, 0, sizeof(struct StringBuffer));
1231  memset (&R_temp_ik, 0, sizeof(struct StringBuffer));
1232  memset (&R_temp_kk, 0, sizeof(struct StringBuffer));
1233  memset (&R_temp_kj, 0, sizeof(struct StringBuffer));
1234  remove_epsilon (R_last_ik, &R_temp_ik);
1235  remove_epsilon (R_last_kk, &R_temp_kk);
1236  remove_epsilon (R_last_kj, &R_temp_kj);
1237  remove_parentheses (&R_temp_ik);
1238  remove_parentheses (&R_temp_kk);
1239  remove_parentheses (&R_temp_kj);
1240  clean_ik_kk_cmp = sb_nullstrcmp (R_last_ik, &R_temp_kk);
1241  clean_kk_kj_cmp = sb_nullstrcmp (&R_temp_kk, R_last_kj);
1242 
1243  /* construct R_cur_l (and, if necessary R_cur_r) */
1244  if (GNUNET_YES != R_last_ij->null_flag)
1245  {
1246  /* Assign R_temp_ij to R_last_ij and remove epsilon as well
1247  * as parentheses, so we can better compare the contents */
1248  remove_epsilon (R_last_ij, &R_temp_ij);
1249  remove_parentheses (&R_temp_ij);
1250 
1251  if ((0 == sb_strcmp (&R_temp_ij, &R_temp_ik)) &&
1252  (0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) &&
1253  (0 == sb_strcmp (&R_temp_kk, &R_temp_kj)))
1254  {
1255  if (0 == R_temp_ij.slen)
1256  {
1257  R_cur_r->null_flag = GNUNET_NO;
1258  }
1259  else if ((0 == sb_strncmp_cstr (R_last_ij, "(|", 2)) ||
1260  ((0 == sb_strncmp_cstr (R_last_ik, "(|", 2)) &&
1261  (0 == sb_strncmp_cstr (R_last_kj, "(|", 2)) ))
1262  {
1263  /*
1264  * a|(e|a)a*(e|a) = a*
1265  * a|(e|a)(e|a)*(e|a) = a*
1266  * (e|a)|aa*a = a*
1267  * (e|a)|aa*(e|a) = a*
1268  * (e|a)|(e|a)a*a = a*
1269  * (e|a)|(e|a)a*(e|a) = a*
1270  * (e|a)|(e|a)(e|a)*(e|a) = a*
1271  */if (GNUNET_YES == needs_parentheses (&R_temp_ij))
1272  sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_ij);
1273  else
1274  sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_ij);
1275  }
1276  else
1277  {
1278  /*
1279  * a|aa*a = a+
1280  * a|(e|a)a*a = a+
1281  * a|aa*(e|a) = a+
1282  * a|(e|a)(e|a)*a = a+
1283  * a|a(e|a)*(e|a) = a+
1284  */if (GNUNET_YES == needs_parentheses (&R_temp_ij))
1285  sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_ij);
1286  else
1287  sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_ij);
1288  }
1289  }
1290  else if ((0 == ij_ik_cmp) && (0 == clean_kk_kj_cmp) &&
1291  (0 != clean_ik_kk_cmp))
1292  {
1293  /* a|ab*b = ab* */
1294  if (0 == R_last_kk->slen)
1295  sb_strdup (R_cur_r, R_last_ij);
1296  else if (GNUNET_YES == needs_parentheses (&R_temp_kk))
1297  sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
1298  else
1299  sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, R_last_kk);
1300  R_cur_l->null_flag = GNUNET_YES;
1301  }
1302  else if ((0 == ij_kj_cmp) && (0 == clean_ik_kk_cmp) &&
1303  (0 != clean_kk_kj_cmp))
1304  {
1305  /* a|bb*a = b*a */
1306  if (R_last_kk->slen < 1)
1307  {
1308  sb_strdup (R_cur_r, R_last_kj);
1309  }
1310  else if (GNUNET_YES == needs_parentheses (&R_temp_kk))
1311  sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
1312  else
1313  sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
1314 
1315  R_cur_l->null_flag = GNUNET_YES;
1316  }
1317  else if ((0 == ij_ik_cmp) && (0 == kk_kj_cmp) &&
1318  (! has_epsilon (R_last_ij)) && has_epsilon (R_last_kk))
1319  {
1320  /* a|a(e|b)*(e|b) = a|ab* = a|a|ab|abb|abbb|... = ab* */
1321  if (needs_parentheses (&R_temp_kk))
1322  sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
1323  else
1324  sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, &R_temp_kk);
1325  R_cur_l->null_flag = GNUNET_YES;
1326  }
1327  else if ((0 == ij_kj_cmp) && (0 == ik_kk_cmp) &&
1328  (! has_epsilon (R_last_ij)) && has_epsilon (R_last_kk))
1329  {
1330  /* a|(e|b)(e|b)*a = a|b*a = a|a|ba|bba|bbba|... = b*a */
1331  if (needs_parentheses (&R_temp_kk))
1332  sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_ij);
1333  else
1334  sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_ij);
1335  R_cur_l->null_flag = GNUNET_YES;
1336  }
1337  else
1338  {
1339  sb_strdup (R_cur_l, R_last_ij);
1340  remove_parentheses (R_cur_l);
1341  }
1342  }
1343  else
1344  {
1345  /* we have no left side */
1346  R_cur_l->null_flag = GNUNET_YES;
1347  }
1348 
1349  /* construct R_cur_r, if not already constructed */
1350  if (GNUNET_YES == R_cur_r->null_flag)
1351  {
1352  length = R_temp_kk.slen - R_last_ik->slen;
1353 
1354  /* a(ba)*bx = (ab)+x */
1355  if ((length > 0) && (GNUNET_YES != R_last_kk->null_flag) &&
1356  (0 < R_last_kk->slen) && (GNUNET_YES != R_last_kj->null_flag) &&
1357  (0 < R_last_kj->slen) && (GNUNET_YES != R_last_ik->null_flag) &&
1358  (0 < R_last_ik->slen) &&
1359  (0 == sb_strkcmp (&R_temp_kk, R_last_ik, length)) &&
1360  (0 == sb_strncmp (&R_temp_kk, R_last_kj, length)))
1361  {
1362  struct StringBuffer temp_a;
1363  struct StringBuffer temp_b;
1364 
1365  sb_init (&temp_a, length);
1366  sb_init (&temp_b, R_last_kj->slen - length);
1367 
1368  length_l = length;
1369  temp_a.sbuf = temp_a.abuf;
1370  GNUNET_memcpy (temp_a.sbuf, R_last_kj->sbuf, length_l);
1371  temp_a.slen = length_l;
1372 
1373  length_r = R_last_kj->slen - length;
1374  temp_b.sbuf = temp_b.abuf;
1375  GNUNET_memcpy (temp_b.sbuf, &R_last_kj->sbuf[length], length_r);
1376  temp_b.slen = length_r;
1377 
1378  /* e|(ab)+ = (ab)* */
1379  if ((GNUNET_YES != R_cur_l->null_flag) && (0 == R_cur_l->slen) &&
1380  (0 == temp_b.slen))
1381  {
1382  sb_printf2 (R_cur_r, "(%.*s%.*s)*", 3, R_last_ik, &temp_a);
1383  sb_free (R_cur_l);
1384  R_cur_l->null_flag = GNUNET_YES;
1385  }
1386  else
1387  {
1388  sb_printf3 (R_cur_r, "(%.*s%.*s)+%.*s", 3, R_last_ik, &temp_a, &temp_b);
1389  }
1390  sb_free (&temp_a);
1391  sb_free (&temp_b);
1392  }
1393  else if ((0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) &&
1394  (0 == sb_strcmp (&R_temp_kk, &R_temp_kj)) )
1395  {
1396  /*
1397  * (e|a)a*(e|a) = a*
1398  * (e|a)(e|a)*(e|a) = a*
1399  */
1400  if (has_epsilon (R_last_ik) && has_epsilon (R_last_kj))
1401  {
1402  if (needs_parentheses (&R_temp_kk))
1403  sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_kk);
1404  else
1405  sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_kk);
1406  }
1407  /* aa*a = a+a */
1408  else if ((0 == clean_ik_kk_cmp) && (0 == clean_kk_kj_cmp) &&
1409  (! has_epsilon (R_last_ik)))
1410  {
1411  if (needs_parentheses (&R_temp_kk))
1412  sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, &R_temp_kk);
1413  else
1414  sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, &R_temp_kk);
1415  }
1416  /*
1417  * (e|a)a*a = a+
1418  * aa*(e|a) = a+
1419  * a(e|a)*(e|a) = a+
1420  * (e|a)a*a = a+
1421  */else
1422  {
1423  eps_check = (has_epsilon (R_last_ik) + has_epsilon (R_last_kk)
1424  + has_epsilon (R_last_kj));
1425 
1426  if (1 == eps_check)
1427  {
1428  if (needs_parentheses (&R_temp_kk))
1429  sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_kk);
1430  else
1431  sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_kk);
1432  }
1433  }
1434  }
1435  /*
1436  * aa*b = a+b
1437  * (e|a)(e|a)*b = a*b
1438  */
1439  else if (0 == sb_strcmp (&R_temp_ik, &R_temp_kk))
1440  {
1441  if (has_epsilon (R_last_ik))
1442  {
1443  if (needs_parentheses (&R_temp_kk))
1444  sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
1445  else
1446  sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
1447  }
1448  else
1449  {
1450  if (needs_parentheses (&R_temp_kk))
1451  sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, R_last_kj);
1452  else
1453  sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, R_last_kj);
1454  }
1455  }
1456  /*
1457  * ba*a = ba+
1458  * b(e|a)*(e|a) = ba*
1459  */
1460  else if (0 == sb_strcmp (&R_temp_kk, &R_temp_kj))
1461  {
1462  if (has_epsilon (R_last_kj))
1463  {
1464  if (needs_parentheses (&R_temp_kk))
1465  sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ik, &R_temp_kk);
1466  else
1467  sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ik, &R_temp_kk);
1468  }
1469  else
1470  {
1471  if (needs_parentheses (&R_temp_kk))
1472  sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, R_last_ik, &R_temp_kk);
1473  else
1474  sb_printf2 (R_cur_r, "%.*s+%.*s", 1, R_last_ik, &R_temp_kk);
1475  }
1476  }
1477  else
1478  {
1479  if (0 < R_temp_kk.slen)
1480  {
1481  if (needs_parentheses (&R_temp_kk))
1482  {
1483  sb_printf3 (R_cur_r,
1484  "%.*s(%.*s)*%.*s",
1485  3,
1486  R_last_ik,
1487  &R_temp_kk,
1488  R_last_kj);
1489  }
1490  else
1491  {
1492  sb_printf3 (R_cur_r,
1493  "%.*s%.*s*%.*s",
1494  1,
1495  R_last_ik,
1496  &R_temp_kk,
1497  R_last_kj);
1498  }
1499  }
1500  else
1501  {
1502  sb_printf2 (R_cur_r, "%.*s%.*s", 0, R_last_ik, R_last_kj);
1503  }
1504  }
1505  }
1506  sb_free (&R_temp_ij);
1507  sb_free (&R_temp_ik);
1508  sb_free (&R_temp_kk);
1509  sb_free (&R_temp_kj);
1510 
1511  if ((GNUNET_YES == R_cur_l->null_flag) && (GNUNET_YES == R_cur_r->null_flag))
1512  {
1513  R_cur_ij->null_flag = GNUNET_YES;
1514  return;
1515  }
1516 
1517  if ((GNUNET_YES != R_cur_l->null_flag) && (GNUNET_YES == R_cur_r->null_flag))
1518  {
1519  struct StringBuffer tmp;
1520 
1521  tmp = *R_cur_ij;
1522  *R_cur_ij = *R_cur_l;
1523  *R_cur_l = tmp;
1524  return;
1525  }
1526 
1527  if ((GNUNET_YES == R_cur_l->null_flag) && (GNUNET_YES != R_cur_r->null_flag))
1528  {
1529  struct StringBuffer tmp;
1530 
1531  tmp = *R_cur_ij;
1532  *R_cur_ij = *R_cur_r;
1533  *R_cur_r = tmp;
1534  return;
1535  }
1536 
1537  if (0 == sb_nullstrcmp (R_cur_l, R_cur_r))
1538  {
1539  struct StringBuffer tmp;
1540 
1541  tmp = *R_cur_ij;
1542  *R_cur_ij = *R_cur_l;
1543  *R_cur_l = tmp;
1544  return;
1545  }
1546  sb_printf2 (R_cur_ij, "(%.*s|%.*s)", 3, R_cur_l, R_cur_r);
1547 }
1548 
1549 
1561 static int
1563 {
1564  unsigned int n = a->state_count;
1565  struct REGEX_INTERNAL_State *states[n];
1566  struct StringBuffer *R_last;
1567  struct StringBuffer *R_cur;
1568  struct StringBuffer R_cur_r;
1569  struct StringBuffer R_cur_l;
1570  struct StringBuffer *R_swap;
1571  struct REGEX_INTERNAL_Transition *t;
1572  struct StringBuffer complete_regex;
1573  unsigned int i;
1574  unsigned int j;
1575  unsigned int k;
1576 
1577  R_last = GNUNET_malloc_large (sizeof(struct StringBuffer) * n * n);
1578  R_cur = GNUNET_malloc_large (sizeof(struct StringBuffer) * n * n);
1579  if ((NULL == R_last) || (NULL == R_cur))
1580  {
1582  GNUNET_free (R_cur);
1583  GNUNET_free (R_last);
1584  return GNUNET_SYSERR;
1585  }
1586 
1587  /* create depth-first numbering of the states, initializes 'state' */
1589  a->start,
1590  NULL,
1591  NULL,
1592  &number_states,
1593  states);
1594 
1595  for (i = 0; i < n; i++)
1596  GNUNET_assert (NULL != states[i]);
1597  for (i = 0; i < n; i++)
1598  for (j = 0; j < n; j++)
1599  R_last[i * n + j].null_flag = GNUNET_YES;
1600 
1601  /* Compute regular expressions of length "1" between each pair of states */
1602  for (i = 0; i < n; i++)
1603  {
1604  for (t = states[i]->transitions_head; NULL != t; t = t->next)
1605  {
1606  j = t->to_state->dfs_id;
1607  if (GNUNET_YES == R_last[i * n + j].null_flag)
1608  {
1609  sb_strdup_cstr (&R_last[i * n + j], t->label);
1610  }
1611  else
1612  {
1613  sb_append_cstr (&R_last[i * n + j], "|");
1614  sb_append_cstr (&R_last[i * n + j], t->label);
1615  }
1616  }
1617  /* add self-loop: i is reachable from i via epsilon-transition */
1618  if (GNUNET_YES == R_last[i * n + i].null_flag)
1619  {
1620  R_last[i * n + i].slen = 0;
1621  R_last[i * n + i].null_flag = GNUNET_NO;
1622  }
1623  else
1624  {
1625  sb_wrap (&R_last[i * n + i], "(|%.*s)", 3);
1626  }
1627  }
1628  for (i = 0; i < n; i++)
1629  for (j = 0; j < n; j++)
1630  if (needs_parentheses (&R_last[i * n + j]))
1631  sb_wrap (&R_last[i * n + j], "(%.*s)", 2);
1632  /* Compute regular expressions of length "k" between each pair of states per
1633  * induction */
1634  memset (&R_cur_l, 0, sizeof(struct StringBuffer));
1635  memset (&R_cur_r, 0, sizeof(struct StringBuffer));
1636  for (k = 0; k < n; k++)
1637  {
1638  for (i = 0; i < n; i++)
1639  {
1640  for (j = 0; j < n; j++)
1641  {
1642  /* Basis for the recursion:
1643  * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1644  * R_last == R^{(k-1)}, R_cur == R^{(k)}
1645  */
1646 
1647  /* Create R_cur[i][j] and simplify the expression */
1648  automaton_create_proofs_simplify (&R_last[i * n + j],
1649  &R_last[i * n + k],
1650  &R_last[k * n + k],
1651  &R_last[k * n + j],
1652  &R_cur[i * n + j],
1653  &R_cur_l,
1654  &R_cur_r);
1655  }
1656  }
1657  /* set R_last = R_cur */
1658  R_swap = R_last;
1659  R_last = R_cur;
1660  R_cur = R_swap;
1661  /* clear 'R_cur' for next iteration */
1662  for (i = 0; i < n; i++)
1663  for (j = 0; j < n; j++)
1664  R_cur[i * n + j].null_flag = GNUNET_YES;
1665  }
1666  sb_free (&R_cur_l);
1667  sb_free (&R_cur_r);
1668  /* assign proofs and hashes */
1669  for (i = 0; i < n; i++)
1670  {
1671  if (GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag)
1672  {
1673  states[i]->proof = GNUNET_strndup (R_last[a->start->dfs_id * n + i].sbuf,
1674  R_last[a->start->dfs_id * n + i].slen);
1675  GNUNET_CRYPTO_hash (states[i]->proof,
1676  strlen (states[i]->proof),
1677  &states[i]->hash);
1678  }
1679  }
1680 
1681  /* complete regex for whole DFA: union of all pairs (start state/accepting
1682  * state(s)). */
1683  sb_init (&complete_regex, 16 * n);
1684  for (i = 0; i < n; i++)
1685  {
1686  if (states[i]->accepting)
1687  {
1688  if ((0 == complete_regex.slen) &&
1689  (0 < R_last[a->start->dfs_id * n + i].slen))
1690  {
1691  sb_append (&complete_regex, &R_last[a->start->dfs_id * n + i]);
1692  }
1693  else if ((GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag) &&
1694  (0 < R_last[a->start->dfs_id * n + i].slen))
1695  {
1696  sb_append_cstr (&complete_regex, "|");
1697  sb_append (&complete_regex, &R_last[a->start->dfs_id * n + i]);
1698  }
1699  }
1700  }
1701  a->canonical_regex =
1702  GNUNET_strndup (complete_regex.sbuf, complete_regex.slen);
1703 
1704  /* cleanup */
1705  sb_free (&complete_regex);
1706  for (i = 0; i < n; i++)
1707  for (j = 0; j < n; j++)
1708  {
1709  sb_free (&R_cur[i * n + j]);
1710  sb_free (&R_last[i * n + j]);
1711  }
1712  GNUNET_free (R_cur);
1713  GNUNET_free (R_last);
1714  return GNUNET_OK;
1715 }
1716 
1717 
1727 static struct REGEX_INTERNAL_State *
1729  struct REGEX_INTERNAL_StateSet *nfa_states)
1730 {
1731  struct REGEX_INTERNAL_State *s;
1732  char *pos;
1733  size_t len;
1734  struct REGEX_INTERNAL_State *cstate;
1735  struct REGEX_INTERNAL_Transition *ctran;
1736  unsigned int i;
1737 
1738  s = GNUNET_new (struct REGEX_INTERNAL_State);
1739  s->id = ctx->state_id++;
1740  s->index = -1;
1741  s->lowlink = -1;
1742 
1743  if (NULL == nfa_states)
1744  {
1745  GNUNET_asprintf (&s->name, "s%i", s->id);
1746  return s;
1747  }
1748 
1749  s->nfa_set = *nfa_states;
1750 
1751  if (nfa_states->off < 1)
1752  return s;
1753 
1754  /* Create a name based on 'nfa_states' */
1755  len = nfa_states->off * 14 + 4;
1756  s->name = GNUNET_malloc (len);
1757  strcat (s->name, "{");
1758  pos = s->name + 1;
1759 
1760  for (i = 0; i < nfa_states->off; i++)
1761  {
1762  cstate = nfa_states->states[i];
1763  GNUNET_snprintf (pos, pos - s->name + len, "%i,", cstate->id);
1764  pos += strlen (pos);
1765 
1766  /* Add a transition for each distinct label to NULL state */
1767  for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next)
1768  if (NULL != ctran->label)
1769  state_add_transition (ctx, s, ctran->label, NULL);
1770 
1771  /* If the nfa_states contain an accepting state, the new dfa state is also
1772  * accepting. */
1773  if (cstate->accepting)
1774  s->accepting = 1;
1775  }
1776  pos[-1] = '}';
1777  s->name = GNUNET_realloc (s->name, strlen (s->name) + 1);
1778 
1779  memset (nfa_states, 0, sizeof(struct REGEX_INTERNAL_StateSet));
1780  return s;
1781 }
1782 
1783 
1797 static unsigned int
1798 dfa_move (struct REGEX_INTERNAL_State **s, const char *str)
1799 {
1800  struct REGEX_INTERNAL_Transition *t;
1801  struct REGEX_INTERNAL_State *new_s;
1802  unsigned int len;
1803  unsigned int max_len;
1804 
1805  if (NULL == s)
1806  return 0;
1807 
1808  new_s = NULL;
1809  max_len = 0;
1810  for (t = (*s)->transitions_head; NULL != t; t = t->next)
1811  {
1812  len = strlen (t->label);
1813 
1814  if (0 == strncmp (t->label, str, len))
1815  {
1816  if (len >= max_len)
1817  {
1818  max_len = len;
1819  new_s = t->to_state;
1820  }
1821  }
1822  }
1823 
1824  *s = new_s;
1825  return max_len;
1826 }
1827 
1828 
1838 static void
1839 mark_states (void *cls,
1840  const unsigned int count,
1841  struct REGEX_INTERNAL_State *s)
1842 {
1843  s->marked = GNUNET_YES;
1844 }
1845 
1846 
1853 static void
1855 {
1856  struct REGEX_INTERNAL_State *s;
1857  struct REGEX_INTERNAL_State *s_next;
1858 
1859  /* 1. unmark all states */
1860  for (s = a->states_head; NULL != s; s = s->next)
1861  s->marked = GNUNET_NO;
1862 
1863  /* 2. traverse dfa from start state and mark all visited states */
1865  a->start,
1866  NULL,
1867  NULL,
1868  &mark_states,
1869  NULL);
1870 
1871  /* 3. delete all states that were not visited */
1872  for (s = a->states_head; NULL != s; s = s_next)
1873  {
1874  s_next = s->next;
1875  if (GNUNET_NO == s->marked)
1876  automaton_remove_state (a, s);
1877  }
1878 }
1879 
1880 
1887 static void
1889 {
1890  struct REGEX_INTERNAL_State *s;
1891  struct REGEX_INTERNAL_State *s_next;
1892  struct REGEX_INTERNAL_Transition *t;
1893  int dead;
1894 
1895  GNUNET_assert (DFA == a->type);
1896 
1897  for (s = a->states_head; NULL != s; s = s_next)
1898  {
1899  s_next = s->next;
1900 
1901  if (s->accepting)
1902  continue;
1903 
1904  dead = 1;
1905  for (t = s->transitions_head; NULL != t; t = t->next)
1906  {
1907  if ((NULL != t->to_state) && (t->to_state != s) )
1908  {
1909  dead = 0;
1910  break;
1911  }
1912  }
1913 
1914  if (0 == dead)
1915  continue;
1916 
1917  /* state s is dead, remove it */
1918  automaton_remove_state (a, s);
1919  }
1920 }
1921 
1922 
1930 static int
1932  struct REGEX_INTERNAL_Automaton *a)
1933 {
1934  uint32_t *table;
1935  struct REGEX_INTERNAL_State *s1;
1936  struct REGEX_INTERNAL_State *s2;
1937  struct REGEX_INTERNAL_Transition *t1;
1938  struct REGEX_INTERNAL_Transition *t2;
1939  struct REGEX_INTERNAL_State *s1_next;
1940  struct REGEX_INTERNAL_State *s2_next;
1941  int change;
1942  unsigned int num_equal_edges;
1943  unsigned int i;
1944  unsigned int state_cnt;
1945  unsigned long long idx;
1946  unsigned long long idx1;
1947 
1948  if ((NULL == a) || (0 == a->state_count))
1949  {
1951  "Could not merge nondistinguishable states, automaton was NULL.\n");
1952  return GNUNET_SYSERR;
1953  }
1954 
1955  state_cnt = a->state_count;
1957  (sizeof(uint32_t) * state_cnt * state_cnt / 32) + sizeof(uint32_t));
1958  if (NULL == table)
1959  {
1961  return GNUNET_SYSERR;
1962  }
1963 
1964  for (i = 0, s1 = a->states_head; NULL != s1; s1 = s1->next)
1965  s1->marked = i++;
1966 
1967  /* Mark all pairs of accepting/!accepting states */
1968  for (s1 = a->states_head; NULL != s1; s1 = s1->next)
1969  for (s2 = a->states_head; NULL != s2; s2 = s2->next)
1970  if ((s1->accepting && ! s2->accepting) ||
1971  (! s1->accepting && s2->accepting))
1972  {
1973  idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
1974  table[idx / 32] |= (1U << (idx % 32));
1975  }
1976 
1977  /* Find all equal states */
1978  change = 1;
1979  while (0 != change)
1980  {
1981  change = 0;
1982  for (s1 = a->states_head; NULL != s1; s1 = s1->next)
1983  {
1984  for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2->next)
1985  {
1986  idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
1987  if (0 != (table[idx / 32] & (1U << (idx % 32))))
1988  continue;
1989  num_equal_edges = 0;
1990  for (t1 = s1->transitions_head; NULL != t1; t1 = t1->next)
1991  {
1992  for (t2 = s2->transitions_head; NULL != t2; t2 = t2->next)
1993  {
1994  if (0 == strcmp (t1->label, t2->label))
1995  {
1996  num_equal_edges++;
1997  /* same edge, but targets definitively different, so we're different
1998  as well */
1999  if (t1->to_state->marked > t2->to_state->marked)
2000  idx1 = (unsigned long long) t1->to_state->marked * state_cnt
2001  + t2->to_state->marked;
2002  else
2003  idx1 = (unsigned long long) t2->to_state->marked * state_cnt
2004  + t1->to_state->marked;
2005  if (0 != (table[idx1 / 32] & (1U << (idx1 % 32))))
2006  {
2007  table[idx / 32] |= (1U << (idx % 32));
2008  change = 1; /* changed a marker, need to run again */
2009  }
2010  }
2011  }
2012  }
2013  if ((num_equal_edges != s1->transition_count) ||
2014  (num_equal_edges != s2->transition_count))
2015  {
2016  /* Make sure ALL edges of possible equal states are the same */
2017  table[idx / 32] |= (1U << (idx % 32));
2018  change = 1; /* changed a marker, need to run again */
2019  }
2020  }
2021  }
2022  }
2023 
2024  /* Merge states that are equal */
2025  for (s1 = a->states_head; NULL != s1; s1 = s1_next)
2026  {
2027  s1_next = s1->next;
2028  for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2_next)
2029  {
2030  s2_next = s2->next;
2031  idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
2032  if (0 == (table[idx / 32] & (1U << (idx % 32))))
2033  automaton_merge_states (ctx, a, s1, s2);
2034  }
2035  }
2036 
2037  GNUNET_free (table);
2038  return GNUNET_OK;
2039 }
2040 
2041 
2050 static int
2052  struct REGEX_INTERNAL_Automaton *a)
2053 {
2054  if (NULL == a)
2055  return GNUNET_SYSERR;
2056 
2057  GNUNET_assert (DFA == a->type);
2058 
2059  /* 1. remove unreachable states */
2061 
2062  /* 2. remove dead states */
2064 
2065  /* 3. Merge nondistinguishable states */
2067  return GNUNET_SYSERR;
2068  return GNUNET_OK;
2069 }
2070 
2071 
2076 {
2080  const unsigned int stride;
2081 
2087 
2092 };
2093 
2094 
2105 static void
2107  const unsigned int depth,
2108  char *label,
2109  struct REGEX_INTERNAL_State *start,
2110  struct REGEX_INTERNAL_State *s)
2111 {
2112  struct REGEX_INTERNAL_Strided_Context *ctx = cls;
2113  struct REGEX_INTERNAL_Transition *t;
2114  char *new_label;
2115 
2116  if (depth == ctx->stride)
2117  {
2119  t->label = GNUNET_strdup (label);
2120  t->to_state = s;
2121  t->from_state = start;
2122  GNUNET_CONTAINER_DLL_insert (ctx->transitions_head,
2123  ctx->transitions_tail,
2124  t);
2125  }
2126  else
2127  {
2128  for (t = s->transitions_head; NULL != t; t = t->next)
2129  {
2130  /* Do not consider self-loops, because it end's up in too many
2131  * transitions */
2132  if (t->to_state == t->from_state)
2133  continue;
2134 
2135  if (NULL != label)
2136  {
2137  GNUNET_asprintf (&new_label, "%s%s", label, t->label);
2138  }
2139  else
2140  new_label = GNUNET_strdup (t->label);
2141 
2143  (depth + 1),
2144  new_label,
2145  start,
2146  t->to_state);
2147  }
2148  }
2149  GNUNET_free (label);
2150 }
2151 
2152 
2161 static void
2163  const unsigned int count,
2164  struct REGEX_INTERNAL_State *s)
2165 {
2166  dfa_add_multi_strides_helper (cls, 0, NULL, s, s);
2167 }
2168 
2169 
2177 void
2179  struct REGEX_INTERNAL_Automaton *dfa,
2180  const unsigned int stride_len)
2181 {
2182  struct REGEX_INTERNAL_Strided_Context ctx = { stride_len, NULL, NULL };
2183  struct REGEX_INTERNAL_Transition *t;
2184  struct REGEX_INTERNAL_Transition *t_next;
2185 
2186  if ((1 > stride_len) || (GNUNET_YES == dfa->is_multistrided))
2187  return;
2188 
2189  /* Compute the new transitions of given stride_len */
2191  dfa->start,
2192  NULL,
2193  NULL,
2195  &ctx);
2196 
2197  /* Add all the new transitions to the automaton. */
2198  for (t = ctx.transitions_head; NULL != t; t = t_next)
2199  {
2200  t_next = t->next;
2201  state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
2202  GNUNET_CONTAINER_DLL_remove (ctx.transitions_head, ctx.transitions_tail, t);
2203  GNUNET_free (t->label);
2204  GNUNET_free (t);
2205  }
2206 
2207  /* Mark this automaton as multistrided */
2208  dfa->is_multistrided = GNUNET_YES;
2209 }
2210 
2211 
2225 void
2227  struct REGEX_INTERNAL_State *start,
2228  struct REGEX_INTERNAL_State *cur,
2229  char *label,
2230  unsigned int max_len,
2231  struct REGEX_INTERNAL_Transition **transitions_head,
2232  struct REGEX_INTERNAL_Transition **transitions_tail)
2233 {
2234  struct REGEX_INTERNAL_Transition *t;
2235  char *new_label;
2236 
2237 
2238  if ((NULL != label) &&
2239  (((cur->incoming_transition_count > 1) || (GNUNET_YES ==
2240  cur->accepting) ||
2241  (GNUNET_YES == cur->marked) ) ||
2242  ((start != dfa->start) && (max_len > 0) && (max_len == strlen (
2243  label))) ||
2244  ((start == dfa->start) && (GNUNET_REGEX_INITIAL_BYTES == strlen (
2245  label)))))
2246  {
2248  t->label = GNUNET_strdup (label);
2249  t->to_state = cur;
2250  t->from_state = start;
2251  GNUNET_CONTAINER_DLL_insert (*transitions_head, *transitions_tail, t);
2252 
2253  if (GNUNET_NO == cur->marked)
2254  {
2256  cur,
2257  cur,
2258  NULL,
2259  max_len,
2260  transitions_head,
2261  transitions_tail);
2262  }
2263  return;
2264  }
2265  else if (cur != start)
2266  cur->contained = GNUNET_YES;
2267 
2268  if ((GNUNET_YES == cur->marked) && (cur != start))
2269  return;
2270 
2271  cur->marked = GNUNET_YES;
2272 
2273 
2274  for (t = cur->transitions_head; NULL != t; t = t->next)
2275  {
2276  if (NULL != label)
2277  GNUNET_asprintf (&new_label, "%s%s", label, t->label);
2278  else
2279  new_label = GNUNET_strdup (t->label);
2280 
2281  if (t->to_state != cur)
2282  {
2284  start,
2285  t->to_state,
2286  new_label,
2287  max_len,
2288  transitions_head,
2289  transitions_tail);
2290  }
2291  GNUNET_free (new_label);
2292  }
2293 }
2294 
2295 
2304 static void
2306  struct REGEX_INTERNAL_Automaton *dfa,
2307  unsigned int max_len)
2308 {
2309  struct REGEX_INTERNAL_State *s;
2310  struct REGEX_INTERNAL_State *s_next;
2311  struct REGEX_INTERNAL_Transition *t;
2312  struct REGEX_INTERNAL_Transition *t_next;
2313  struct REGEX_INTERNAL_Transition *transitions_head = NULL;
2314  struct REGEX_INTERNAL_Transition *transitions_tail = NULL;
2315 
2316  if (NULL == dfa)
2317  return;
2318 
2319  /* Count the incoming transitions on each state. */
2320  for (s = dfa->states_head; NULL != s; s = s->next)
2321  {
2322  for (t = s->transitions_head; NULL != t; t = t->next)
2323  {
2324  if (NULL != t->to_state)
2325  t->to_state->incoming_transition_count++;
2326  }
2327  }
2328 
2329  /* Unmark all states. */
2330  for (s = dfa->states_head; NULL != s; s = s->next)
2331  {
2332  s->marked = GNUNET_NO;
2333  s->contained = GNUNET_NO;
2334  }
2335 
2336  /* Add strides and mark states that can be deleted. */
2338  dfa->start,
2339  dfa->start,
2340  NULL,
2341  max_len,
2342  &transitions_head,
2343  &transitions_tail);
2344 
2345  /* Add all the new transitions to the automaton. */
2346  for (t = transitions_head; NULL != t; t = t_next)
2347  {
2348  t_next = t->next;
2349  state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
2350  GNUNET_CONTAINER_DLL_remove (transitions_head, transitions_tail, t);
2351  GNUNET_free (t->label);
2352  GNUNET_free (t);
2353  }
2354 
2355  /* Remove marked states (including their incoming and outgoing transitions). */
2356  for (s = dfa->states_head; NULL != s; s = s_next)
2357  {
2358  s_next = s->next;
2359  if (GNUNET_YES == s->contained)
2360  automaton_remove_state (dfa, s);
2361  }
2362 }
2363 
2364 
2374 static struct REGEX_INTERNAL_Automaton *
2376  struct REGEX_INTERNAL_State *end)
2377 {
2378  struct REGEX_INTERNAL_Automaton *n;
2379 
2380  n = GNUNET_new (struct REGEX_INTERNAL_Automaton);
2381 
2382  n->type = NFA;
2383  n->start = NULL;
2384  n->end = NULL;
2385  n->state_count = 0;
2386 
2387  if ((NULL == start) || (NULL == end))
2388  return n;
2389 
2390  automaton_add_state (n, end);
2392 
2393  n->state_count = 2;
2394 
2395  n->start = start;
2396  n->end = end;
2397 
2398  return n;
2399 }
2400 
2401 
2409 static void
2413 {
2414  struct REGEX_INTERNAL_State *s;
2415 
2416  if ((NULL == n) || (NULL == states_head))
2417  {
2418  GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not add states\n");
2419  return;
2420  }
2421 
2422  if (NULL == n->states_head)
2423  {
2424  n->states_head = states_head;
2425  n->states_tail = states_tail;
2426  return;
2427  }
2428 
2429  if (NULL != states_head)
2430  {
2431  n->states_tail->next = states_head;
2432  n->states_tail = states_tail;
2433  }
2434 
2435  for (s = states_head; NULL != s; s = s->next)
2436  n->state_count++;
2437 }
2438 
2439 
2448 static struct REGEX_INTERNAL_State *
2450 {
2451  struct REGEX_INTERNAL_State *s;
2452 
2453  s = GNUNET_new (struct REGEX_INTERNAL_State);
2454  s->id = ctx->state_id++;
2455  s->accepting = accepting;
2456  s->marked = GNUNET_NO;
2457  s->contained = 0;
2458  s->index = -1;
2459  s->lowlink = -1;
2460  s->scc_id = 0;
2461  s->name = NULL;
2462  GNUNET_asprintf (&s->name, "s%i", s->id);
2463 
2464  return s;
2465 }
2466 
2467 
2477 static void
2479  struct REGEX_INTERNAL_Automaton *nfa,
2480  struct REGEX_INTERNAL_StateSet *states,
2481  const char *label)
2482 {
2483  struct REGEX_INTERNAL_State *s;
2484  unsigned int i;
2485  struct REGEX_INTERNAL_StateSet_MDLL cls_stack;
2486  struct REGEX_INTERNAL_State *clsstate;
2487  struct REGEX_INTERNAL_State *currentstate;
2488  struct REGEX_INTERNAL_Transition *ctran;
2489 
2490  memset (ret, 0, sizeof(struct REGEX_INTERNAL_StateSet));
2491  if (NULL == states)
2492  return;
2493 
2494  for (i = 0; i < states->off; i++)
2495  {
2496  s = states->states[i];
2497 
2498  /* Add start state to closure only for epsilon closure */
2499  if (NULL == label)
2500  state_set_append (ret, s);
2501 
2502  /* initialize work stack */
2503  cls_stack.head = NULL;
2504  cls_stack.tail = NULL;
2505  GNUNET_CONTAINER_MDLL_insert (ST, cls_stack.head, cls_stack.tail, s);
2506  cls_stack.len = 1;
2507 
2508  while (NULL != (currentstate = cls_stack.tail))
2509  {
2511  cls_stack.head,
2512  cls_stack.tail,
2513  currentstate);
2514  cls_stack.len--;
2515  for (ctran = currentstate->transitions_head; NULL != ctran;
2516  ctran = ctran->next)
2517  {
2518  if (NULL == (clsstate = ctran->to_state))
2519  continue;
2520  if (0 != clsstate->contained)
2521  continue;
2522  if (0 != nullstrcmp (label, ctran->label))
2523  continue;
2524  state_set_append (ret, clsstate);
2526  cls_stack.head,
2527  cls_stack.tail,
2528  clsstate);
2529  cls_stack.len++;
2530  clsstate->contained = 1;
2531  }
2532  }
2533  }
2534  for (i = 0; i < ret->off; i++)
2535  ret->states[i]->contained = 0;
2536 
2537  if (ret->off > 1)
2538  qsort (ret->states,
2539  ret->off,
2540  sizeof(struct REGEX_INTERNAL_State *),
2541  &state_compare);
2542 }
2543 
2544 
2550 static void
2552 {
2553  struct REGEX_INTERNAL_Automaton *a;
2554  struct REGEX_INTERNAL_Automaton *b;
2555  struct REGEX_INTERNAL_Automaton *new_nfa;
2556 
2557  b = ctx->stack_tail;
2558  GNUNET_assert (NULL != b);
2559  GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
2560  a = ctx->stack_tail;
2561  GNUNET_assert (NULL != a);
2562  GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2563 
2564  state_add_transition (ctx, a->end, NULL, b->start);
2565  a->end->accepting = 0;
2566  b->end->accepting = 1;
2567 
2568  new_nfa = nfa_fragment_create (NULL, NULL);
2569  nfa_add_states (new_nfa, a->states_head, a->states_tail);
2570  nfa_add_states (new_nfa, b->states_head, b->states_tail);
2571  new_nfa->start = a->start;
2572  new_nfa->end = b->end;
2573  new_nfa->state_count += a->state_count + b->state_count;
2576 
2577  GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2578 }
2579 
2580 
2586 static void
2588 {
2589  struct REGEX_INTERNAL_Automaton *a;
2590  struct REGEX_INTERNAL_Automaton *new_nfa;
2591  struct REGEX_INTERNAL_State *start;
2592  struct REGEX_INTERNAL_State *end;
2593 
2594  a = ctx->stack_tail;
2595 
2596  if (NULL == a)
2597  {
2598  GNUNET_log (
2600  "nfa_add_star_op failed, because there was no element on the stack");
2601  return;
2602  }
2603 
2604  GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2605 
2606  start = nfa_state_create (ctx, 0);
2607  end = nfa_state_create (ctx, 1);
2608 
2609  state_add_transition (ctx, start, NULL, a->start);
2610  state_add_transition (ctx, start, NULL, end);
2611  state_add_transition (ctx, a->end, NULL, a->start);
2612  state_add_transition (ctx, a->end, NULL, end);
2613 
2614  a->end->accepting = 0;
2615  end->accepting = 1;
2616 
2617  new_nfa = nfa_fragment_create (start, end);
2618  nfa_add_states (new_nfa, a->states_head, a->states_tail);
2620 
2621  GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2622 }
2623 
2624 
2630 static void
2632 {
2633  struct REGEX_INTERNAL_Automaton *a;
2634 
2635  a = ctx->stack_tail;
2636 
2637  if (NULL == a)
2638  {
2639  GNUNET_log (
2641  "nfa_add_plus_op failed, because there was no element on the stack");
2642  return;
2643  }
2644 
2645  GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2646 
2647  state_add_transition (ctx, a->end, NULL, a->start);
2648 
2649  GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, a);
2650 }
2651 
2652 
2658 static void
2660 {
2661  struct REGEX_INTERNAL_Automaton *a;
2662  struct REGEX_INTERNAL_Automaton *new_nfa;
2663  struct REGEX_INTERNAL_State *start;
2664  struct REGEX_INTERNAL_State *end;
2665 
2666  a = ctx->stack_tail;
2667  if (NULL == a)
2668  {
2669  GNUNET_log (
2671  "nfa_add_question_op failed, because there was no element on the stack");
2672  return;
2673  }
2674 
2675  GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2676 
2677  start = nfa_state_create (ctx, 0);
2678  end = nfa_state_create (ctx, 1);
2679 
2680  state_add_transition (ctx, start, NULL, a->start);
2681  state_add_transition (ctx, start, NULL, end);
2682  state_add_transition (ctx, a->end, NULL, end);
2683 
2684  a->end->accepting = 0;
2685 
2686  new_nfa = nfa_fragment_create (start, end);
2687  nfa_add_states (new_nfa, a->states_head, a->states_tail);
2688  GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2690 }
2691 
2692 
2699 static void
2701 {
2702  struct REGEX_INTERNAL_Automaton *a;
2703  struct REGEX_INTERNAL_Automaton *b;
2704  struct REGEX_INTERNAL_Automaton *new_nfa;
2705  struct REGEX_INTERNAL_State *start;
2706  struct REGEX_INTERNAL_State *end;
2707 
2708  b = ctx->stack_tail;
2709  GNUNET_assert (NULL != b);
2710  GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
2711  a = ctx->stack_tail;
2712  GNUNET_assert (NULL != a);
2713  GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2714 
2715  start = nfa_state_create (ctx, 0);
2716  end = nfa_state_create (ctx, 1);
2717  state_add_transition (ctx, start, NULL, a->start);
2718  state_add_transition (ctx, start, NULL, b->start);
2719 
2720  state_add_transition (ctx, a->end, NULL, end);
2721  state_add_transition (ctx, b->end, NULL, end);
2722 
2723  a->end->accepting = 0;
2724  b->end->accepting = 0;
2725  end->accepting = 1;
2726 
2727  new_nfa = nfa_fragment_create (start, end);
2728  nfa_add_states (new_nfa, a->states_head, a->states_tail);
2729  nfa_add_states (new_nfa, b->states_head, b->states_tail);
2732 
2733  GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2734 }
2735 
2736 
2743 static void
2744 nfa_add_label (struct REGEX_INTERNAL_Context *ctx, const char *label)
2745 {
2746  struct REGEX_INTERNAL_Automaton *n;
2747  struct REGEX_INTERNAL_State *start;
2748  struct REGEX_INTERNAL_State *end;
2749 
2750  GNUNET_assert (NULL != ctx);
2751 
2752  start = nfa_state_create (ctx, 0);
2753  end = nfa_state_create (ctx, 1);
2754  state_add_transition (ctx, start, label, end);
2755  n = nfa_fragment_create (start, end);
2756  GNUNET_assert (NULL != n);
2757  GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, n);
2758 }
2759 
2760 
2766 static void
2768 {
2769  if (NULL == ctx)
2770  {
2771  GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Context was NULL!");
2772  return;
2773  }
2774  ctx->state_id = 0;
2775  ctx->transition_id = 0;
2776  ctx->stack_head = NULL;
2777  ctx->stack_tail = NULL;
2778 }
2779 
2780 
2789 struct REGEX_INTERNAL_Automaton *
2790 REGEX_INTERNAL_construct_nfa (const char *regex, const size_t len)
2791 {
2792  struct REGEX_INTERNAL_Context ctx;
2793  struct REGEX_INTERNAL_Automaton *nfa;
2794  const char *regexp;
2795  char curlabel[2];
2796  char *error_msg;
2797  unsigned int count;
2798  unsigned int altcount;
2799  unsigned int atomcount;
2800  unsigned int poff;
2801  unsigned int psize;
2802 
2803  struct
2804  {
2805  int altcount;
2806  int atomcount;
2807  } *p;
2808 
2809  if ((NULL == regex) || (0 == strlen (regex)) || (0 == len))
2810  {
2812  "Could not parse regex. Empty regex string provided.\n");
2813 
2814  return NULL;
2815  }
2817 
2818  regexp = regex;
2819  curlabel[1] = '\0';
2820  p = NULL;
2821  error_msg = NULL;
2822  altcount = 0;
2823  atomcount = 0;
2824  poff = 0;
2825  psize = 0;
2826 
2827  for (count = 0; count < len && *regexp; count++, regexp++)
2828  {
2829  switch (*regexp)
2830  {
2831  case '(':
2832  if (atomcount > 1)
2833  {
2834  --atomcount;
2836  }
2837  if (poff == psize)
2838  GNUNET_array_grow (p, psize, psize * 2 + 4); /* FIXME why *2 +4? */
2839  p[poff].altcount = altcount;
2840  p[poff].atomcount = atomcount;
2841  poff++;
2842  altcount = 0;
2843  atomcount = 0;
2844  break;
2845 
2846  case '|':
2847  if (0 == atomcount)
2848  {
2849  error_msg = "Cannot append '|' to nothing";
2850  goto error;
2851  }
2852  while (--atomcount > 0)
2854  altcount++;
2855  break;
2856 
2857  case ')':
2858  if (0 == poff)
2859  {
2860  error_msg = "Missing opening '('";
2861  goto error;
2862  }
2863  if (0 == atomcount)
2864  {
2865  /* Ignore this: "()" */
2866  poff--;
2867  altcount = p[poff].altcount;
2868  atomcount = p[poff].atomcount;
2869  break;
2870  }
2871  while (--atomcount > 0)
2873  for (; altcount > 0; altcount--)
2875  poff--;
2876  altcount = p[poff].altcount;
2877  atomcount = p[poff].atomcount;
2878  atomcount++;
2879  break;
2880 
2881  case '*':
2882  if (atomcount == 0)
2883  {
2884  error_msg = "Cannot append '*' to nothing";
2885  goto error;
2886  }
2887  nfa_add_star_op (&ctx);
2888  break;
2889 
2890  case '+':
2891  if (atomcount == 0)
2892  {
2893  error_msg = "Cannot append '+' to nothing";
2894  goto error;
2895  }
2896  nfa_add_plus_op (&ctx);
2897  break;
2898 
2899  case '?':
2900  if (atomcount == 0)
2901  {
2902  error_msg = "Cannot append '?' to nothing";
2903  goto error;
2904  }
2906  break;
2907 
2908  default:
2909  if (atomcount > 1)
2910  {
2911  --atomcount;
2913  }
2914  curlabel[0] = *regexp;
2915  nfa_add_label (&ctx, curlabel);
2916  atomcount++;
2917  break;
2918  }
2919  }
2920  if (0 != poff)
2921  {
2922  error_msg = "Unbalanced parenthesis";
2923  goto error;
2924  }
2925  while (--atomcount > 0)
2927  for (; altcount > 0; altcount--)
2929 
2930  GNUNET_array_grow (p, psize, 0);
2931 
2932  nfa = ctx.stack_tail;
2933  GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa);
2934 
2935  if (NULL != ctx.stack_head)
2936  {
2937  error_msg = "Creating the NFA failed. NFA stack was not empty!";
2938  goto error;
2939  }
2940 
2941  /* Remember the regex that was used to generate this NFA */
2942  nfa->regex = GNUNET_strdup (regex);
2943 
2944  /* create depth-first numbering of the states for pretty printing */
2946  NULL,
2947  NULL,
2948  NULL,
2949  &number_states,
2950  NULL);
2951 
2952  /* No multistriding added so far */
2953  nfa->is_multistrided = GNUNET_NO;
2954 
2955  return nfa;
2956 
2957 error:
2958  GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not parse regex: `%s'\n", regex);
2959  if (NULL != error_msg)
2960  GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "%s\n", error_msg);
2961 
2962  GNUNET_free (p);
2963 
2964  while (NULL != (nfa = ctx.stack_head))
2965  {
2966  GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa);
2968  }
2969 
2970  return NULL;
2971 }
2972 
2973 
2983 static void
2985  struct REGEX_INTERNAL_Automaton *nfa,
2986  struct REGEX_INTERNAL_Automaton *dfa,
2987  struct REGEX_INTERNAL_State *dfa_state)
2988 {
2989  struct REGEX_INTERNAL_Transition *ctran;
2990  struct REGEX_INTERNAL_State *new_dfa_state;
2991  struct REGEX_INTERNAL_State *state_contains;
2992  struct REGEX_INTERNAL_State *state_iter;
2993  struct REGEX_INTERNAL_StateSet tmp;
2994  struct REGEX_INTERNAL_StateSet nfa_set;
2995 
2996  for (ctran = dfa_state->transitions_head; NULL != ctran; ctran = ctran->next)
2997  {
2998  if ((NULL == ctran->label) || (NULL != ctran->to_state) )
2999  continue;
3000 
3001  nfa_closure_set_create (&tmp, nfa, &dfa_state->nfa_set, ctran->label);
3002  nfa_closure_set_create (&nfa_set, nfa, &tmp, NULL);
3003  state_set_clear (&tmp);
3004 
3005  state_contains = NULL;
3006  for (state_iter = dfa->states_head; NULL != state_iter;
3007  state_iter = state_iter->next)
3008  {
3009  if (0 == state_set_compare (&state_iter->nfa_set, &nfa_set))
3010  {
3011  state_contains = state_iter;
3012  break;
3013  }
3014  }
3015  if (NULL == state_contains)
3016  {
3017  new_dfa_state = dfa_state_create (ctx, &nfa_set);
3018  automaton_add_state (dfa, new_dfa_state);
3019  ctran->to_state = new_dfa_state;
3020  construct_dfa_states (ctx, nfa, dfa, new_dfa_state);
3021  }
3022  else
3023  {
3024  ctran->to_state = state_contains;
3025  state_set_clear (&nfa_set);
3026  }
3027  }
3028 }
3029 
3030 
3031 struct REGEX_INTERNAL_Automaton *
3033  const size_t len,
3034  unsigned int max_path_len)
3035 {
3036  struct REGEX_INTERNAL_Context ctx;
3037  struct REGEX_INTERNAL_Automaton *dfa;
3038  struct REGEX_INTERNAL_Automaton *nfa;
3039  struct REGEX_INTERNAL_StateSet nfa_start_eps_cls;
3040  struct REGEX_INTERNAL_StateSet singleton_set;
3041 
3043 
3044  /* Create NFA */
3045  nfa = REGEX_INTERNAL_construct_nfa (regex, len);
3046 
3047  if (NULL == nfa)
3048  {
3050  "Could not create DFA, because NFA creation failed\n");
3051  return NULL;
3052  }
3053 
3054  dfa = GNUNET_new (struct REGEX_INTERNAL_Automaton);
3055  dfa->type = DFA;
3056  dfa->regex = GNUNET_strdup (regex);
3057 
3058  /* Create DFA start state from epsilon closure */
3059  memset (&singleton_set, 0, sizeof(struct REGEX_INTERNAL_StateSet));
3060  state_set_append (&singleton_set, nfa->start);
3061  nfa_closure_set_create (&nfa_start_eps_cls, nfa, &singleton_set, NULL);
3062  state_set_clear (&singleton_set);
3063  dfa->start = dfa_state_create (&ctx, &nfa_start_eps_cls);
3064  automaton_add_state (dfa, dfa->start);
3065 
3066  construct_dfa_states (&ctx, nfa, dfa, dfa->start);
3068 
3069  /* Minimize DFA */
3070  if (GNUNET_OK != dfa_minimize (&ctx, dfa))
3071  {
3073  return NULL;
3074  }
3075 
3076  /* Create proofs and hashes for all states */
3077  if (GNUNET_OK != automaton_create_proofs (dfa))
3078  {
3080  return NULL;
3081  }
3082 
3083  /* Compress linear DFA paths */
3084  if (1 != max_path_len)
3085  dfa_compress_paths (&ctx, dfa, max_path_len);
3086 
3087  return dfa;
3088 }
3089 
3090 
3091 void
3093 {
3094  struct REGEX_INTERNAL_State *s;
3095  struct REGEX_INTERNAL_State *next_state;
3096 
3097  if (NULL == a)
3098  return;
3099 
3100  GNUNET_free (a->regex);
3102 
3103  for (s = a->states_head; NULL != s; s = next_state)
3104  {
3105  next_state = s->next;
3108  }
3109 
3110  GNUNET_free (a);
3111 }
3112 
3113 
3122 static int
3123 evaluate_dfa (struct REGEX_INTERNAL_Automaton *a, const char *string)
3124 {
3125  const char *strp;
3126  struct REGEX_INTERNAL_State *s;
3127  unsigned int step_len;
3128 
3129  if (DFA != a->type)
3130  {
3132  "Tried to evaluate DFA, but NFA automaton given");
3133  return -1;
3134  }
3135 
3136  s = a->start;
3137 
3138  /* If the string is empty but the starting state is accepting, we accept. */
3139  if (((NULL == string) || (0 == strlen (string))) && s->accepting)
3140  return 0;
3141 
3142  for (strp = string; NULL != strp && *strp; strp += step_len)
3143  {
3144  step_len = dfa_move (&s, strp);
3145 
3146  if (NULL == s)
3147  break;
3148  }
3149 
3150  if ((NULL != s) && s->accepting)
3151  return 0;
3152 
3153  return 1;
3154 }
3155 
3156 
3164 static int
3165 evaluate_nfa (struct REGEX_INTERNAL_Automaton *a, const char *string)
3166 {
3167  const char *strp;
3168  char str[2];
3169  struct REGEX_INTERNAL_State *s;
3170  struct REGEX_INTERNAL_StateSet sset;
3171  struct REGEX_INTERNAL_StateSet new_sset;
3172  struct REGEX_INTERNAL_StateSet singleton_set;
3173  unsigned int i;
3174  int result;
3175 
3176  if (NFA != a->type)
3177  {
3179  "Tried to evaluate NFA, but DFA automaton given");
3180  return -1;
3181  }
3182 
3183  /* If the string is empty but the starting state is accepting, we accept. */
3184  if (((NULL == string) || (0 == strlen (string))) && a->start->accepting)
3185  return 0;
3186 
3187  result = 1;
3188  memset (&singleton_set, 0, sizeof(struct REGEX_INTERNAL_StateSet));
3189  state_set_append (&singleton_set, a->start);
3190  nfa_closure_set_create (&sset, a, &singleton_set, NULL);
3191  state_set_clear (&singleton_set);
3192 
3193  str[1] = '\0';
3194  for (strp = string; NULL != strp && *strp; strp++)
3195  {
3196  str[0] = *strp;
3197  nfa_closure_set_create (&new_sset, a, &sset, str);
3198  state_set_clear (&sset);
3199  nfa_closure_set_create (&sset, a, &new_sset, 0);
3200  state_set_clear (&new_sset);
3201  }
3202 
3203  for (i = 0; i < sset.off; i++)
3204  {
3205  s = sset.states[i];
3206  if ((NULL != s) && (s->accepting))
3207  {
3208  result = 0;
3209  break;
3210  }
3211  }
3212 
3213  state_set_clear (&sset);
3214  return result;
3215 }
3216 
3217 
3218 int
3219 REGEX_INTERNAL_eval (struct REGEX_INTERNAL_Automaton *a, const char *string)
3220 {
3221  int result;
3222 
3223  switch (a->type)
3224  {
3225  case DFA:
3226  result = evaluate_dfa (a, string);
3227  break;
3228 
3229  case NFA:
3230  result = evaluate_nfa (a, string);
3231  break;
3232 
3233  default:
3235  "Evaluating regex failed, automaton has no type!\n");
3237  break;
3238  }
3239 
3240  return result;
3241 }
3242 
3243 
3244 const char *
3246 {
3247  if (NULL == a)
3248  return NULL;
3249 
3250  return a->canonical_regex;
3251 }
3252 
3253 
3261 unsigned int
3263 {
3264  unsigned int t_count;
3265  struct REGEX_INTERNAL_State *s;
3266 
3267  if (NULL == a)
3268  return 0;
3269 
3270  t_count = 0;
3271  for (s = a->states_head; NULL != s; s = s->next)
3272  t_count += s->transition_count;
3273 
3274  return t_count;
3275 }
3276 
3277 
3288 size_t
3289 REGEX_INTERNAL_get_first_key (const char *input_string,
3290  size_t string_len,
3291  struct GNUNET_HashCode *key)
3292 {
3293  size_t size;
3294 
3295  size = string_len < GNUNET_REGEX_INITIAL_BYTES ? string_len
3297  if (NULL == input_string)
3298  {
3299  GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Given input string was NULL!\n");
3300  return 0;
3301  }
3302  GNUNET_CRYPTO_hash (input_string, size, key);
3303 
3304  return size;
3305 }
3306 
3307 
3318 static void
3319 iterate_initial_edge (unsigned int min_len,
3320  unsigned int max_len,
3321  char *consumed_string,
3322  struct REGEX_INTERNAL_State *state,
3324  void *iterator_cls)
3325 {
3326  char *temp;
3327  struct REGEX_INTERNAL_Transition *t;
3328  unsigned int num_edges = state->transition_count;
3329  struct REGEX_BLOCK_Edge edges[num_edges];
3330  struct REGEX_BLOCK_Edge edge[1];
3331  struct GNUNET_HashCode hash;
3332  struct GNUNET_HashCode hash_new;
3333  unsigned int cur_len;
3334 
3335  if (NULL != consumed_string)
3336  cur_len = strlen (consumed_string);
3337  else
3338  cur_len = 0;
3339 
3340  if (((cur_len >= min_len) || (GNUNET_YES == state->accepting)) &&
3341  (cur_len > 0) && (NULL != consumed_string))
3342  {
3343  if (cur_len <= max_len)
3344  {
3345  if ((NULL != state->proof) &&
3346  (0 != strcmp (consumed_string, state->proof)))
3347  {
3348  (void) state_get_edges (state, edges);
3349  GNUNET_CRYPTO_hash (consumed_string, strlen (consumed_string), &hash);
3351  "Start state for string `%s' is %s\n",
3352  consumed_string,
3353  GNUNET_h2s (&hash));
3354  iterator (iterator_cls,
3355  &hash,
3356  consumed_string,
3357  state->accepting,
3358  num_edges,
3359  edges);
3360  }
3361 
3362  if ((GNUNET_YES == state->accepting) && (cur_len > 1) &&
3363  (state->transition_count < 1) && (cur_len < max_len))
3364  {
3365  /* Special case for regex consisting of just a string that is shorter than
3366  * max_len */
3367  edge[0].label = &consumed_string[cur_len - 1];
3368  edge[0].destination = state->hash;
3369  temp = GNUNET_strdup (consumed_string);
3370  temp[cur_len - 1] = '\0';
3371  GNUNET_CRYPTO_hash (temp, cur_len - 1, &hash_new);
3373  "Start state for short string `%s' is %s\n",
3374  temp,
3375  GNUNET_h2s (&hash_new));
3376  iterator (iterator_cls, &hash_new, temp, GNUNET_NO, 1, edge);
3377  GNUNET_free (temp);
3378  }
3379  }
3380  else /* cur_len > max_len */
3381  {
3382  /* Case where the concatenated labels are longer than max_len, then split. */
3383  edge[0].label = &consumed_string[max_len];
3384  edge[0].destination = state->hash;
3385  temp = GNUNET_strdup (consumed_string);
3386  temp[max_len] = '\0';
3387  GNUNET_CRYPTO_hash (temp, max_len, &hash);
3389  "Start state at split edge `%s'-`%s` is %s\n",
3390  temp,
3391  edge[0].label,
3392  GNUNET_h2s (&hash_new));
3393  iterator (iterator_cls, &hash, temp, GNUNET_NO, 1, edge);
3394  GNUNET_free (temp);
3395  }
3396  }
3397 
3398  if (cur_len < max_len)
3399  {
3400  for (t = state->transitions_head; NULL != t; t = t->next)
3401  {
3402  if (NULL != strchr (t->label, (int) '.'))
3403  {
3404  /* Wildcards not allowed during starting states */
3405  GNUNET_break (0);
3406  continue;
3407  }
3408  if (NULL != consumed_string)
3409  GNUNET_asprintf (&temp, "%s%s", consumed_string, t->label);
3410  else
3411  GNUNET_asprintf (&temp, "%s", t->label);
3412  iterate_initial_edge (min_len,
3413  max_len,
3414  temp,
3415  t->to_state,
3416  iterator,
3417  iterator_cls);
3418  GNUNET_free (temp);
3419  }
3420  }
3421 }
3422 
3423 
3432 void
3435  void *iterator_cls)
3436 {
3437  struct REGEX_INTERNAL_State *s;
3438 
3439  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating over starting edges\n");
3442  NULL,
3443  a->start,
3444  iterator,
3445  iterator_cls);
3446  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating over DFA edges\n");
3447  for (s = a->states_head; NULL != s; s = s->next)
3448  {
3449  struct REGEX_BLOCK_Edge edges[s->transition_count];
3450  unsigned int num_edges;
3451 
3452  num_edges = state_get_edges (s, edges);
3453  if (((NULL != s->proof) && (0 < strlen (s->proof))) || s->accepting)
3454  {
3456  "Creating DFA edges at `%s' under key %s\n",
3457  s->proof,
3458  GNUNET_h2s (&s->hash));
3459  iterator (iterator_cls,
3460  &s->hash,
3461  s->proof,
3462  s->accepting,
3463  num_edges,
3464  edges);
3465  }
3466  s->marked = GNUNET_NO;
3467  }
3468 }
3469 
3470 
3478 {
3480  char *proof;
3484 };
3485 
3486 
3491 {
3494 };
3495 
3496 
3508 static void
3509 store_all_states (void *cls,
3510  const struct GNUNET_HashCode *key,
3511  const char *proof,
3512  int accepting,
3513  unsigned int num_edges,
3514  const struct REGEX_BLOCK_Edge *edges)
3515 {
3516  struct GNUNET_CONTAINER_MultiHashMap *hm = cls;
3517  struct temporal_state_store *tmp;
3518  size_t edges_size;
3519 
3520  tmp = GNUNET_new (struct temporal_state_store);
3521  tmp->reachable = GNUNET_NO;
3522  tmp->proof = GNUNET_strdup (proof);
3523  tmp->accepting = accepting;
3524  tmp->num_edges = num_edges;
3525  edges_size = sizeof(struct REGEX_BLOCK_Edge) * num_edges;
3526  tmp->edges = GNUNET_malloc (edges_size);
3527  GNUNET_memcpy (tmp->edges, edges, edges_size);
3530  hm,
3531  key,
3532  tmp,
3534 }
3535 
3536 
3545 static void
3547  struct GNUNET_CONTAINER_MultiHashMap *hm)
3548 {
3549  struct temporal_state_store *child;
3550  unsigned int i;
3551 
3552  if (GNUNET_YES == state->reachable)
3553  /* visited */
3554  return;
3555 
3556  state->reachable = GNUNET_YES;
3557  for (i = 0; i < state->num_edges; i++)
3558  {
3559  child =
3560  GNUNET_CONTAINER_multihashmap_get (hm, &state->edges[i].destination);
3561  if (NULL == child)
3562  {
3563  GNUNET_break (0);
3564  continue;
3565  }
3566  mark_as_reachable (child, hm);
3567  }
3568 }
3569 
3570 
3580 static int
3582  const struct GNUNET_HashCode *key,
3583  void *value)
3584 {
3585  struct GNUNET_CONTAINER_MultiHashMap *hm = cls;
3586  struct temporal_state_store *state = value;
3587 
3588  if (GNUNET_YES == state->reachable)
3589  /* already visited and marked */
3590  return GNUNET_YES;
3591 
3592  if ((GNUNET_REGEX_INITIAL_BYTES > strlen (state->proof)) &&
3593  (GNUNET_NO == state->accepting) )
3594  /* not directly reachable */
3595  return GNUNET_YES;
3596 
3597  mark_as_reachable (state, hm);
3598  return GNUNET_YES;
3599 }
3600 
3601 
3612 static int
3613 iterate_reachables (void *cls, const struct GNUNET_HashCode *key, void *value)
3614 {
3615  struct client_iterator *ci = cls;
3616  struct temporal_state_store *state = value;
3617 
3618  if (GNUNET_YES == state->reachable)
3619  {
3620  ci->iterator (ci->iterator_cls,
3621  key,
3622  state->proof,
3623  state->accepting,
3624  state->num_edges,
3625  state->edges);
3626  }
3627  GNUNET_free (state->edges);
3628  GNUNET_free (state->proof);
3629  GNUNET_free (state);
3630  return GNUNET_YES;
3631 }
3632 
3633 
3634 void
3637  void *iterator_cls)
3638 {
3639  struct GNUNET_CONTAINER_MultiHashMap *hm;
3640  struct client_iterator ci;
3641 
3643  ci.iterator = iterator;
3645 
3649 
3651 }
3652 
3653 
3654 /* end of regex_internal.c */
static int ret
Return value of the commandline.
Definition: gnunet-abd.c:81
static struct GNUNET_ARM_Operation * op
Current operation.
Definition: gnunet-arm.c:144
static int start
Set if we are to start default services (including ARM).
Definition: gnunet-arm.c:39
static int end
Set if we are to shutdown all services (including ARM).
Definition: gnunet-arm.c:34
struct GNUNET_HashCode key
The key used in the DHT.
uint16_t len
length of data (which is always a uint32_t, but presumably this can be used to specify that fewer byt...
static char * value
Value of the record to add/remove.
enum State state
current state of profiling
static int result
Global testing status.
static uint64_t proof
Definition: gnunet-scrypt.c:49
static pid_t child
static struct GNUNET_OS_Process * p
Helper process we started.
Definition: gnunet-uri.c:38
static struct GNUNET_DNSSTUB_Context * ctx
Context for DNS resolution.
static struct GNUNET_SCHEDULER_Task * t
Main task.
API to access regex service to advertise capabilities via regex and discover respective peers using m...
#define GNUNET_CONTAINER_MDLL_remove(mdll, head, tail, element)
Remove an element from a MDLL.
#define GNUNET_CONTAINER_MDLL_insert_tail(mdll, head, tail, element)
Insert an element at the tail of a MDLL.
#define GNUNET_CONTAINER_DLL_remove(head, tail, element)
Remove an element from a DLL.
#define GNUNET_CONTAINER_DLL_insert_before(head, tail, other, element)
Insert an element into a DLL before the given other element.
#define GNUNET_CONTAINER_MDLL_insert(mdll, head, tail, element)
Insert an element at the head of a MDLL.
#define GNUNET_CONTAINER_DLL_insert_tail(head, tail, element)
Insert an element at the tail of a DLL.
#define GNUNET_CONTAINER_DLL_insert(head, tail, element)
Insert an element at the head of a DLL.
void GNUNET_CRYPTO_hash(const void *block, size_t size, struct GNUNET_HashCode *ret)
Compute hash of a given block.
Definition: crypto_hash.c:41
enum GNUNET_GenericReturnValue GNUNET_CONTAINER_multihashmap_put(struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key, void *value, enum GNUNET_CONTAINER_MultiHashMapOption opt)
Store a key-value pair in the map.
struct GNUNET_CONTAINER_MultiHashMap * GNUNET_CONTAINER_multihashmap_create(unsigned int len, int do_not_copy_keys)
Create a multi hash map.
void GNUNET_CONTAINER_multihashmap_destroy(struct GNUNET_CONTAINER_MultiHashMap *map)
Destroy a hash map.
enum GNUNET_GenericReturnValue GNUNET_CONTAINER_multihashmap_iterate(struct GNUNET_CONTAINER_MultiHashMap *map, GNUNET_CONTAINER_MultiHashMapIteratorCallback it, void *it_cls)
Iterate over all entries in the map.
void * GNUNET_CONTAINER_multihashmap_get(const struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key)
Given a key find a value in the map matching the key.
static int iterator(void *cls, const struct GNUNET_PeerIdentity *key, void *value)
Iterator over hash map entries.
@ GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST
, ' bother checking if a value already exists (faster than GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE...
#define GNUNET_log(kind,...)
#define GNUNET_MAX(a, b)
#define GNUNET_memcpy(dst, src, n)
Call memcpy() but check for n being 0 first.
@ GNUNET_OK
@ GNUNET_YES
@ GNUNET_NO
@ GNUNET_SYSERR
#define GNUNET_assert(cond)
Use this for fatal errors that cannot be handled.
#define GNUNET_break(cond)
Use this for internal assertion violations that are not fatal (can be handled) but should not occur.
const char * GNUNET_h2s(const struct GNUNET_HashCode *hc)
Convert a hash value to a string (for printing debug messages).
#define GNUNET_log_strerror(level, cmd)
Log an error message at log-level 'level' that indicates a failure of the command 'cmd' with the mess...
@ GNUNET_ERROR_TYPE_ERROR
@ GNUNET_ERROR_TYPE_DEBUG
int int GNUNET_asprintf(char **buf, const char *format,...) __attribute__((format(printf
Like asprintf, just portable.
#define GNUNET_strdup(a)
Wrapper around GNUNET_xstrdup_.
#define GNUNET_array_grow(arr, size, tsize)
Grow a well-typed (!) array.
#define GNUNET_malloc_large(size)
Wrapper around malloc.
#define GNUNET_strndup(a, length)
Wrapper around GNUNET_xstrndup_.
int GNUNET_snprintf(char *buf, size_t size, const char *format,...) __attribute__((format(printf
Like snprintf, just aborts if the buffer is of insufficient size.
#define GNUNET_new(type)
Allocate a struct or union of the given type.
#define GNUNET_malloc(size)
Wrapper around malloc.
#define GNUNET_realloc(ptr, size)
Wrapper around realloc.
#define GNUNET_free(ptr)
Wrapper around free.
#define GNUNET_REGEX_INITIAL_BYTES
Constant for how many bytes the initial string regex should have.
#define max(x, y)
static struct PeerEntry ** table
Table with our interned peer IDs.
Definition: peer.c:56
static unsigned int size
Size of the "table".
Definition: peer.c:68
static struct REGEX_INTERNAL_State * dfa_state_create(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_StateSet *nfa_states)
Creates a new DFA state based on a set of NFA states.
static void sb_strdup_cstr(struct StringBuffer *out, const char *cstr)
Copy the given string buffer from 'in' to 'out'.
static void state_add_transition(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_State *from_state, const char *label, struct REGEX_INTERNAL_State *to_state)
Adds a transition from one state to another on label.
static struct REGEX_INTERNAL_Automaton * nfa_fragment_create(struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *end)
Creates a new NFA fragment.
static void sb_printf3(struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg1, const struct StringBuffer *sarg2, const struct StringBuffer *sarg3)
Format a string buffer.
static void construct_dfa_states(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *nfa, struct REGEX_INTERNAL_Automaton *dfa, struct REGEX_INTERNAL_State *dfa_state)
Create DFA states based on given 'nfa' and starting with 'dfa_state'.
static void state_set_clear(struct REGEX_INTERNAL_StateSet *set)
Clears the given StateSet 'set'.
static void nfa_add_concatenation(struct REGEX_INTERNAL_Context *ctx)
Pops two NFA fragments (a, b) from the stack and concatenates them (ab)
static void remove_parentheses(struct StringBuffer *str)
Remove parentheses surrounding string str.
static void sb_printf1(struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg)
Format a string buffer.
static int sb_strncmp_cstr(const struct StringBuffer *str1, const char *str2, size_t n)
Compare n bytes of 'str1' and 'str2'.
static void automaton_merge_states(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s1, struct REGEX_INTERNAL_State *s2)
Merge two states into one.
static void dfa_add_multi_strides(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Function called for each state in the DFA.
static void sb_append(struct StringBuffer *ret, const struct StringBuffer *sarg)
Append a string.
void REGEX_INTERNAL_iterate_all_edges(struct REGEX_INTERNAL_Automaton *a, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
Iterate over all edges starting from start state of automaton 'a'.
static void sb_free(struct StringBuffer *sb)
Free resources of the given string buffer.
static void sb_realloc(struct StringBuffer *ret, size_t nlen)
Reallocate the buffer of 'ret' to fit 'nlen' characters; move the existing string to the beginning of...
static void nfa_add_states(struct REGEX_INTERNAL_Automaton *n, struct REGEX_INTERNAL_State *states_head, struct REGEX_INTERNAL_State *states_tail)
Adds a list of states to the given automaton 'n'.
size_t REGEX_INTERNAL_get_first_key(const char *input_string, size_t string_len, struct GNUNET_HashCode *key)
Get the first key for the given input_string.
static void state_set_append(struct REGEX_INTERNAL_StateSet *set, struct REGEX_INTERNAL_State *state)
Append state to the given StateSet.
static void dfa_add_multi_strides_helper(void *cls, const unsigned int depth, char *label, struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *s)
Recursive helper function to add strides to a DFA.
void REGEX_INTERNAL_automaton_traverse(const struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *start, REGEX_INTERNAL_traverse_check check, void *check_cls, REGEX_INTERNAL_traverse_action action, void *action_cls)
Traverses the given automaton using depth-first-search (DFS) from it's start state,...
static void mark_as_reachable(struct temporal_state_store *state, struct GNUNET_CONTAINER_MultiHashMap *hm)
Mark state as reachable and call recursively on all its edges.
void REGEX_INTERNAL_dfa_add_multi_strides(struct REGEX_INTERNAL_Context *regex_ctx, struct REGEX_INTERNAL_Automaton *dfa, const unsigned int stride_len)
Adds multi-strided transitions to the given 'dfa'.
int REGEX_INTERNAL_eval(struct REGEX_INTERNAL_Automaton *a, const char *string)
Evaluates the given 'string' against the given compiled regex.
static void nfa_add_question_op(struct REGEX_INTERNAL_Context *ctx)
Pops an NFA fragment (a) from the stack and adds a new fragment (a?)
static int nullstrcmp(const char *str1, const char *str2)
Compare two strings for equality.
static int dfa_merge_nondistinguishable_states(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a)
Merge all non distinguishable states in the DFA 'a'.
static int dfa_minimize(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a)
Minimize the given DFA 'a' by removing all unreachable states, removing all dead states and merging a...
static void sb_append_cstr(struct StringBuffer *ret, const char *cstr)
Append a C string.
static int sb_nullstrcmp(const struct StringBuffer *s1, const struct StringBuffer *s2)
Compare two strings for equality.
static int automaton_create_proofs(struct REGEX_INTERNAL_Automaton *a)
Create proofs for all states in the given automaton.
static int state_compare(const void *a, const void *b)
Compare two states.
static void dfa_remove_dead_states(struct REGEX_INTERNAL_Automaton *a)
Remove all dead states from the DFA 'a'.
static int state_set_compare(struct REGEX_INTERNAL_StateSet *sset1, struct REGEX_INTERNAL_StateSet *sset2)
Compare to state sets by comparing the id's of the states that are contained in each set.
void REGEX_INTERNAL_iterate_reachable_edges(struct REGEX_INTERNAL_Automaton *a, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
Iterate over all edges of automaton 'a' that are reachable from a state with a proof of at least GNUN...
static void remove_epsilon(const struct StringBuffer *str, struct StringBuffer *ret)
Remove an epsilon from the string str.
static void sb_wrap(struct StringBuffer *ret, const char *format, size_t extra_chars)
Wrap a string buffer, that is, set ret to the format string which contains an "%s" which is to be rep...
void dfa_compress_paths_helper(struct REGEX_INTERNAL_Automaton *dfa, struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *cur, char *label, unsigned int max_len, struct REGEX_INTERNAL_Transition **transitions_head, struct REGEX_INTERNAL_Transition **transitions_tail)
Recursive Helper function for DFA path compression.
struct REGEX_INTERNAL_Automaton * REGEX_INTERNAL_construct_dfa(const char *regex, const size_t len, unsigned int max_path_len)
Construct DFA for the given 'regex' of length 'len'.
static void sb_strdup(struct StringBuffer *out, const struct StringBuffer *in)
Copy the given string buffer from 'in' to 'out'.
static int evaluate_dfa(struct REGEX_INTERNAL_Automaton *a, const char *string)
Evaluates the given string using the given DFA automaton.
const char * REGEX_INTERNAL_get_canonical_regex(struct REGEX_INTERNAL_Automaton *a)
Get the canonical regex of the given automaton.
static void nfa_closure_set_create(struct REGEX_INTERNAL_StateSet *ret, struct REGEX_INTERNAL_Automaton *nfa, struct REGEX_INTERNAL_StateSet *states, const char *label)
Calculates the closure set for the given set of states.
static int iterate_reachables(void *cls, const struct GNUNET_HashCode *key, void *value)
Iterator over hash map entries.
static void sb_printf2(struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg1, const struct StringBuffer *sarg2)
Format a string buffer.
static unsigned int state_get_edges(struct REGEX_INTERNAL_State *s, struct REGEX_BLOCK_Edge *edges)
Get all edges leaving state s.
static void automaton_create_proofs_simplify(const struct StringBuffer *R_last_ij, const struct StringBuffer *R_last_ik, const struct StringBuffer *R_last_kk, const struct StringBuffer *R_last_kj, struct StringBuffer *R_cur_ij, struct StringBuffer *R_cur_l, struct StringBuffer *R_cur_r)
Construct the regular expression given the inductive step, $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}...
static int sb_strcmp(const struct StringBuffer *s1, const struct StringBuffer *s2)
Compare two strings for equality.
static void store_all_states(void *cls, const struct GNUNET_HashCode *key, const char *proof, int accepting, unsigned int num_edges, const struct REGEX_BLOCK_Edge *edges)
Iterator over all edges of a dfa.
static int evaluate_nfa(struct REGEX_INTERNAL_Automaton *a, const char *string)
Evaluates the given string using the given NFA automaton.
static int sb_strncmp(const struct StringBuffer *str1, const struct StringBuffer *str2, size_t n)
Compare n bytes of 'str1' and 'str2'.
static int needs_parentheses(const struct StringBuffer *str)
Check if the given string str needs parentheses around it when using it to generate a regex.
static void state_remove_transition(struct REGEX_INTERNAL_State *state, struct REGEX_INTERNAL_Transition *transition)
Remove a 'transition' from 'state'.
static void iterate_initial_edge(unsigned int min_len, unsigned int max_len, char *consumed_string, struct REGEX_INTERNAL_State *state, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
Recursive function that calls the iterator for each synthetic start state.
static void mark_states(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Set the given state 'marked' to GNUNET_YES.
struct REGEX_INTERNAL_Automaton * REGEX_INTERNAL_construct_nfa(const char *regex, const size_t len)
Construct an NFA by parsing the regex string of length 'len'.
static void automaton_add_state(struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s)
Add a state to the automaton 'a', always use this function to alter the states DLL of the automaton.
static void dfa_remove_unreachable_states(struct REGEX_INTERNAL_Automaton *a)
Remove all unreachable states from DFA 'a'.
void REGEX_INTERNAL_automaton_destroy(struct REGEX_INTERNAL_Automaton *a)
Free the memory allocated by constructing the REGEX_INTERNAL_Automaton.
unsigned int REGEX_INTERNAL_get_transition_count(struct REGEX_INTERNAL_Automaton *a)
Get the number of transitions that are contained in the given automaton.
static void nfa_add_star_op(struct REGEX_INTERNAL_Context *ctx)
Pops a NFA fragment from the stack (a) and adds a new fragment (a*)
static int has_epsilon(const struct StringBuffer *str)
Check if the string 'str' starts with an epsilon (empty string).
static void automaton_destroy_state(struct REGEX_INTERNAL_State *s)
Frees the memory used by State s.
static unsigned int dfa_move(struct REGEX_INTERNAL_State **s, const char *str)
Move from the given state 's' to the next state on transition 'str'.
static void nfa_add_alternation(struct REGEX_INTERNAL_Context *ctx)
Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment that alternates between a an...
static void sb_init(struct StringBuffer *sb, size_t n)
Initialize string buffer for storing strings of up to n characters.
static void nfa_add_label(struct REGEX_INTERNAL_Context *ctx, const char *label)
Adds a new nfa fragment to the stack.
static int sb_strkcmp(const struct StringBuffer *str1, const struct StringBuffer *str2, size_t k)
Compare 'str1', starting from position 'k', with whole 'str2'.
static void automaton_state_traverse(struct REGEX_INTERNAL_State *s, int *marks, unsigned int *count, REGEX_INTERNAL_traverse_check check, void *check_cls, REGEX_INTERNAL_traverse_action action, void *action_cls)
Depth-first traversal (DFS) of all states that are reachable from state 's'.
static void automaton_remove_state(struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s)
Remove a state from the given automaton 'a'.
static void dfa_compress_paths(struct REGEX_INTERNAL_Context *regex_ctx, struct REGEX_INTERNAL_Automaton *dfa, unsigned int max_len)
Compress paths in the given 'dfa'.
static struct REGEX_INTERNAL_State * nfa_state_create(struct REGEX_INTERNAL_Context *ctx, int accepting)
Creates a new NFA state.
static void number_states(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Helper function used as 'action' in 'REGEX_INTERNAL_automaton_traverse' function to create the depth-...
static void REGEX_INTERNAL_context_init(struct REGEX_INTERNAL_Context *ctx)
Initialize a new context.
static void nfa_add_plus_op(struct REGEX_INTERNAL_Context *ctx)
Pops an NFA fragment (a) from the stack and adds a new fragment (a+)
static void automaton_fragment_clear(struct REGEX_INTERNAL_Automaton *a)
Clears an automaton fragment.
static int reachability_iterator(void *cls, const struct GNUNET_HashCode *key, void *value)
Iterator over hash map entries to mark the ones that are reachable.
common internal definitions for regex library.
int(* REGEX_INTERNAL_traverse_check)(void *cls, struct REGEX_INTERNAL_State *s, struct REGEX_INTERNAL_Transition *t)
Function that gets passed to automaton traversal and is called before each next traversal from state ...
void(* REGEX_INTERNAL_traverse_action)(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Function that is called with each state, when traversing an automaton.
@ NFA
@ DFA
library to parse regular expressions into dfa
void(* REGEX_INTERNAL_KeyIterator)(void *cls, const struct GNUNET_HashCode *key, const char *proof, int accepting, unsigned int num_edges, const struct REGEX_BLOCK_Edge *edges)
Iterator callback function.
Internal representation of the hash map.
A 512-bit hashcode.
struct GNUNET_SCHEDULER_Task * next
This is a linked list.
Definition: scheduler.c:140
Edge representation.
const char * label
Label of the edge.
struct GNUNET_HashCode destination
Destination of the edge.
Automaton representation.
struct REGEX_INTERNAL_State * start
First state of the automaton.
char * canonical_regex
Canonical regex (result of RX->NFA->DFA->RX)
int is_multistrided
GNUNET_YES, if multi strides have been added to the Automaton.
struct REGEX_INTERNAL_State * states_tail
DLL of states.
unsigned int state_count
Number of states in the automaton.
struct REGEX_INTERNAL_State * states_head
DLL of states.
struct REGEX_INTERNAL_State * end
End state of the partial NFA.
enum REGEX_INTERNAL_AutomatonType type
Type of the automaton.
Context that contains an id counter for states and transitions as well as a DLL of automatons used as...
Set of states using MDLL API.
struct REGEX_INTERNAL_State * head
MDLL of states.
struct REGEX_INTERNAL_State * tail
MDLL of states.
unsigned int len
Length of the MDLL.
unsigned int size
Length of the 'states' array.
struct REGEX_INTERNAL_State ** states
Array of states.
unsigned int off
Number of entries in use in the 'states' array.
unsigned int incoming_transition_count
Number of incoming transitions.
struct REGEX_INTERNAL_State * next
This is a linked list to keep states in an automaton.
char * proof
Proof for this state.
unsigned int transition_count
Number of transitions from this state to other states.
unsigned int traversal_id
Unique state id that is used for traversing the automaton.
struct REGEX_INTERNAL_StateSet nfa_set
Set of states on which this state is based on.
unsigned int dfs_id
Linear state ID acquired by depth-first-search.
char * name
Human readable name of the state.
int marked
Marking of the state.
unsigned int id
Unique state id.
struct REGEX_INTERNAL_Transition * transitions_tail
DLL of transitions.
int accepting
If this is an accepting state or not.
int lowlink
Used for SCC detection.
int contained
Marking the state as contained.
struct REGEX_INTERNAL_Transition * transitions_head
DLL of transitions.
int index
Used for SCC detection.
unsigned int scc_id
Marking the state as part of an SCC (Strongly Connected Component).
Context for adding strided transitions to a DFA.
struct REGEX_INTERNAL_Transition * transitions_tail
Strided transitions DLL.
struct REGEX_INTERNAL_Transition * transitions_head
Strided transitions DLL.
const unsigned int stride
Length of the strides.
Transition between two states.
struct REGEX_INTERNAL_State * to_state
State to which this transition leads.
struct REGEX_INTERNAL_Transition * next
This is a linked list.
struct REGEX_INTERNAL_State * from_state
State from which this transition origins.
char * label
Label for this transition.
String container for faster string operations.
int16_t null_flag
Buffer currently represents "NULL" (not the empty string!)
char * sbuf
Buffer holding the string (may start in the middle!); NOT 0-terminated!
size_t slen
Length of the string in the buffer.
unsigned int blen
Number of bytes allocated for sbuf.
char * abuf
Allocated buffer.
int16_t synced
If this entry is part of the last/current generation array, this flag is GNUNET_YES if the last and c...
Store regex iterator and cls in one place to pass to the hashmap iterator.
REGEX_INTERNAL_KeyIterator iterator
Struct to hold all the relevant state information in the HashMap.
struct REGEX_BLOCK_Edge * edges