GNUnet 0.22.2
regex_internal.c
Go to the documentation of this file.
1/*
2 This file is part of GNUnet
3 Copyright (C) 2012 GNUnet e.V.
4
5 GNUnet is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Affero General Public License as published
7 by the Free Software Foundation, either version 3 of the License,
8 or (at your option) any later version.
9
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Affero General Public License for more details.
14
15 You should have received a copy of the GNU Affero General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
17
18 SPDX-License-Identifier: AGPL3.0-or-later
19 */
26#include "platform.h"
27#include "gnunet_util_lib.h"
29#include "regex_internal_lib.h"
30#include "regex_internal.h"
31
32
37#define REGEX_DEBUG_DFA GNUNET_NO
38
43{
48
53
57 unsigned int len;
58};
59
60
67static void
70{
71 if (set->off == set->size)
72 GNUNET_array_grow (set->states, set->size, set->size * 2 + 4);
73 set->states[set->off++] = state;
74}
75
76
85static int
86nullstrcmp (const char *str1, const char *str2)
87{
88 if ((NULL == str1) != (NULL == str2))
89 return -1;
90 if ((NULL == str1) && (NULL == str2))
91 return 0;
92
93 return strcmp (str1, str2);
94}
95
96
106static void
108 struct REGEX_INTERNAL_State *from_state,
109 const char *label,
110 struct REGEX_INTERNAL_State *to_state)
111{
113 struct REGEX_INTERNAL_Transition *oth;
114
115 if (NULL == from_state)
116 {
117 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not create Transition.\n");
118 return;
119 }
120
121 /* Do not add duplicate state transitions */
122 for (t = from_state->transitions_head; NULL != t; t = t->next)
123 {
124 if ((t->to_state == to_state) && (0 == nullstrcmp (t->label, label)) &&
125 (t->from_state == from_state) )
126 return;
127 }
128
129 /* sort transitions by label */
130 for (oth = from_state->transitions_head; NULL != oth; oth = oth->next)
131 {
132 if (0 < nullstrcmp (oth->label, label))
133 break;
134 }
135
137 if (NULL != ctx)
138 t->id = ctx->transition_id++;
139 if (NULL != label)
140 t->label = GNUNET_strdup (label);
141 else
142 t->label = NULL;
143 t->to_state = to_state;
144 t->from_state = from_state;
145
146 /* Add outgoing transition to 'from_state' */
150 oth,
151 t);
152}
153
154
161static void
163 struct REGEX_INTERNAL_Transition *transition)
164{
165 if ((NULL == state) || (NULL == transition))
166 return;
167
168 if (transition->from_state != state)
169 return;
170
171 GNUNET_free (transition->label);
172
173 state->transition_count--;
174 GNUNET_CONTAINER_DLL_remove (state->transitions_head,
175 state->transitions_tail,
176 transition);
177
178 GNUNET_free (transition);
179}
180
181
192static int
193state_compare (const void *a, const void *b)
194{
195 struct REGEX_INTERNAL_State **s1 = (struct REGEX_INTERNAL_State **) a;
196 struct REGEX_INTERNAL_State **s2 = (struct REGEX_INTERNAL_State **) b;
197
198 return (*s1)->id - (*s2)->id;
199}
200
201
211static unsigned int
213{
215 unsigned int count;
216
217 if (NULL == s)
218 return 0;
219
220 count = 0;
221
222 for (t = s->transitions_head; NULL != t; t = t->next)
223 {
224 if (NULL != t->to_state)
225 {
226 edges[count].label = t->label;
227 edges[count].destination = t->to_state->hash;
228 count++;
229 }
230 }
231 return count;
232}
233
234
243static int
245 struct REGEX_INTERNAL_StateSet *sset2)
246{
247 int result;
248 unsigned int i;
249
250 if ((NULL == sset1) || (NULL == sset2))
251 return 1;
252
253 result = sset1->off - sset2->off;
254 if (result < 0)
255 return -1;
256 if (result > 0)
257 return 1;
258 for (i = 0; i < sset1->off; i++)
259 if (0 != (result = state_compare (&sset1->states[i], &sset2->states[i])))
260 break;
261 return result;
262}
263
264
270static void
272{
273 GNUNET_array_grow (set->states, set->size, 0);
274 set->off = 0;
275}
276
277
284static void
286{
287 if (NULL == a)
288 return;
289
290 a->start = NULL;
291 a->end = NULL;
292 a->states_head = NULL;
293 a->states_tail = NULL;
294 a->state_count = 0;
295 GNUNET_free (a);
296}
297
298
304static void
306{
308 struct REGEX_INTERNAL_Transition *next_t;
309
310 if (NULL == s)
311 return;
312
313 GNUNET_free (s->name);
314 GNUNET_free (s->proof);
316 for (t = s->transitions_head; NULL != t; t = next_t)
317 {
318 next_t = t->next;
320 }
321
322 GNUNET_free (s);
323}
324
325
334static void
336 struct REGEX_INTERNAL_State *s)
337{
338 struct REGEX_INTERNAL_State *s_check;
339 struct REGEX_INTERNAL_Transition *t_check;
340 struct REGEX_INTERNAL_Transition *t_check_next;
341
342 if ((NULL == a) || (NULL == s))
343 return;
344
345 /* remove all transitions leading to this state */
346 for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
347 {
348 for (t_check = s_check->transitions_head; NULL != t_check;
349 t_check = t_check_next)
350 {
351 t_check_next = t_check->next;
352 if (t_check->to_state == s)
353 state_remove_transition (s_check, t_check);
354 }
355 }
356
357 /* remove state */
359 a->state_count--;
360
362}
363
364
374static void
376 struct REGEX_INTERNAL_Automaton *a,
377 struct REGEX_INTERNAL_State *s1,
378 struct REGEX_INTERNAL_State *s2)
379{
380 struct REGEX_INTERNAL_State *s_check;
381 struct REGEX_INTERNAL_Transition *t_check;
383 struct REGEX_INTERNAL_Transition *t_next;
384 int is_dup;
385
386 if (s1 == s2)
387 return;
388
389 /* 1. Make all transitions pointing to s2 point to s1, unless this transition
390 * does not already exists, if it already exists remove transition. */
391 for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
392 {
393 for (t_check = s_check->transitions_head; NULL != t_check; t_check = t_next)
394 {
395 t_next = t_check->next;
396
397 if (s2 == t_check->to_state)
398 {
399 is_dup = GNUNET_NO;
400 for (t = t_check->from_state->transitions_head; NULL != t; t = t->next)
401 {
402 if ((t->to_state == s1) && (0 == strcmp (t_check->label, t->label)) )
403 is_dup = GNUNET_YES;
404 }
405 if (GNUNET_NO == is_dup)
406 t_check->to_state = s1;
407 else
408 state_remove_transition (t_check->from_state, t_check);
409 }
410 }
411 }
412
413 /* 2. Add all transitions from s2 to sX to s1 */
414 for (t_check = s2->transitions_head; NULL != t_check; t_check = t_check->next)
415 {
416 if (t_check->to_state != s1)
417 state_add_transition (ctx, s1, t_check->label, t_check->to_state);
418 }
419
420 /* 3. Rename s1 to {s1,s2} */
421#if REGEX_DEBUG_DFA
422 char *new_name;
423
424 new_name = s1->name;
425 GNUNET_asprintf (&s1->name, "{%s,%s}", new_name, s2->name);
426 GNUNET_free (new_name);
427#endif
428
429 /* remove state */
431 a->state_count--;
433}
434
435
443static void
445 struct REGEX_INTERNAL_State *s)
446{
448 a->state_count++;
449}
450
451
466static void
468 int *marks,
469 unsigned int *count,
471 void *check_cls,
473 void *action_cls)
474{
476
477 if (GNUNET_YES == marks[s->traversal_id])
478 return;
479
480 marks[s->traversal_id] = GNUNET_YES;
481
482 if (NULL != action)
483 action (action_cls, *count, s);
484
485 (*count)++;
486
487 for (t = s->transitions_head; NULL != t; t = t->next)
488 {
489 if ((NULL == check) ||
490 ((NULL != check) && (GNUNET_YES == check (check_cls, s, t)) ))
491 {
492 automaton_state_traverse (t->to_state,
493 marks,
494 count,
495 check,
496 check_cls,
497 action,
498 action_cls);
499 }
500 }
501}
502
503
504void
508 void *check_cls,
510 void *action_cls)
511{
512 if ((NULL == a) || (0 == a->state_count))
513 return;
514
515 {
516 unsigned int count;
517 int marks[a->state_count];
518 struct REGEX_INTERNAL_State *s;
519
520
521 for (count = 0, s = a->states_head; NULL != s && count < a->state_count;
522 s = s->next, count++)
523 {
524 s->traversal_id = count;
525 marks[s->traversal_id] = GNUNET_NO;
526 }
527
528 count = 0;
529
530 if (NULL == start)
531 s = a->start;
532 else
533 s = start;
534
536 marks,
537 &count,
538 check,
539 check_cls,
540 action,
541 action_cls);
542 }
543}
544
545
550{
555 char *sbuf;
556
560 char *abuf;
561
565 size_t slen;
566
570 unsigned int blen;
571
575 int16_t null_flag;
576
586 int16_t synced;
587};
588
589
598static int
599sb_nullstrcmp (const struct StringBuffer *s1, const struct StringBuffer *s2)
600{
601 if ((GNUNET_YES == s1->null_flag) && (GNUNET_YES == s2->null_flag))
602 return 0;
603 if ((GNUNET_YES == s1->null_flag) || (GNUNET_YES == s2->null_flag))
604 return -1;
605 if (s1->slen != s2->slen)
606 return -1;
607 if (0 == s1->slen)
608 return 0;
609 return memcmp (s1->sbuf, s2->sbuf, s1->slen);
610}
611
612
621static int
622sb_strcmp (const struct StringBuffer *s1, const struct StringBuffer *s2)
623{
624 if (s1->slen != s2->slen)
625 return -1;
626 if (0 == s1->slen)
627 return 0;
628 return memcmp (s1->sbuf, s2->sbuf, s1->slen);
629}
630
631
639static void
640sb_realloc (struct StringBuffer *ret, size_t nlen)
641{
642 char *old;
643
644 GNUNET_assert (nlen >= ret->slen);
645 old = ret->abuf;
646 ret->abuf = GNUNET_malloc (nlen);
647 ret->blen = nlen;
648 GNUNET_memcpy (ret->abuf, ret->sbuf, ret->slen);
649 ret->sbuf = ret->abuf;
650 GNUNET_free (old);
651}
652
653
660static void
661sb_append (struct StringBuffer *ret, const struct StringBuffer *sarg)
662{
663 if (GNUNET_YES == ret->null_flag)
664 ret->slen = 0;
665 ret->null_flag = GNUNET_NO;
666 if (ret->blen < sarg->slen + ret->slen)
667 sb_realloc (ret, ret->blen + sarg->slen + 128);
668 GNUNET_memcpy (&ret->sbuf[ret->slen], sarg->sbuf, sarg->slen);
669 ret->slen += sarg->slen;
670}
671
672
679static void
680sb_append_cstr (struct StringBuffer *ret, const char *cstr)
681{
682 size_t cstr_len = strlen (cstr);
683
684 if (GNUNET_YES == ret->null_flag)
685 ret->slen = 0;
686 ret->null_flag = GNUNET_NO;
687 if (ret->blen < cstr_len + ret->slen)
688 sb_realloc (ret, ret->blen + cstr_len + 128);
689 GNUNET_memcpy (&ret->sbuf[ret->slen], cstr, cstr_len);
690 ret->slen += cstr_len;
691}
692
693
704static void
705sb_wrap (struct StringBuffer *ret, const char *format, size_t extra_chars)
706{
707 char *temp;
708
709 if (GNUNET_YES == ret->null_flag)
710 ret->slen = 0;
711 ret->null_flag = GNUNET_NO;
712 temp = GNUNET_malloc (ret->slen + extra_chars + 1);
713 GNUNET_snprintf (temp,
714 ret->slen + extra_chars + 1,
715 format,
716 (int) ret->slen,
717 ret->sbuf);
718 GNUNET_free (ret->abuf);
719 ret->abuf = temp;
720 ret->sbuf = temp;
721 ret->blen = ret->slen + extra_chars + 1;
722 ret->slen = ret->slen + extra_chars;
723}
724
725
735static void
737 const char *format,
738 size_t extra_chars,
739 const struct StringBuffer *sarg)
740{
741 if (ret->blen < sarg->slen + extra_chars + 1)
742 sb_realloc (ret, sarg->slen + extra_chars + 1);
743 ret->null_flag = GNUNET_NO;
744 ret->sbuf = ret->abuf;
745 ret->slen = sarg->slen + extra_chars;
746 GNUNET_snprintf (ret->sbuf, ret->blen, format, (int) sarg->slen, sarg->sbuf);
747}
748
749
759static void
761 const char *format,
762 size_t extra_chars,
763 const struct StringBuffer *sarg1,
764 const struct StringBuffer *sarg2)
765{
766 if (ret->blen < sarg1->slen + sarg2->slen + extra_chars + 1)
767 sb_realloc (ret, sarg1->slen + sarg2->slen + extra_chars + 1);
768 ret->null_flag = GNUNET_NO;
769 ret->slen = sarg1->slen + sarg2->slen + extra_chars;
770 ret->sbuf = ret->abuf;
771 GNUNET_snprintf (ret->sbuf,
772 ret->blen,
773 format,
774 (int) sarg1->slen,
775 sarg1->sbuf,
776 (int) sarg2->slen,
777 sarg2->sbuf);
778}
779
780
792static void
794 const char *format,
795 size_t extra_chars,
796 const struct StringBuffer *sarg1,
797 const struct StringBuffer *sarg2,
798 const struct StringBuffer *sarg3)
799{
800 if (ret->blen < sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1)
801 sb_realloc (ret, sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1);
802 ret->null_flag = GNUNET_NO;
803 ret->slen = sarg1->slen + sarg2->slen + sarg3->slen + extra_chars;
804 ret->sbuf = ret->abuf;
805 GNUNET_snprintf (ret->sbuf,
806 ret->blen,
807 format,
808 (int) sarg1->slen,
809 sarg1->sbuf,
810 (int) sarg2->slen,
811 sarg2->sbuf,
812 (int) sarg3->slen,
813 sarg3->sbuf);
814}
815
816
823static void
825{
826 GNUNET_array_grow (sb->abuf, sb->blen, 0);
827 sb->slen = 0;
828 sb->sbuf = NULL;
829 sb->null_flag = GNUNET_YES;
830}
831
832
839static void
840sb_strdup (struct StringBuffer *out, const struct StringBuffer *in)
841
842{
843 out->null_flag = in->null_flag;
844 if (GNUNET_YES == out->null_flag)
845 return;
846 if (out->blen < in->slen)
847 {
848 GNUNET_array_grow (out->abuf, out->blen, in->slen);
849 }
850 out->sbuf = out->abuf;
851 out->slen = in->slen;
852 GNUNET_memcpy (out->sbuf, in->sbuf, out->slen);
853}
854
855
862static void
863sb_strdup_cstr (struct StringBuffer *out, const char *cstr)
864{
865 if (NULL == cstr)
866 {
867 out->null_flag = GNUNET_YES;
868 return;
869 }
870 out->null_flag = GNUNET_NO;
871 out->slen = strlen (cstr);
872 if (out->blen < out->slen)
873 {
874 GNUNET_array_grow (out->abuf, out->blen, out->slen);
875 }
876 out->sbuf = out->abuf;
877 GNUNET_memcpy (out->sbuf, cstr, out->slen);
878}
879
880
889static int
891{
892 size_t slen;
893 const char *op;
894 const char *cl;
895 const char *pos;
896 const char *end;
897 unsigned int cnt;
898
899 if ((GNUNET_YES == str->null_flag) || ((slen = str->slen) < 2))
900 return GNUNET_NO;
901 pos = str->sbuf;
902 if ('(' != pos[0])
903 return GNUNET_YES;
904 end = str->sbuf + slen;
905 cnt = 1;
906 pos++;
907 while (cnt > 0)
908 {
909 cl = memchr (pos, ')', end - pos);
910 if (NULL == cl)
911 {
912 GNUNET_break (0);
913 return GNUNET_YES;
914 }
915 /* while '(' before ')', count opening parens */
916 while ((NULL != (op = memchr (pos, '(', end - pos))) && (op < cl))
917 {
918 cnt++;
919 pos = op + 1;
920 }
921 /* got ')' first */
922 cnt--;
923 pos = cl + 1;
924 }
925 return (*pos == '\0') ? GNUNET_NO : GNUNET_YES;
926}
927
928
938static void
940{
941 size_t slen;
942 const char *pos;
943 const char *end;
944 const char *sbuf;
945 const char *op;
946 const char *cp;
947 unsigned int cnt;
948
949 if (0)
950 return;
951 sbuf = str->sbuf;
952 if ((GNUNET_YES == str->null_flag) || (1 >= (slen = str->slen)) ||
953 ('(' != str->sbuf[0]) || (')' != str->sbuf[slen - 1]))
954 return;
955 cnt = 0;
956 pos = &sbuf[1];
957 end = &sbuf[slen - 1];
958 op = memchr (pos, '(', end - pos);
959 cp = memchr (pos, ')', end - pos);
960 while (NULL != cp)
961 {
962 while ((NULL != op) && (op < cp))
963 {
964 cnt++;
965 pos = op + 1;
966 op = memchr (pos, '(', end - pos);
967 }
968 while ((NULL != cp) && ((NULL == op) || (cp < op)))
969 {
970 if (0 == cnt)
971 return; /* can't strip parens */
972 cnt--;
973 pos = cp + 1;
974 cp = memchr (pos, ')', end - pos);
975 }
976 }
977 if (0 != cnt)
978 {
979 GNUNET_break (0);
980 return;
981 }
982 str->sbuf++;
983 str->slen -= 2;
984}
985
986
995static int
996has_epsilon (const struct StringBuffer *str)
997{
998 return (GNUNET_YES != str->null_flag) && (0 < str->slen) &&
999 ('(' == str->sbuf[0]) && ('|' == str->sbuf[1]) &&
1000 (')' == str->sbuf[str->slen - 1]);
1001}
1002
1003
1013static void
1014remove_epsilon (const struct StringBuffer *str, struct StringBuffer *ret)
1015{
1016 if (GNUNET_YES == str->null_flag)
1017 {
1018 ret->null_flag = GNUNET_YES;
1019 return;
1020 }
1021 if ((str->slen > 1) && ('(' == str->sbuf[0]) && ('|' == str->sbuf[1]) &&
1022 (')' == str->sbuf[str->slen - 1]))
1023 {
1024 /* remove epsilon */
1025 if (ret->blen < str->slen - 3)
1026 {
1027 GNUNET_array_grow (ret->abuf, ret->blen, str->slen - 3);
1028 }
1029 ret->sbuf = ret->abuf;
1030 ret->slen = str->slen - 3;
1031 GNUNET_memcpy (ret->sbuf, &str->sbuf[2], ret->slen);
1032 return;
1033 }
1034 sb_strdup (ret, str);
1035}
1036
1037
1047static int
1048sb_strncmp (const struct StringBuffer *str1,
1049 const struct StringBuffer *str2,
1050 size_t n)
1051{
1052 size_t max;
1053
1054 if ((str1->slen != str2->slen) && ((str1->slen < n) || (str2->slen < n)))
1055 return -1;
1056 max = GNUNET_MAX (str1->slen, str2->slen);
1057 if (max > n)
1058 max = n;
1059 return memcmp (str1->sbuf, str2->sbuf, max);
1060}
1061
1062
1072static int
1073sb_strncmp_cstr (const struct StringBuffer *str1, const char *str2, size_t n)
1074{
1075 if (str1->slen < n)
1076 return -1;
1077 return memcmp (str1->sbuf, str2, n);
1078}
1079
1080
1088static void
1089sb_init (struct StringBuffer *sb, size_t n)
1090{
1091 sb->null_flag = GNUNET_NO;
1092 sb->abuf = sb->sbuf = (0 == n) ? NULL : GNUNET_malloc (n);
1093 sb->blen = n;
1094 sb->slen = 0;
1095}
1096
1097
1107static int
1108sb_strkcmp (const struct StringBuffer *str1,
1109 const struct StringBuffer *str2,
1110 size_t k)
1111{
1112 if ((GNUNET_YES == str1->null_flag) || (GNUNET_YES == str2->null_flag) ||
1113 (k > str1->slen) || (str1->slen - k != str2->slen))
1114 return -1;
1115 return memcmp (&str1->sbuf[k], str2->sbuf, str2->slen);
1116}
1117
1118
1127static void
1128number_states (void *cls,
1129 const unsigned int count,
1130 struct REGEX_INTERNAL_State *s)
1131{
1132 struct REGEX_INTERNAL_State **states = cls;
1133
1134 s->dfs_id = count;
1135 if (NULL != states)
1136 states[count] = s;
1137}
1138
1139
1140#define PRIS(a) \
1141 ((GNUNET_YES == a.null_flag) ? 6 : (int) a.slen), \
1142 ((GNUNET_YES == a.null_flag) ? "(null)" : a.sbuf)
1143
1144
1159static void
1161 const struct StringBuffer *R_last_ik,
1162 const struct StringBuffer *R_last_kk,
1163 const struct StringBuffer *R_last_kj,
1164 struct StringBuffer *R_cur_ij,
1165 struct StringBuffer *R_cur_l,
1166 struct StringBuffer *R_cur_r)
1167{
1168 struct StringBuffer R_temp_ij;
1169 struct StringBuffer R_temp_ik;
1170 struct StringBuffer R_temp_kj;
1171 struct StringBuffer R_temp_kk;
1172 int eps_check;
1173 int ij_ik_cmp;
1174 int ij_kj_cmp;
1175 int ik_kk_cmp;
1176 int kk_kj_cmp;
1177 int clean_ik_kk_cmp;
1178 int clean_kk_kj_cmp;
1179 size_t length;
1180 size_t length_l;
1181 size_t length_r;
1182
1183 /*
1184 * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1185 * R_last == R^{(k-1)}, R_cur == R^{(k)}
1186 * R_cur_ij = R_cur_l | R_cur_r
1187 * R_cur_l == R^{(k-1)}_{ij}
1188 * R_cur_r == R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1189 */if ((GNUNET_YES == R_last_ij->null_flag) &&
1190 ((GNUNET_YES == R_last_ik->null_flag) ||
1191 (GNUNET_YES == R_last_kj->null_flag)))
1192 {
1193 /* R^{(k)}_{ij} = N | N */
1194 R_cur_ij->null_flag = GNUNET_YES;
1195 R_cur_ij->synced = GNUNET_NO;
1196 return;
1197 }
1198
1199 if ((GNUNET_YES == R_last_ik->null_flag) ||
1200 (GNUNET_YES == R_last_kj->null_flag))
1201 {
1202 /* R^{(k)}_{ij} = R^{(k-1)}_{ij} | N */
1203 if (GNUNET_YES == R_last_ij->synced)
1204 {
1205 R_cur_ij->synced = GNUNET_YES;
1206 R_cur_ij->null_flag = GNUNET_NO;
1207 return;
1208 }
1209 R_cur_ij->synced = GNUNET_YES;
1210 sb_strdup (R_cur_ij, R_last_ij);
1211 return;
1212 }
1213 R_cur_ij->synced = GNUNET_NO;
1214
1215 /* $R^{(k)}_{ij} = N | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} OR
1216 * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} */
1217
1218 R_cur_r->null_flag = GNUNET_YES;
1219 R_cur_r->slen = 0;
1220 R_cur_l->null_flag = GNUNET_YES;
1221 R_cur_l->slen = 0;
1222
1223 /* cache results from strcmp, we might need these many times */
1224 ij_kj_cmp = sb_nullstrcmp (R_last_ij, R_last_kj);
1225 ij_ik_cmp = sb_nullstrcmp (R_last_ij, R_last_ik);
1226 ik_kk_cmp = sb_nullstrcmp (R_last_ik, R_last_kk);
1227 kk_kj_cmp = sb_nullstrcmp (R_last_kk, R_last_kj);
1228
1229 /* Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well
1230 * as parentheses, so we can better compare the contents */
1231
1232 memset (&R_temp_ij, 0, sizeof(struct StringBuffer));
1233 memset (&R_temp_ik, 0, sizeof(struct StringBuffer));
1234 memset (&R_temp_kk, 0, sizeof(struct StringBuffer));
1235 memset (&R_temp_kj, 0, sizeof(struct StringBuffer));
1236 remove_epsilon (R_last_ik, &R_temp_ik);
1237 remove_epsilon (R_last_kk, &R_temp_kk);
1238 remove_epsilon (R_last_kj, &R_temp_kj);
1239 remove_parentheses (&R_temp_ik);
1240 remove_parentheses (&R_temp_kk);
1241 remove_parentheses (&R_temp_kj);
1242 clean_ik_kk_cmp = sb_nullstrcmp (R_last_ik, &R_temp_kk);
1243 clean_kk_kj_cmp = sb_nullstrcmp (&R_temp_kk, R_last_kj);
1244
1245 /* construct R_cur_l (and, if necessary R_cur_r) */
1246 if (GNUNET_YES != R_last_ij->null_flag)
1247 {
1248 /* Assign R_temp_ij to R_last_ij and remove epsilon as well
1249 * as parentheses, so we can better compare the contents */
1250 remove_epsilon (R_last_ij, &R_temp_ij);
1251 remove_parentheses (&R_temp_ij);
1252
1253 if ((0 == sb_strcmp (&R_temp_ij, &R_temp_ik)) &&
1254 (0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) &&
1255 (0 == sb_strcmp (&R_temp_kk, &R_temp_kj)))
1256 {
1257 if (0 == R_temp_ij.slen)
1258 {
1259 R_cur_r->null_flag = GNUNET_NO;
1260 }
1261 else if ((0 == sb_strncmp_cstr (R_last_ij, "(|", 2)) ||
1262 ((0 == sb_strncmp_cstr (R_last_ik, "(|", 2)) &&
1263 (0 == sb_strncmp_cstr (R_last_kj, "(|", 2)) ))
1264 {
1265 /*
1266 * a|(e|a)a*(e|a) = a*
1267 * a|(e|a)(e|a)*(e|a) = a*
1268 * (e|a)|aa*a = a*
1269 * (e|a)|aa*(e|a) = a*
1270 * (e|a)|(e|a)a*a = a*
1271 * (e|a)|(e|a)a*(e|a) = a*
1272 * (e|a)|(e|a)(e|a)*(e|a) = a*
1273 */
1274 if (GNUNET_YES == needs_parentheses (&R_temp_ij))
1275 sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_ij);
1276 else
1277 sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_ij);
1278 }
1279 else
1280 {
1281 /*
1282 * a|aa*a = a+
1283 * a|(e|a)a*a = a+
1284 * a|aa*(e|a) = a+
1285 * a|(e|a)(e|a)*a = a+
1286 * a|a(e|a)*(e|a) = a+
1287 */
1288 if (GNUNET_YES == needs_parentheses (&R_temp_ij))
1289 sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_ij);
1290 else
1291 sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_ij);
1292 }
1293 }
1294 else if ((0 == ij_ik_cmp) && (0 == clean_kk_kj_cmp) &&
1295 (0 != clean_ik_kk_cmp))
1296 {
1297 /* a|ab*b = ab* */
1298 if (0 == R_last_kk->slen)
1299 sb_strdup (R_cur_r, R_last_ij);
1300 else if (GNUNET_YES == needs_parentheses (&R_temp_kk))
1301 sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
1302 else
1303 sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, R_last_kk);
1304 R_cur_l->null_flag = GNUNET_YES;
1305 }
1306 else if ((0 == ij_kj_cmp) && (0 == clean_ik_kk_cmp) &&
1307 (0 != clean_kk_kj_cmp))
1308 {
1309 /* a|bb*a = b*a */
1310 if (R_last_kk->slen < 1)
1311 {
1312 sb_strdup (R_cur_r, R_last_kj);
1313 }
1314 else if (GNUNET_YES == needs_parentheses (&R_temp_kk))
1315 sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
1316 else
1317 sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
1318
1319 R_cur_l->null_flag = GNUNET_YES;
1320 }
1321 else if ((0 == ij_ik_cmp) && (0 == kk_kj_cmp) &&
1322 (! has_epsilon (R_last_ij)) && has_epsilon (R_last_kk))
1323 {
1324 /* a|a(e|b)*(e|b) = a|ab* = a|a|ab|abb|abbb|... = ab* */
1325 if (needs_parentheses (&R_temp_kk))
1326 sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
1327 else
1328 sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, &R_temp_kk);
1329 R_cur_l->null_flag = GNUNET_YES;
1330 }
1331 else if ((0 == ij_kj_cmp) && (0 == ik_kk_cmp) &&
1332 (! has_epsilon (R_last_ij)) && has_epsilon (R_last_kk))
1333 {
1334 /* a|(e|b)(e|b)*a = a|b*a = a|a|ba|bba|bbba|... = b*a */
1335 if (needs_parentheses (&R_temp_kk))
1336 sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_ij);
1337 else
1338 sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_ij);
1339 R_cur_l->null_flag = GNUNET_YES;
1340 }
1341 else
1342 {
1343 sb_strdup (R_cur_l, R_last_ij);
1344 remove_parentheses (R_cur_l);
1345 }
1346 }
1347 else
1348 {
1349 /* we have no left side */
1350 R_cur_l->null_flag = GNUNET_YES;
1351 }
1352
1353 /* construct R_cur_r, if not already constructed */
1354 if (GNUNET_YES == R_cur_r->null_flag)
1355 {
1356 length = R_temp_kk.slen - R_last_ik->slen;
1357
1358 /* a(ba)*bx = (ab)+x */
1359 if ((length > 0) && (GNUNET_YES != R_last_kk->null_flag) &&
1360 (0 < R_last_kk->slen) && (GNUNET_YES != R_last_kj->null_flag) &&
1361 (0 < R_last_kj->slen) && (GNUNET_YES != R_last_ik->null_flag) &&
1362 (0 < R_last_ik->slen) &&
1363 (0 == sb_strkcmp (&R_temp_kk, R_last_ik, length)) &&
1364 (0 == sb_strncmp (&R_temp_kk, R_last_kj, length)))
1365 {
1366 struct StringBuffer temp_a;
1367 struct StringBuffer temp_b;
1368
1369 sb_init (&temp_a, length);
1370 sb_init (&temp_b, R_last_kj->slen - length);
1371
1372 length_l = length;
1373 temp_a.sbuf = temp_a.abuf;
1374 GNUNET_memcpy (temp_a.sbuf, R_last_kj->sbuf, length_l);
1375 temp_a.slen = length_l;
1376
1377 length_r = R_last_kj->slen - length;
1378 temp_b.sbuf = temp_b.abuf;
1379 GNUNET_memcpy (temp_b.sbuf, &R_last_kj->sbuf[length], length_r);
1380 temp_b.slen = length_r;
1381
1382 /* e|(ab)+ = (ab)* */
1383 if ((GNUNET_YES != R_cur_l->null_flag) && (0 == R_cur_l->slen) &&
1384 (0 == temp_b.slen))
1385 {
1386 sb_printf2 (R_cur_r, "(%.*s%.*s)*", 3, R_last_ik, &temp_a);
1387 sb_free (R_cur_l);
1388 R_cur_l->null_flag = GNUNET_YES;
1389 }
1390 else
1391 {
1392 sb_printf3 (R_cur_r, "(%.*s%.*s)+%.*s", 3, R_last_ik, &temp_a, &temp_b);
1393 }
1394 sb_free (&temp_a);
1395 sb_free (&temp_b);
1396 }
1397 else if ((0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) &&
1398 (0 == sb_strcmp (&R_temp_kk, &R_temp_kj)) )
1399 {
1400 /*
1401 * (e|a)a*(e|a) = a*
1402 * (e|a)(e|a)*(e|a) = a*
1403 */
1404 if (has_epsilon (R_last_ik) && has_epsilon (R_last_kj))
1405 {
1406 if (needs_parentheses (&R_temp_kk))
1407 sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_kk);
1408 else
1409 sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_kk);
1410 }
1411 /* aa*a = a+a */
1412 else if ((0 == clean_ik_kk_cmp) && (0 == clean_kk_kj_cmp) &&
1413 (! has_epsilon (R_last_ik)))
1414 {
1415 if (needs_parentheses (&R_temp_kk))
1416 sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, &R_temp_kk);
1417 else
1418 sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, &R_temp_kk);
1419 }
1420 /*
1421 * (e|a)a*a = a+
1422 * aa*(e|a) = a+
1423 * a(e|a)*(e|a) = a+
1424 * (e|a)a*a = a+
1425 */
1426 else
1427 {
1428 eps_check = (has_epsilon (R_last_ik) + has_epsilon (R_last_kk)
1429 + has_epsilon (R_last_kj));
1430
1431 if (1 == eps_check)
1432 {
1433 if (needs_parentheses (&R_temp_kk))
1434 sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_kk);
1435 else
1436 sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_kk);
1437 }
1438 }
1439 }
1440 /*
1441 * aa*b = a+b
1442 * (e|a)(e|a)*b = a*b
1443 */
1444 else if (0 == sb_strcmp (&R_temp_ik, &R_temp_kk))
1445 {
1446 if (has_epsilon (R_last_ik))
1447 {
1448 if (needs_parentheses (&R_temp_kk))
1449 sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
1450 else
1451 sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
1452 }
1453 else
1454 {
1455 if (needs_parentheses (&R_temp_kk))
1456 sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, R_last_kj);
1457 else
1458 sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, R_last_kj);
1459 }
1460 }
1461 /*
1462 * ba*a = ba+
1463 * b(e|a)*(e|a) = ba*
1464 */
1465 else if (0 == sb_strcmp (&R_temp_kk, &R_temp_kj))
1466 {
1467 if (has_epsilon (R_last_kj))
1468 {
1469 if (needs_parentheses (&R_temp_kk))
1470 sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ik, &R_temp_kk);
1471 else
1472 sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ik, &R_temp_kk);
1473 }
1474 else
1475 {
1476 if (needs_parentheses (&R_temp_kk))
1477 sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, R_last_ik, &R_temp_kk);
1478 else
1479 sb_printf2 (R_cur_r, "%.*s+%.*s", 1, R_last_ik, &R_temp_kk);
1480 }
1481 }
1482 else
1483 {
1484 if (0 < R_temp_kk.slen)
1485 {
1486 if (needs_parentheses (&R_temp_kk))
1487 {
1488 sb_printf3 (R_cur_r,
1489 "%.*s(%.*s)*%.*s",
1490 3,
1491 R_last_ik,
1492 &R_temp_kk,
1493 R_last_kj);
1494 }
1495 else
1496 {
1497 sb_printf3 (R_cur_r,
1498 "%.*s%.*s*%.*s",
1499 1,
1500 R_last_ik,
1501 &R_temp_kk,
1502 R_last_kj);
1503 }
1504 }
1505 else
1506 {
1507 sb_printf2 (R_cur_r, "%.*s%.*s", 0, R_last_ik, R_last_kj);
1508 }
1509 }
1510 }
1511 sb_free (&R_temp_ij);
1512 sb_free (&R_temp_ik);
1513 sb_free (&R_temp_kk);
1514 sb_free (&R_temp_kj);
1515
1516 if ((GNUNET_YES == R_cur_l->null_flag) && (GNUNET_YES == R_cur_r->null_flag))
1517 {
1518 R_cur_ij->null_flag = GNUNET_YES;
1519 return;
1520 }
1521
1522 if ((GNUNET_YES != R_cur_l->null_flag) && (GNUNET_YES == R_cur_r->null_flag))
1523 {
1524 struct StringBuffer tmp;
1525
1526 tmp = *R_cur_ij;
1527 *R_cur_ij = *R_cur_l;
1528 *R_cur_l = tmp;
1529 return;
1530 }
1531
1532 if ((GNUNET_YES == R_cur_l->null_flag) && (GNUNET_YES != R_cur_r->null_flag))
1533 {
1534 struct StringBuffer tmp;
1535
1536 tmp = *R_cur_ij;
1537 *R_cur_ij = *R_cur_r;
1538 *R_cur_r = tmp;
1539 return;
1540 }
1541
1542 if (0 == sb_nullstrcmp (R_cur_l, R_cur_r))
1543 {
1544 struct StringBuffer tmp;
1545
1546 tmp = *R_cur_ij;
1547 *R_cur_ij = *R_cur_l;
1548 *R_cur_l = tmp;
1549 return;
1550 }
1551 sb_printf2 (R_cur_ij, "(%.*s|%.*s)", 3, R_cur_l, R_cur_r);
1552}
1553
1554
1566static int
1568{
1569 unsigned int n = a->state_count;
1570 struct REGEX_INTERNAL_State *states[n];
1571 struct StringBuffer *R_last;
1572 struct StringBuffer *R_cur;
1573 struct StringBuffer R_cur_r;
1574 struct StringBuffer R_cur_l;
1575 struct StringBuffer *R_swap;
1577 struct StringBuffer complete_regex;
1578 unsigned int i;
1579 unsigned int j;
1580 unsigned int k;
1581
1582 R_last = GNUNET_malloc_large (sizeof(struct StringBuffer) * n * n);
1583 R_cur = GNUNET_malloc_large (sizeof(struct StringBuffer) * n * n);
1584 if ((NULL == R_last) || (NULL == R_cur))
1585 {
1587 GNUNET_free (R_cur);
1588 GNUNET_free (R_last);
1589 return GNUNET_SYSERR;
1590 }
1591
1592 /* create depth-first numbering of the states, initializes 'state' */
1594 a->start,
1595 NULL,
1596 NULL,
1598 states);
1599
1600 for (i = 0; i < n; i++)
1601 GNUNET_assert (NULL != states[i]);
1602 for (i = 0; i < n; i++)
1603 for (j = 0; j < n; j++)
1604 R_last[i * n + j].null_flag = GNUNET_YES;
1605
1606 /* Compute regular expressions of length "1" between each pair of states */
1607 for (i = 0; i < n; i++)
1608 {
1609 for (t = states[i]->transitions_head; NULL != t; t = t->next)
1610 {
1611 j = t->to_state->dfs_id;
1612 if (GNUNET_YES == R_last[i * n + j].null_flag)
1613 {
1614 sb_strdup_cstr (&R_last[i * n + j], t->label);
1615 }
1616 else
1617 {
1618 sb_append_cstr (&R_last[i * n + j], "|");
1619 sb_append_cstr (&R_last[i * n + j], t->label);
1620 }
1621 }
1622 /* add self-loop: i is reachable from i via epsilon-transition */
1623 if (GNUNET_YES == R_last[i * n + i].null_flag)
1624 {
1625 R_last[i * n + i].slen = 0;
1626 R_last[i * n + i].null_flag = GNUNET_NO;
1627 }
1628 else
1629 {
1630 sb_wrap (&R_last[i * n + i], "(|%.*s)", 3);
1631 }
1632 }
1633 for (i = 0; i < n; i++)
1634 for (j = 0; j < n; j++)
1635 if (needs_parentheses (&R_last[i * n + j]))
1636 sb_wrap (&R_last[i * n + j], "(%.*s)", 2);
1637 /* Compute regular expressions of length "k" between each pair of states per
1638 * induction */
1639 memset (&R_cur_l, 0, sizeof(struct StringBuffer));
1640 memset (&R_cur_r, 0, sizeof(struct StringBuffer));
1641 for (k = 0; k < n; k++)
1642 {
1643 for (i = 0; i < n; i++)
1644 {
1645 for (j = 0; j < n; j++)
1646 {
1647 /* Basis for the recursion:
1648 * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1649 * R_last == R^{(k-1)}, R_cur == R^{(k)}
1650 */
1651
1652 /* Create R_cur[i][j] and simplify the expression */
1653 automaton_create_proofs_simplify (&R_last[i * n + j],
1654 &R_last[i * n + k],
1655 &R_last[k * n + k],
1656 &R_last[k * n + j],
1657 &R_cur[i * n + j],
1658 &R_cur_l,
1659 &R_cur_r);
1660 }
1661 }
1662 /* set R_last = R_cur */
1663 R_swap = R_last;
1664 R_last = R_cur;
1665 R_cur = R_swap;
1666 /* clear 'R_cur' for next iteration */
1667 for (i = 0; i < n; i++)
1668 for (j = 0; j < n; j++)
1669 R_cur[i * n + j].null_flag = GNUNET_YES;
1670 }
1671 sb_free (&R_cur_l);
1672 sb_free (&R_cur_r);
1673 /* assign proofs and hashes */
1674 for (i = 0; i < n; i++)
1675 {
1676 if (GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag)
1677 {
1678 states[i]->proof = GNUNET_strndup (R_last[a->start->dfs_id * n + i].sbuf,
1679 R_last[a->start->dfs_id * n + i].slen);
1680 GNUNET_CRYPTO_hash (states[i]->proof,
1681 strlen (states[i]->proof),
1682 &states[i]->hash);
1683 }
1684 }
1685
1686 /* complete regex for whole DFA: union of all pairs (start state/accepting
1687 * state(s)). */
1688 sb_init (&complete_regex, 16 * n);
1689 for (i = 0; i < n; i++)
1690 {
1691 if (states[i]->accepting)
1692 {
1693 if ((0 == complete_regex.slen) &&
1694 (0 < R_last[a->start->dfs_id * n + i].slen))
1695 {
1696 sb_append (&complete_regex, &R_last[a->start->dfs_id * n + i]);
1697 }
1698 else if ((GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag) &&
1699 (0 < R_last[a->start->dfs_id * n + i].slen))
1700 {
1701 sb_append_cstr (&complete_regex, "|");
1702 sb_append (&complete_regex, &R_last[a->start->dfs_id * n + i]);
1703 }
1704 }
1705 }
1706 a->canonical_regex =
1707 GNUNET_strndup (complete_regex.sbuf, complete_regex.slen);
1708
1709 /* cleanup */
1710 sb_free (&complete_regex);
1711 for (i = 0; i < n; i++)
1712 for (j = 0; j < n; j++)
1713 {
1714 sb_free (&R_cur[i * n + j]);
1715 sb_free (&R_last[i * n + j]);
1716 }
1717 GNUNET_free (R_cur);
1718 GNUNET_free (R_last);
1719 return GNUNET_OK;
1720}
1721
1722
1732static struct REGEX_INTERNAL_State *
1734 struct REGEX_INTERNAL_StateSet *nfa_states)
1735{
1736 struct REGEX_INTERNAL_State *s;
1737 char *pos;
1738 size_t len;
1739 struct REGEX_INTERNAL_State *cstate;
1740 struct REGEX_INTERNAL_Transition *ctran;
1741 unsigned int i;
1742
1743 s = GNUNET_new (struct REGEX_INTERNAL_State);
1744 s->id = ctx->state_id++;
1745 s->index = -1;
1746 s->lowlink = -1;
1747
1748 if (NULL == nfa_states)
1749 {
1750 GNUNET_asprintf (&s->name, "s%i", s->id);
1751 return s;
1752 }
1753
1754 s->nfa_set = *nfa_states;
1755
1756 if (nfa_states->off < 1)
1757 return s;
1758
1759 /* Create a name based on 'nfa_states' */
1760 len = nfa_states->off * 14 + 4;
1761 s->name = GNUNET_malloc (len);
1762 strcat (s->name, "{");
1763 pos = s->name + 1;
1764
1765 for (i = 0; i < nfa_states->off; i++)
1766 {
1767 cstate = nfa_states->states[i];
1768 GNUNET_snprintf (pos, pos - s->name + len, "%i,", cstate->id);
1769 pos += strlen (pos);
1770
1771 /* Add a transition for each distinct label to NULL state */
1772 for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next)
1773 if (NULL != ctran->label)
1774 state_add_transition (ctx, s, ctran->label, NULL);
1775
1776 /* If the nfa_states contain an accepting state, the new dfa state is also
1777 * accepting. */
1778 if (cstate->accepting)
1779 s->accepting = 1;
1780 }
1781 pos[-1] = '}';
1782 s->name = GNUNET_realloc (s->name, strlen (s->name) + 1);
1783
1784 memset (nfa_states, 0, sizeof(struct REGEX_INTERNAL_StateSet));
1785 return s;
1786}
1787
1788
1802static unsigned int
1803dfa_move (struct REGEX_INTERNAL_State **s, const char *str)
1804{
1806 struct REGEX_INTERNAL_State *new_s;
1807 unsigned int len;
1808 unsigned int max_len;
1809
1810 if (NULL == s)
1811 return 0;
1812
1813 new_s = NULL;
1814 max_len = 0;
1815 for (t = (*s)->transitions_head; NULL != t; t = t->next)
1816 {
1817 len = strlen (t->label);
1818
1819 if (0 == strncmp (t->label, str, len))
1820 {
1821 if (len >= max_len)
1822 {
1823 max_len = len;
1824 new_s = t->to_state;
1825 }
1826 }
1827 }
1828
1829 *s = new_s;
1830 return max_len;
1831}
1832
1833
1843static void
1844mark_states (void *cls,
1845 const unsigned int count,
1846 struct REGEX_INTERNAL_State *s)
1847{
1848 s->marked = GNUNET_YES;
1849}
1850
1851
1858static void
1860{
1861 struct REGEX_INTERNAL_State *s;
1862 struct REGEX_INTERNAL_State *s_next;
1863
1864 /* 1. unmark all states */
1865 for (s = a->states_head; NULL != s; s = s->next)
1866 s->marked = GNUNET_NO;
1867
1868 /* 2. traverse dfa from start state and mark all visited states */
1870 a->start,
1871 NULL,
1872 NULL,
1873 &mark_states,
1874 NULL);
1875
1876 /* 3. delete all states that were not visited */
1877 for (s = a->states_head; NULL != s; s = s_next)
1878 {
1879 s_next = s->next;
1880 if (GNUNET_NO == s->marked)
1882 }
1883}
1884
1885
1892static void
1894{
1895 struct REGEX_INTERNAL_State *s;
1896 struct REGEX_INTERNAL_State *s_next;
1898 int dead;
1899
1900 GNUNET_assert (DFA == a->type);
1901
1902 for (s = a->states_head; NULL != s; s = s_next)
1903 {
1904 s_next = s->next;
1905
1906 if (s->accepting)
1907 continue;
1908
1909 dead = 1;
1910 for (t = s->transitions_head; NULL != t; t = t->next)
1911 {
1912 if ((NULL != t->to_state) && (t->to_state != s) )
1913 {
1914 dead = 0;
1915 break;
1916 }
1917 }
1918
1919 if (0 == dead)
1920 continue;
1921
1922 /* state s is dead, remove it */
1924 }
1925}
1926
1927
1935static int
1937 struct REGEX_INTERNAL_Automaton *a)
1938{
1939 uint32_t *table;
1940 struct REGEX_INTERNAL_State *s1;
1941 struct REGEX_INTERNAL_State *s2;
1942 struct REGEX_INTERNAL_Transition *t1;
1943 struct REGEX_INTERNAL_Transition *t2;
1944 struct REGEX_INTERNAL_State *s1_next;
1945 struct REGEX_INTERNAL_State *s2_next;
1946 int change;
1947 unsigned int num_equal_edges;
1948 unsigned int i;
1949 unsigned int state_cnt;
1950 unsigned long long idx;
1951 unsigned long long idx1;
1952
1953 if ((NULL == a) || (0 == a->state_count))
1954 {
1956 "Could not merge nondistinguishable states, automaton was NULL.\n");
1957 return GNUNET_SYSERR;
1958 }
1959
1960 state_cnt = a->state_count;
1962 (sizeof(uint32_t) * state_cnt * state_cnt / 32) + sizeof(uint32_t));
1963 if (NULL == table)
1964 {
1966 return GNUNET_SYSERR;
1967 }
1968
1969 for (i = 0, s1 = a->states_head; NULL != s1; s1 = s1->next)
1970 s1->marked = i++;
1971
1972 /* Mark all pairs of accepting/!accepting states */
1973 for (s1 = a->states_head; NULL != s1; s1 = s1->next)
1974 for (s2 = a->states_head; NULL != s2; s2 = s2->next)
1975 if ((s1->accepting && ! s2->accepting) ||
1976 (! s1->accepting && s2->accepting))
1977 {
1978 idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
1979 table[idx / 32] |= (1U << (idx % 32));
1980 }
1981
1982 /* Find all equal states */
1983 change = 1;
1984 while (0 != change)
1985 {
1986 change = 0;
1987 for (s1 = a->states_head; NULL != s1; s1 = s1->next)
1988 {
1989 for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2->next)
1990 {
1991 idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
1992 if (0 != (table[idx / 32] & (1U << (idx % 32))))
1993 continue;
1994 num_equal_edges = 0;
1995 for (t1 = s1->transitions_head; NULL != t1; t1 = t1->next)
1996 {
1997 for (t2 = s2->transitions_head; NULL != t2; t2 = t2->next)
1998 {
1999 if (0 == strcmp (t1->label, t2->label))
2000 {
2001 num_equal_edges++;
2002 /* same edge, but targets definitively different, so we're different
2003 as well */
2004 if (t1->to_state->marked > t2->to_state->marked)
2005 idx1 = (unsigned long long) t1->to_state->marked * state_cnt
2006 + t2->to_state->marked;
2007 else
2008 idx1 = (unsigned long long) t2->to_state->marked * state_cnt
2009 + t1->to_state->marked;
2010 if (0 != (table[idx1 / 32] & (1U << (idx1 % 32))))
2011 {
2012 table[idx / 32] |= (1U << (idx % 32));
2013 change = 1; /* changed a marker, need to run again */
2014 }
2015 }
2016 }
2017 }
2018 if ((num_equal_edges != s1->transition_count) ||
2019 (num_equal_edges != s2->transition_count))
2020 {
2021 /* Make sure ALL edges of possible equal states are the same */
2022 table[idx / 32] |= (1U << (idx % 32));
2023 change = 1; /* changed a marker, need to run again */
2024 }
2025 }
2026 }
2027 }
2028
2029 /* Merge states that are equal */
2030 for (s1 = a->states_head; NULL != s1; s1 = s1_next)
2031 {
2032 s1_next = s1->next;
2033 for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2_next)
2034 {
2035 s2_next = s2->next;
2036 idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
2037 if (0 == (table[idx / 32] & (1U << (idx % 32))))
2038 automaton_merge_states (ctx, a, s1, s2);
2039 }
2040 }
2041
2043 return GNUNET_OK;
2044}
2045
2046
2055static int
2057 struct REGEX_INTERNAL_Automaton *a)
2058{
2059 if (NULL == a)
2060 return GNUNET_SYSERR;
2061
2062 GNUNET_assert (DFA == a->type);
2063
2064 /* 1. remove unreachable states */
2066
2067 /* 2. remove dead states */
2069
2070 /* 3. Merge nondistinguishable states */
2072 return GNUNET_SYSERR;
2073 return GNUNET_OK;
2074}
2075
2076
2081{
2085 const unsigned int stride;
2086
2092
2097};
2098
2099
2110static void
2112 const unsigned int depth,
2113 char *label,
2115 struct REGEX_INTERNAL_State *s)
2116{
2117 struct REGEX_INTERNAL_Strided_Context *ctx = cls;
2119 char *new_label;
2120
2121 if (depth == ctx->stride)
2122 {
2124 t->label = GNUNET_strdup (label);
2125 t->to_state = s;
2126 t->from_state = start;
2127 GNUNET_CONTAINER_DLL_insert (ctx->transitions_head,
2128 ctx->transitions_tail,
2129 t);
2130 }
2131 else
2132 {
2133 for (t = s->transitions_head; NULL != t; t = t->next)
2134 {
2135 /* Do not consider self-loops, because it end's up in too many
2136 * transitions */
2137 if (t->to_state == t->from_state)
2138 continue;
2139
2140 if (NULL != label)
2141 {
2142 GNUNET_asprintf (&new_label, "%s%s", label, t->label);
2143 }
2144 else
2145 new_label = GNUNET_strdup (t->label);
2146
2148 (depth + 1),
2149 new_label,
2150 start,
2151 t->to_state);
2152 }
2153 }
2155}
2156
2157
2166static void
2168 const unsigned int count,
2169 struct REGEX_INTERNAL_State *s)
2170{
2171 dfa_add_multi_strides_helper (cls, 0, NULL, s, s);
2172}
2173
2174
2182void
2184 struct REGEX_INTERNAL_Automaton *dfa,
2185 const unsigned int stride_len)
2186{
2187 struct REGEX_INTERNAL_Strided_Context ctx = { stride_len, NULL, NULL };
2189 struct REGEX_INTERNAL_Transition *t_next;
2190
2191 if ((1 > stride_len) || (GNUNET_YES == dfa->is_multistrided))
2192 return;
2193
2194 /* Compute the new transitions of given stride_len */
2196 dfa->start,
2197 NULL,
2198 NULL,
2200 &ctx);
2201
2202 /* Add all the new transitions to the automaton. */
2203 for (t = ctx.transitions_head; NULL != t; t = t_next)
2204 {
2205 t_next = t->next;
2206 state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
2207 GNUNET_CONTAINER_DLL_remove (ctx.transitions_head, ctx.transitions_tail, t);
2208 GNUNET_free (t->label);
2209 GNUNET_free (t);
2210 }
2211
2212 /* Mark this automaton as multistrided */
2214}
2215
2216
2230static void
2233 struct REGEX_INTERNAL_State *cur,
2234 char *label,
2235 unsigned int max_len,
2236 struct REGEX_INTERNAL_Transition **transitions_head,
2237 struct REGEX_INTERNAL_Transition **transitions_tail)
2238{
2240 char *new_label;
2241
2242
2243 if ((NULL != label) &&
2244 (((cur->incoming_transition_count > 1) || (GNUNET_YES ==
2245 cur->accepting) ||
2246 (GNUNET_YES == cur->marked) ) ||
2247 ((start != dfa->start) && (max_len > 0) && (max_len == strlen (
2248 label))) ||
2249 ((start == dfa->start) && (GNUNET_REGEX_INITIAL_BYTES == strlen (
2250 label)))))
2251 {
2253 t->label = GNUNET_strdup (label);
2254 t->to_state = cur;
2255 t->from_state = start;
2256 GNUNET_CONTAINER_DLL_insert (*transitions_head, *transitions_tail, t);
2257
2258 if (GNUNET_NO == cur->marked)
2259 {
2261 cur,
2262 cur,
2263 NULL,
2264 max_len,
2265 transitions_head,
2266 transitions_tail);
2267 }
2268 return;
2269 }
2270 else if (cur != start)
2271 cur->contained = GNUNET_YES;
2272
2273 if ((GNUNET_YES == cur->marked) && (cur != start))
2274 return;
2275
2276 cur->marked = GNUNET_YES;
2277
2278
2279 for (t = cur->transitions_head; NULL != t; t = t->next)
2280 {
2281 if (NULL != label)
2282 GNUNET_asprintf (&new_label, "%s%s", label, t->label);
2283 else
2284 new_label = GNUNET_strdup (t->label);
2285
2286 if (t->to_state != cur)
2287 {
2289 start,
2290 t->to_state,
2291 new_label,
2292 max_len,
2293 transitions_head,
2294 transitions_tail);
2295 }
2296 GNUNET_free (new_label);
2297 }
2298}
2299
2300
2309static void
2311 struct REGEX_INTERNAL_Automaton *dfa,
2312 unsigned int max_len)
2313{
2314 struct REGEX_INTERNAL_State *s;
2315 struct REGEX_INTERNAL_State *s_next;
2317 struct REGEX_INTERNAL_Transition *t_next;
2318 struct REGEX_INTERNAL_Transition *transitions_head = NULL;
2319 struct REGEX_INTERNAL_Transition *transitions_tail = NULL;
2320
2321 if (NULL == dfa)
2322 return;
2323
2324 /* Count the incoming transitions on each state. */
2325 for (s = dfa->states_head; NULL != s; s = s->next)
2326 {
2327 for (t = s->transitions_head; NULL != t; t = t->next)
2328 {
2329 if (NULL != t->to_state)
2330 t->to_state->incoming_transition_count++;
2331 }
2332 }
2333
2334 /* Unmark all states. */
2335 for (s = dfa->states_head; NULL != s; s = s->next)
2336 {
2337 s->marked = GNUNET_NO;
2338 s->contained = GNUNET_NO;
2339 }
2340
2341 /* Add strides and mark states that can be deleted. */
2343 dfa->start,
2344 dfa->start,
2345 NULL,
2346 max_len,
2347 &transitions_head,
2348 &transitions_tail);
2349
2350 /* Add all the new transitions to the automaton. */
2351 for (t = transitions_head; NULL != t; t = t_next)
2352 {
2353 t_next = t->next;
2354 state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
2355 GNUNET_CONTAINER_DLL_remove (transitions_head, transitions_tail, t);
2356 GNUNET_free (t->label);
2357 GNUNET_free (t);
2358 }
2359
2360 /* Remove marked states (including their incoming and outgoing transitions). */
2361 for (s = dfa->states_head; NULL != s; s = s_next)
2362 {
2363 s_next = s->next;
2364 if (GNUNET_YES == s->contained)
2365 automaton_remove_state (dfa, s);
2366 }
2367}
2368
2369
2379static struct REGEX_INTERNAL_Automaton *
2381 struct REGEX_INTERNAL_State *end)
2382{
2383 struct REGEX_INTERNAL_Automaton *n;
2384
2386
2387 n->type = NFA;
2388 n->start = NULL;
2389 n->end = NULL;
2390 n->state_count = 0;
2391
2392 if ((NULL == start) || (NULL == end))
2393 return n;
2394
2397
2398 n->state_count = 2;
2399
2400 n->start = start;
2401 n->end = end;
2402
2403 return n;
2404}
2405
2406
2414static void
2418{
2419 struct REGEX_INTERNAL_State *s;
2420
2421 if ((NULL == n) || (NULL == states_head))
2422 {
2423 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not add states\n");
2424 return;
2425 }
2426
2427 if (NULL == n->states_head)
2428 {
2429 n->states_head = states_head;
2430 n->states_tail = states_tail;
2431 return;
2432 }
2433
2434 if (NULL != states_head)
2435 {
2436 n->states_tail->next = states_head;
2437 n->states_tail = states_tail;
2438 }
2439
2440 for (s = states_head; NULL != s; s = s->next)
2441 n->state_count++;
2442}
2443
2444
2453static struct REGEX_INTERNAL_State *
2455{
2456 struct REGEX_INTERNAL_State *s;
2457
2458 s = GNUNET_new (struct REGEX_INTERNAL_State);
2459 s->id = ctx->state_id++;
2460 s->accepting = accepting;
2461 s->marked = GNUNET_NO;
2462 s->contained = 0;
2463 s->index = -1;
2464 s->lowlink = -1;
2465 s->scc_id = 0;
2466 s->name = NULL;
2467 GNUNET_asprintf (&s->name, "s%i", s->id);
2468
2469 return s;
2470}
2471
2472
2482static void
2484 struct REGEX_INTERNAL_Automaton *nfa,
2485 struct REGEX_INTERNAL_StateSet *states,
2486 const char *label)
2487{
2488 struct REGEX_INTERNAL_State *s;
2489 unsigned int i;
2490 struct REGEX_INTERNAL_StateSet_MDLL cls_stack;
2491 struct REGEX_INTERNAL_State *clsstate;
2492 struct REGEX_INTERNAL_State *currentstate;
2493 struct REGEX_INTERNAL_Transition *ctran;
2494
2495 memset (ret, 0, sizeof(struct REGEX_INTERNAL_StateSet));
2496 if (NULL == states)
2497 return;
2498
2499 for (i = 0; i < states->off; i++)
2500 {
2501 s = states->states[i];
2502
2503 /* Add start state to closure only for epsilon closure */
2504 if (NULL == label)
2505 state_set_append (ret, s);
2506
2507 /* initialize work stack */
2508 cls_stack.head = NULL;
2509 cls_stack.tail = NULL;
2510 GNUNET_CONTAINER_MDLL_insert (ST, cls_stack.head, cls_stack.tail, s);
2511 cls_stack.len = 1;
2512
2513 while (NULL != (currentstate = cls_stack.tail))
2514 {
2516 cls_stack.head,
2517 cls_stack.tail,
2518 currentstate);
2519 cls_stack.len--;
2520 for (ctran = currentstate->transitions_head; NULL != ctran;
2521 ctran = ctran->next)
2522 {
2523 if (NULL == (clsstate = ctran->to_state))
2524 continue;
2525 if (0 != clsstate->contained)
2526 continue;
2527 if (0 != nullstrcmp (label, ctran->label))
2528 continue;
2529 state_set_append (ret, clsstate);
2531 cls_stack.head,
2532 cls_stack.tail,
2533 clsstate);
2534 cls_stack.len++;
2535 clsstate->contained = 1;
2536 }
2537 }
2538 }
2539 for (i = 0; i < ret->off; i++)
2540 ret->states[i]->contained = 0;
2541
2542 if (ret->off > 1)
2543 qsort (ret->states,
2544 ret->off,
2545 sizeof(struct REGEX_INTERNAL_State *),
2546 &state_compare);
2547}
2548
2549
2555static void
2557{
2558 struct REGEX_INTERNAL_Automaton *a;
2559 struct REGEX_INTERNAL_Automaton *b;
2560 struct REGEX_INTERNAL_Automaton *new_nfa;
2561
2562 b = ctx->stack_tail;
2563 GNUNET_assert (NULL != b);
2564 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
2565 a = ctx->stack_tail;
2566 GNUNET_assert (NULL != a);
2567 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2568
2569 state_add_transition (ctx, a->end, NULL, b->start);
2570 a->end->accepting = 0;
2571 b->end->accepting = 1;
2572
2573 new_nfa = nfa_fragment_create (NULL, NULL);
2574 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2575 nfa_add_states (new_nfa, b->states_head, b->states_tail);
2576 new_nfa->start = a->start;
2577 new_nfa->end = b->end;
2578 new_nfa->state_count += a->state_count + b->state_count;
2581
2582 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2583}
2584
2585
2591static void
2593{
2594 struct REGEX_INTERNAL_Automaton *a;
2595 struct REGEX_INTERNAL_Automaton *new_nfa;
2597 struct REGEX_INTERNAL_State *end;
2598
2599 a = ctx->stack_tail;
2600
2601 if (NULL == a)
2602 {
2603 GNUNET_log (
2605 "nfa_add_star_op failed, because there was no element on the stack");
2606 return;
2607 }
2608
2609 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2610
2611 start = nfa_state_create (ctx, 0);
2612 end = nfa_state_create (ctx, 1);
2613
2614 state_add_transition (ctx, start, NULL, a->start);
2616 state_add_transition (ctx, a->end, NULL, a->start);
2617 state_add_transition (ctx, a->end, NULL, end);
2618
2619 a->end->accepting = 0;
2620 end->accepting = 1;
2621
2622 new_nfa = nfa_fragment_create (start, end);
2623 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2625
2626 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2627}
2628
2629
2635static void
2637{
2638 struct REGEX_INTERNAL_Automaton *a;
2639
2640 a = ctx->stack_tail;
2641
2642 if (NULL == a)
2643 {
2644 GNUNET_log (
2646 "nfa_add_plus_op failed, because there was no element on the stack");
2647 return;
2648 }
2649
2650 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2651
2652 state_add_transition (ctx, a->end, NULL, a->start);
2653
2654 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, a);
2655}
2656
2657
2663static void
2665{
2666 struct REGEX_INTERNAL_Automaton *a;
2667 struct REGEX_INTERNAL_Automaton *new_nfa;
2669 struct REGEX_INTERNAL_State *end;
2670
2671 a = ctx->stack_tail;
2672 if (NULL == a)
2673 {
2674 GNUNET_log (
2676 "nfa_add_question_op failed, because there was no element on the stack");
2677 return;
2678 }
2679
2680 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2681
2682 start = nfa_state_create (ctx, 0);
2683 end = nfa_state_create (ctx, 1);
2684
2685 state_add_transition (ctx, start, NULL, a->start);
2687 state_add_transition (ctx, a->end, NULL, end);
2688
2689 a->end->accepting = 0;
2690
2691 new_nfa = nfa_fragment_create (start, end);
2692 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2693 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2695}
2696
2697
2704static void
2706{
2707 struct REGEX_INTERNAL_Automaton *a;
2708 struct REGEX_INTERNAL_Automaton *b;
2709 struct REGEX_INTERNAL_Automaton *new_nfa;
2711 struct REGEX_INTERNAL_State *end;
2712
2713 b = ctx->stack_tail;
2714 GNUNET_assert (NULL != b);
2715 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
2716 a = ctx->stack_tail;
2717 GNUNET_assert (NULL != a);
2718 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2719
2720 start = nfa_state_create (ctx, 0);
2721 end = nfa_state_create (ctx, 1);
2722 state_add_transition (ctx, start, NULL, a->start);
2723 state_add_transition (ctx, start, NULL, b->start);
2724
2725 state_add_transition (ctx, a->end, NULL, end);
2726 state_add_transition (ctx, b->end, NULL, end);
2727
2728 a->end->accepting = 0;
2729 b->end->accepting = 0;
2730 end->accepting = 1;
2731
2732 new_nfa = nfa_fragment_create (start, end);
2733 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2734 nfa_add_states (new_nfa, b->states_head, b->states_tail);
2737
2738 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2739}
2740
2741
2748static void
2749nfa_add_label (struct REGEX_INTERNAL_Context *ctx, const char *label)
2750{
2751 struct REGEX_INTERNAL_Automaton *n;
2753 struct REGEX_INTERNAL_State *end;
2754
2755 GNUNET_assert (NULL != ctx);
2756
2757 start = nfa_state_create (ctx, 0);
2758 end = nfa_state_create (ctx, 1);
2759 state_add_transition (ctx, start, label, end);
2761 GNUNET_assert (NULL != n);
2762 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, n);
2763}
2764
2765
2771static void
2773{
2774 if (NULL == ctx)
2775 {
2776 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Context was NULL!");
2777 return;
2778 }
2779 ctx->state_id = 0;
2780 ctx->transition_id = 0;
2781 ctx->stack_head = NULL;
2782 ctx->stack_tail = NULL;
2783}
2784
2785
2795REGEX_INTERNAL_construct_nfa (const char *regex, const size_t len)
2796{
2798 struct REGEX_INTERNAL_Automaton *nfa;
2799 const char *regexp;
2800 char curlabel[2];
2801 const char *error_msg;
2802 unsigned int count;
2803 unsigned int altcount;
2804 unsigned int atomcount;
2805 unsigned int poff;
2806 unsigned int psize;
2807
2808 struct
2809 {
2810 int altcount;
2811 int atomcount;
2812 } *p;
2813
2814 if ((NULL == regex) || (0 == strlen (regex)) || (0 == len))
2815 {
2817 "Could not parse regex. Empty regex string provided.\n");
2818
2819 return NULL;
2820 }
2822
2823 regexp = regex;
2824 curlabel[1] = '\0';
2825 p = NULL;
2826 error_msg = NULL;
2827 altcount = 0;
2828 atomcount = 0;
2829 poff = 0;
2830 psize = 0;
2831
2832 for (count = 0; count < len && *regexp; count++, regexp++)
2833 {
2834 switch (*regexp)
2835 {
2836 case '(':
2837 if (atomcount > 1)
2838 {
2839 --atomcount;
2841 }
2842 if (poff == psize)
2843 GNUNET_array_grow (p, psize, psize * 2 + 4); /* FIXME why *2 +4? */
2844 p[poff].altcount = altcount;
2845 p[poff].atomcount = atomcount;
2846 poff++;
2847 altcount = 0;
2848 atomcount = 0;
2849 break;
2850
2851 case '|':
2852 if (0 == atomcount)
2853 {
2854 error_msg = "Cannot append '|' to nothing";
2855 goto error;
2856 }
2857 while (--atomcount > 0)
2859 altcount++;
2860 break;
2861
2862 case ')':
2863 if (0 == poff)
2864 {
2865 error_msg = "Missing opening '('";
2866 goto error;
2867 }
2868 if (0 == atomcount)
2869 {
2870 /* Ignore this: "()" */
2871 poff--;
2872 altcount = p[poff].altcount;
2873 atomcount = p[poff].atomcount;
2874 break;
2875 }
2876 while (--atomcount > 0)
2878 for (; altcount > 0; altcount--)
2880 poff--;
2881 altcount = p[poff].altcount;
2882 atomcount = p[poff].atomcount;
2883 atomcount++;
2884 break;
2885
2886 case '*':
2887 if (atomcount == 0)
2888 {
2889 error_msg = "Cannot append '*' to nothing";
2890 goto error;
2891 }
2893 break;
2894
2895 case '+':
2896 if (atomcount == 0)
2897 {
2898 error_msg = "Cannot append '+' to nothing";
2899 goto error;
2900 }
2902 break;
2903
2904 case '?':
2905 if (atomcount == 0)
2906 {
2907 error_msg = "Cannot append '?' to nothing";
2908 goto error;
2909 }
2911 break;
2912
2913 default:
2914 if (atomcount > 1)
2915 {
2916 --atomcount;
2918 }
2919 curlabel[0] = *regexp;
2920 nfa_add_label (&ctx, curlabel);
2921 atomcount++;
2922 break;
2923 }
2924 }
2925 if (0 != poff)
2926 {
2927 error_msg = "Unbalanced parenthesis";
2928 goto error;
2929 }
2930 while (--atomcount > 0)
2932 for (; altcount > 0; altcount--)
2934
2935 GNUNET_array_grow (p, psize, 0);
2936
2937 nfa = ctx.stack_tail;
2938 GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa);
2939
2940 if (NULL != ctx.stack_head)
2941 {
2942 error_msg = "Creating the NFA failed. NFA stack was not empty!";
2943 goto error;
2944 }
2945
2946 /* Remember the regex that was used to generate this NFA */
2947 nfa->regex = GNUNET_strdup (regex);
2948
2949 /* create depth-first numbering of the states for pretty printing */
2951 NULL,
2952 NULL,
2953 NULL,
2955 NULL);
2956
2957 /* No multistriding added so far */
2959
2960 return nfa;
2961
2962error:
2963 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not parse regex: `%s'\n", regex);
2964 if (NULL != error_msg)
2965 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "%s\n", error_msg);
2966
2967 GNUNET_free (p);
2968
2969 while (NULL != (nfa = ctx.stack_head))
2970 {
2971 GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa);
2973 }
2974
2975 return NULL;
2976}
2977
2978
2988static void
2990 struct REGEX_INTERNAL_Automaton *nfa,
2991 struct REGEX_INTERNAL_Automaton *dfa,
2992 struct REGEX_INTERNAL_State *dfa_state)
2993{
2994 struct REGEX_INTERNAL_Transition *ctran;
2995 struct REGEX_INTERNAL_State *new_dfa_state;
2996 struct REGEX_INTERNAL_State *state_contains;
2997 struct REGEX_INTERNAL_State *state_iter;
2998 struct REGEX_INTERNAL_StateSet tmp;
2999 struct REGEX_INTERNAL_StateSet nfa_set;
3000
3001 for (ctran = dfa_state->transitions_head; NULL != ctran; ctran = ctran->next)
3002 {
3003 if ((NULL == ctran->label) || (NULL != ctran->to_state) )
3004 continue;
3005
3006 nfa_closure_set_create (&tmp, nfa, &dfa_state->nfa_set, ctran->label);
3007 nfa_closure_set_create (&nfa_set, nfa, &tmp, NULL);
3008 state_set_clear (&tmp);
3009
3010 state_contains = NULL;
3011 for (state_iter = dfa->states_head; NULL != state_iter;
3012 state_iter = state_iter->next)
3013 {
3014 if (0 == state_set_compare (&state_iter->nfa_set, &nfa_set))
3015 {
3016 state_contains = state_iter;
3017 break;
3018 }
3019 }
3020 if (NULL == state_contains)
3021 {
3022 new_dfa_state = dfa_state_create (ctx, &nfa_set);
3023 automaton_add_state (dfa, new_dfa_state);
3024 ctran->to_state = new_dfa_state;
3025 construct_dfa_states (ctx, nfa, dfa, new_dfa_state);
3026 }
3027 else
3028 {
3029 ctran->to_state = state_contains;
3030 state_set_clear (&nfa_set);
3031 }
3032 }
3033}
3034
3035
3038 const size_t len,
3039 unsigned int max_path_len)
3040{
3042 struct REGEX_INTERNAL_Automaton *dfa;
3043 struct REGEX_INTERNAL_Automaton *nfa;
3044 struct REGEX_INTERNAL_StateSet nfa_start_eps_cls;
3045 struct REGEX_INTERNAL_StateSet singleton_set;
3046
3048
3049 /* Create NFA */
3050 nfa = REGEX_INTERNAL_construct_nfa (regex, len);
3051
3052 if (NULL == nfa)
3053 {
3055 "Could not create DFA, because NFA creation failed\n");
3056 return NULL;
3057 }
3058
3059 dfa = GNUNET_new (struct REGEX_INTERNAL_Automaton);
3060 dfa->type = DFA;
3061 dfa->regex = GNUNET_strdup (regex);
3062
3063 /* Create DFA start state from epsilon closure */
3064 memset (&singleton_set, 0, sizeof(struct REGEX_INTERNAL_StateSet));
3065 state_set_append (&singleton_set, nfa->start);
3066 nfa_closure_set_create (&nfa_start_eps_cls, nfa, &singleton_set, NULL);
3067 state_set_clear (&singleton_set);
3068 dfa->start = dfa_state_create (&ctx, &nfa_start_eps_cls);
3069 automaton_add_state (dfa, dfa->start);
3070
3071 construct_dfa_states (&ctx, nfa, dfa, dfa->start);
3073
3074 /* Minimize DFA */
3075 if (GNUNET_OK != dfa_minimize (&ctx, dfa))
3076 {
3078 return NULL;
3079 }
3080
3081 /* Create proofs and hashes for all states */
3083 {
3085 return NULL;
3086 }
3087
3088 /* Compress linear DFA paths */
3089 if (1 != max_path_len)
3090 dfa_compress_paths (&ctx, dfa, max_path_len);
3091
3092 return dfa;
3093}
3094
3095
3096void
3098{
3099 struct REGEX_INTERNAL_State *s;
3100 struct REGEX_INTERNAL_State *next_state;
3101
3102 if (NULL == a)
3103 return;
3104
3105 GNUNET_free (a->regex);
3107
3108 for (s = a->states_head; NULL != s; s = next_state)
3109 {
3110 next_state = s->next;
3113 }
3114
3115 GNUNET_free (a);
3116}
3117
3118
3127static int
3128evaluate_dfa (struct REGEX_INTERNAL_Automaton *a, const char *string)
3129{
3130 const char *strp;
3131 struct REGEX_INTERNAL_State *s;
3132 unsigned int step_len;
3133
3134 if (DFA != a->type)
3135 {
3137 "Tried to evaluate DFA, but NFA automaton given");
3138 return -1;
3139 }
3140
3141 s = a->start;
3142
3143 /* If the string is empty but the starting state is accepting, we accept. */
3144 if (((NULL == string) || (0 == strlen (string))) && s->accepting)
3145 return 0;
3146
3147 for (strp = string; NULL != strp && *strp; strp += step_len)
3148 {
3149 step_len = dfa_move (&s, strp);
3150
3151 if (NULL == s)
3152 break;
3153 }
3154
3155 if ((NULL != s) && s->accepting)
3156 return 0;
3157
3158 return 1;
3159}
3160
3161
3169static int
3170evaluate_nfa (struct REGEX_INTERNAL_Automaton *a, const char *string)
3171{
3172 const char *strp;
3173 char str[2];
3174 struct REGEX_INTERNAL_State *s;
3175 struct REGEX_INTERNAL_StateSet sset;
3176 struct REGEX_INTERNAL_StateSet new_sset;
3177 struct REGEX_INTERNAL_StateSet singleton_set;
3178 unsigned int i;
3179 int result;
3180
3181 if (NFA != a->type)
3182 {
3184 "Tried to evaluate NFA, but DFA automaton given");
3185 return -1;
3186 }
3187
3188 /* If the string is empty but the starting state is accepting, we accept. */
3189 if (((NULL == string) || (0 == strlen (string))) && a->start->accepting)
3190 return 0;
3191
3192 result = 1;
3193 memset (&singleton_set, 0, sizeof(struct REGEX_INTERNAL_StateSet));
3194 state_set_append (&singleton_set, a->start);
3195 nfa_closure_set_create (&sset, a, &singleton_set, NULL);
3196 state_set_clear (&singleton_set);
3197
3198 str[1] = '\0';
3199 for (strp = string; NULL != strp && *strp; strp++)
3200 {
3201 str[0] = *strp;
3202 nfa_closure_set_create (&new_sset, a, &sset, str);
3203 state_set_clear (&sset);
3204 nfa_closure_set_create (&sset, a, &new_sset, 0);
3205 state_set_clear (&new_sset);
3206 }
3207
3208 for (i = 0; i < sset.off; i++)
3209 {
3210 s = sset.states[i];
3211 if ((NULL != s) && (s->accepting))
3212 {
3213 result = 0;
3214 break;
3215 }
3216 }
3217
3218 state_set_clear (&sset);
3219 return result;
3220}
3221
3222
3223int
3224REGEX_INTERNAL_eval (struct REGEX_INTERNAL_Automaton *a, const char *string)
3225{
3226 int result;
3227
3228 switch (a->type)
3229 {
3230 case DFA:
3231 result = evaluate_dfa (a, string);
3232 break;
3233
3234 case NFA:
3235 result = evaluate_nfa (a, string);
3236 break;
3237
3238 default:
3240 "Evaluating regex failed, automaton has no type!\n");
3242 break;
3243 }
3244
3245 return result;
3246}
3247
3248
3249const char *
3251{
3252 if (NULL == a)
3253 return NULL;
3254
3255 return a->canonical_regex;
3256}
3257
3258
3266unsigned int
3268{
3269 unsigned int t_count;
3270 struct REGEX_INTERNAL_State *s;
3271
3272 if (NULL == a)
3273 return 0;
3274
3275 t_count = 0;
3276 for (s = a->states_head; NULL != s; s = s->next)
3277 t_count += s->transition_count;
3278
3279 return t_count;
3280}
3281
3282
3293size_t
3294REGEX_INTERNAL_get_first_key (const char *input_string,
3295 size_t string_len,
3296 struct GNUNET_HashCode *key)
3297{
3298 size_t size;
3299
3300 size = string_len < GNUNET_REGEX_INITIAL_BYTES ? string_len
3302 if (NULL == input_string)
3303 {
3304 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Given input string was NULL!\n");
3305 return 0;
3306 }
3307 GNUNET_CRYPTO_hash (input_string, size, key);
3308
3309 return size;
3310}
3311
3312
3323static void
3324iterate_initial_edge (unsigned int min_len,
3325 unsigned int max_len,
3326 char *consumed_string,
3329 void *iterator_cls)
3330{
3331 char *temp;
3333 unsigned int num_edges = state->transition_count;
3334 struct REGEX_BLOCK_Edge edges[num_edges];
3335 struct REGEX_BLOCK_Edge edge[1];
3336 struct GNUNET_HashCode hash;
3337 struct GNUNET_HashCode hash_new;
3338 unsigned int cur_len;
3339
3340 if (NULL != consumed_string)
3341 cur_len = strlen (consumed_string);
3342 else
3343 cur_len = 0;
3344
3345 if (((cur_len >= min_len) || (GNUNET_YES == state->accepting)) &&
3346 (cur_len > 0) && (NULL != consumed_string))
3347 {
3348 if (cur_len <= max_len)
3349 {
3350 if ((NULL != state->proof) &&
3351 (0 != strcmp (consumed_string, state->proof)))
3352 {
3353 (void) state_get_edges (state, edges);
3354 GNUNET_CRYPTO_hash (consumed_string, strlen (consumed_string), &hash);
3356 "Start state for string `%s' is %s\n",
3357 consumed_string,
3358 GNUNET_h2s (&hash));
3359 iterator (iterator_cls,
3360 &hash,
3361 consumed_string,
3362 state->accepting,
3363 num_edges,
3364 edges);
3365 }
3366
3367 if ((GNUNET_YES == state->accepting) && (cur_len > 1) &&
3368 (state->transition_count < 1) && (cur_len < max_len))
3369 {
3370 /* Special case for regex consisting of just a string that is shorter than
3371 * max_len */
3372 edge[0].label = &consumed_string[cur_len - 1];
3373 edge[0].destination = state->hash;
3374 temp = GNUNET_strdup (consumed_string);
3375 temp[cur_len - 1] = '\0';
3376 GNUNET_CRYPTO_hash (temp, cur_len - 1, &hash_new);
3378 "Start state for short string `%s' is %s\n",
3379 temp,
3380 GNUNET_h2s (&hash_new));
3381 iterator (iterator_cls, &hash_new, temp, GNUNET_NO, 1, edge);
3382 GNUNET_free (temp);
3383 }
3384 }
3385 else /* cur_len > max_len */
3386 {
3387 /* Case where the concatenated labels are longer than max_len, then split. */
3388 edge[0].label = &consumed_string[max_len];
3389 edge[0].destination = state->hash;
3390 temp = GNUNET_strdup (consumed_string);
3391 temp[max_len] = '\0';
3392 GNUNET_CRYPTO_hash (temp, max_len, &hash);
3394 "Start state at split edge `%s'-`%s` is %s\n",
3395 temp,
3396 edge[0].label,
3397 GNUNET_h2s (&hash_new));
3398 iterator (iterator_cls, &hash, temp, GNUNET_NO, 1, edge);
3399 GNUNET_free (temp);
3400 }
3401 }
3402
3403 if (cur_len < max_len)
3404 {
3405 for (t = state->transitions_head; NULL != t; t = t->next)
3406 {
3407 if (NULL != strchr (t->label, (int) '.'))
3408 {
3409 /* Wildcards not allowed during starting states */
3410 GNUNET_break (0);
3411 continue;
3412 }
3413 if (NULL != consumed_string)
3414 GNUNET_asprintf (&temp, "%s%s", consumed_string, t->label);
3415 else
3416 GNUNET_asprintf (&temp, "%s", t->label);
3417 iterate_initial_edge (min_len,
3418 max_len,
3419 temp,
3420 t->to_state,
3421 iterator,
3422 iterator_cls);
3423 GNUNET_free (temp);
3424 }
3425 }
3426}
3427
3428
3437void
3440 void *iterator_cls)
3441{
3442 struct REGEX_INTERNAL_State *s;
3443
3444 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating over starting edges\n");
3447 NULL,
3448 a->start,
3449 iterator,
3450 iterator_cls);
3451 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating over DFA edges\n");
3452 for (s = a->states_head; NULL != s; s = s->next)
3453 {
3454 struct REGEX_BLOCK_Edge edges[s->transition_count];
3455 unsigned int num_edges;
3456
3457 num_edges = state_get_edges (s, edges);
3458 if (((NULL != s->proof) && (0 < strlen (s->proof))) || s->accepting)
3459 {
3461 "Creating DFA edges at `%s' under key %s\n",
3462 s->proof,
3463 GNUNET_h2s (&s->hash));
3464 iterator (iterator_cls,
3465 &s->hash,
3466 s->proof,
3467 s->accepting,
3468 num_edges,
3469 edges);
3470 }
3471 s->marked = GNUNET_NO;
3472 }
3473}
3474
3475
3483{
3485 char *proof;
3489};
3490
3491
3496{
3499};
3500
3501
3513static void
3515 const struct GNUNET_HashCode *key,
3516 const char *proof,
3517 int accepting,
3518 unsigned int num_edges,
3519 const struct REGEX_BLOCK_Edge *edges)
3520{
3521 struct GNUNET_CONTAINER_MultiHashMap *hm = cls;
3522 struct temporal_state_store *tmp;
3523 size_t edges_size;
3524
3525 tmp = GNUNET_new (struct temporal_state_store);
3526 tmp->reachable = GNUNET_NO;
3527 tmp->proof = GNUNET_strdup (proof);
3528 tmp->accepting = accepting;
3529 tmp->num_edges = num_edges;
3530 edges_size = sizeof(struct REGEX_BLOCK_Edge) * num_edges;
3531 tmp->edges = GNUNET_malloc (edges_size);
3532 GNUNET_memcpy (tmp->edges, edges, edges_size);
3535 hm,
3536 key,
3537 tmp,
3539}
3540
3541
3550static void
3553{
3555 unsigned int i;
3556
3557 if (GNUNET_YES == state->reachable)
3558 /* visited */
3559 return;
3560
3561 state->reachable = GNUNET_YES;
3562 for (i = 0; i < state->num_edges; i++)
3563 {
3564 child =
3565 GNUNET_CONTAINER_multihashmap_get (hm, &state->edges[i].destination);
3566 if (NULL == child)
3567 {
3568 GNUNET_break (0);
3569 continue;
3570 }
3572 }
3573}
3574
3575
3585static int
3587 const struct GNUNET_HashCode *key,
3588 void *value)
3589{
3590 struct GNUNET_CONTAINER_MultiHashMap *hm = cls;
3592
3593 if (GNUNET_YES == state->reachable)
3594 /* already visited and marked */
3595 return GNUNET_YES;
3596
3597 if ((GNUNET_REGEX_INITIAL_BYTES > strlen (state->proof)) &&
3598 (GNUNET_NO == state->accepting) )
3599 /* not directly reachable */
3600 return GNUNET_YES;
3601
3603 return GNUNET_YES;
3604}
3605
3606
3617static int
3618iterate_reachables (void *cls, const struct GNUNET_HashCode *key, void *value)
3619{
3620 struct client_iterator *ci = cls;
3622
3623 if (GNUNET_YES == state->reachable)
3624 {
3625 ci->iterator (ci->iterator_cls,
3626 key,
3627 state->proof,
3628 state->accepting,
3629 state->num_edges,
3630 state->edges);
3631 }
3632 GNUNET_free (state->edges);
3633 GNUNET_free (state->proof);
3635 return GNUNET_YES;
3636}
3637
3638
3639void
3642 void *iterator_cls)
3643{
3645 struct client_iterator ci;
3646
3648 ci.iterator = iterator;
3650
3654
3656}
3657
3658
3659/* end of regex_internal.c */
static struct GNUNET_ARM_Operation * op
Current operation.
Definition: gnunet-arm.c:143
static int start
Set if we are to start default services (including ARM).
Definition: gnunet-arm.c:38
static int ret
Final status code.
Definition: gnunet-arm.c:93
static int end
Set if we are to shutdown all services (including ARM).
Definition: gnunet-arm.c:33
struct GNUNET_HashCode key
The key used in the DHT.
static struct GNUNET_FS_Handle * ctx
static char * value
Value of the record to add/remove.
enum State state
current state of profiling
static int result
Global testing status.
static uint64_t proof
Definition: gnunet-scrypt.c:49
static pid_t child
static struct GNUNET_OS_Process * p
Helper process we started.
Definition: gnunet-uri.c:38
static struct GNUNET_SCHEDULER_Task * t
Main task.
API to access regex service to advertise capabilities via regex and discover respective peers using m...
#define GNUNET_CONTAINER_MDLL_remove(mdll, head, tail, element)
Remove an element from a MDLL.
#define GNUNET_CONTAINER_MDLL_insert_tail(mdll, head, tail, element)
Insert an element at the tail of a MDLL.
#define GNUNET_CONTAINER_DLL_remove(head, tail, element)
Remove an element from a DLL.
#define GNUNET_CONTAINER_DLL_insert_before(head, tail, other, element)
Insert an element into a DLL before the given other element.
#define GNUNET_CONTAINER_MDLL_insert(mdll, head, tail, element)
Insert an element at the head of a MDLL.
#define GNUNET_CONTAINER_DLL_insert_tail(head, tail, element)
Insert an element at the tail of a DLL.
#define GNUNET_CONTAINER_DLL_insert(head, tail, element)
Insert an element at the head of a DLL.
void GNUNET_CRYPTO_hash(const void *block, size_t size, struct GNUNET_HashCode *ret)
Compute hash of a given block.
Definition: crypto_hash.c:41
int GNUNET_CONTAINER_multihashmap_iterate(struct GNUNET_CONTAINER_MultiHashMap *map, GNUNET_CONTAINER_MultiHashMapIteratorCallback it, void *it_cls)
Iterate over all entries in the map.
void * GNUNET_CONTAINER_multihashmap_get(const struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key)
Given a key find a value in the map matching the key.
enum GNUNET_GenericReturnValue GNUNET_CONTAINER_multihashmap_put(struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key, void *value, enum GNUNET_CONTAINER_MultiHashMapOption opt)
Store a key-value pair in the map.
void GNUNET_CONTAINER_multihashmap_destroy(struct GNUNET_CONTAINER_MultiHashMap *map)
Destroy a hash map.
struct GNUNET_CONTAINER_MultiHashMap * GNUNET_CONTAINER_multihashmap_create(unsigned int len, int do_not_copy_keys)
Create a multi hash map.
@ GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST
, ' bother checking if a value already exists (faster than GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE...
#define GNUNET_log(kind,...)
#define GNUNET_MAX(a, b)
#define GNUNET_memcpy(dst, src, n)
Call memcpy() but check for n being 0 first.
@ GNUNET_OK
@ GNUNET_YES
@ GNUNET_NO
@ GNUNET_SYSERR
#define GNUNET_assert(cond)
Use this for fatal errors that cannot be handled.
#define GNUNET_break(cond)
Use this for internal assertion violations that are not fatal (can be handled) but should not occur.
const char * GNUNET_h2s(const struct GNUNET_HashCode *hc)
Convert a hash value to a string (for printing debug messages).
#define GNUNET_log_strerror(level, cmd)
Log an error message at log-level 'level' that indicates a failure of the command 'cmd' with the mess...
@ GNUNET_ERROR_TYPE_ERROR
@ GNUNET_ERROR_TYPE_DEBUG
int int GNUNET_asprintf(char **buf, const char *format,...) __attribute__((format(printf
Like asprintf, just portable.
#define GNUNET_strdup(a)
Wrapper around GNUNET_xstrdup_.
#define GNUNET_array_grow(arr, size, tsize)
Grow a well-typed (!) array.
#define GNUNET_malloc_large(size)
Wrapper around malloc.
#define GNUNET_strndup(a, length)
Wrapper around GNUNET_xstrndup_.
int GNUNET_snprintf(char *buf, size_t size, const char *format,...) __attribute__((format(printf
Like snprintf, just aborts if the buffer is of insufficient size.
#define GNUNET_new(type)
Allocate a struct or union of the given type.
#define GNUNET_malloc(size)
Wrapper around malloc.
#define GNUNET_realloc(ptr, size)
Wrapper around realloc.
#define GNUNET_free(ptr)
Wrapper around free.
#define GNUNET_REGEX_INITIAL_BYTES
Constant for how many bytes the initial string regex should have.
#define max(x, y)
static struct PeerEntry ** table
Table with our interned peer IDs.
Definition: peer.c:56
static unsigned int size
Size of the "table".
Definition: peer.c:68
static void sb_strdup_cstr(struct StringBuffer *out, const char *cstr)
Copy the given string buffer from 'in' to 'out'.
static struct REGEX_INTERNAL_State * dfa_state_create(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_StateSet *nfa_states)
Creates a new DFA state based on a set of NFA states.
static void state_add_transition(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_State *from_state, const char *label, struct REGEX_INTERNAL_State *to_state)
Adds a transition from one state to another on label.
static void sb_printf3(struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg1, const struct StringBuffer *sarg2, const struct StringBuffer *sarg3)
Format a string buffer.
static void construct_dfa_states(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *nfa, struct REGEX_INTERNAL_Automaton *dfa, struct REGEX_INTERNAL_State *dfa_state)
Create DFA states based on given 'nfa' and starting with 'dfa_state'.
static void state_set_clear(struct REGEX_INTERNAL_StateSet *set)
Clears the given StateSet 'set'.
static void nfa_add_concatenation(struct REGEX_INTERNAL_Context *ctx)
Pops two NFA fragments (a, b) from the stack and concatenates them (ab)
static void remove_parentheses(struct StringBuffer *str)
Remove parentheses surrounding string str.
static void sb_printf1(struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg)
Format a string buffer.
static struct REGEX_INTERNAL_Automaton * nfa_fragment_create(struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *end)
Creates a new NFA fragment.
static int sb_strncmp_cstr(const struct StringBuffer *str1, const char *str2, size_t n)
Compare n bytes of 'str1' and 'str2'.
static void automaton_merge_states(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s1, struct REGEX_INTERNAL_State *s2)
Merge two states into one.
static void dfa_add_multi_strides(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Function called for each state in the DFA.
static void sb_append(struct StringBuffer *ret, const struct StringBuffer *sarg)
Append a string.
void REGEX_INTERNAL_iterate_all_edges(struct REGEX_INTERNAL_Automaton *a, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
Iterate over all edges starting from start state of automaton 'a'.
static void sb_free(struct StringBuffer *sb)
Free resources of the given string buffer.
static void sb_realloc(struct StringBuffer *ret, size_t nlen)
Reallocate the buffer of 'ret' to fit 'nlen' characters; move the existing string to the beginning of...
static void nfa_add_states(struct REGEX_INTERNAL_Automaton *n, struct REGEX_INTERNAL_State *states_head, struct REGEX_INTERNAL_State *states_tail)
Adds a list of states to the given automaton 'n'.
size_t REGEX_INTERNAL_get_first_key(const char *input_string, size_t string_len, struct GNUNET_HashCode *key)
Get the first key for the given input_string.
static void state_set_append(struct REGEX_INTERNAL_StateSet *set, struct REGEX_INTERNAL_State *state)
Append state to the given StateSet.
struct REGEX_INTERNAL_Automaton * REGEX_INTERNAL_construct_dfa(const char *regex, const size_t len, unsigned int max_path_len)
Construct DFA for the given 'regex' of length 'len'.
static void dfa_add_multi_strides_helper(void *cls, const unsigned int depth, char *label, struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *s)
Recursive helper function to add strides to a DFA.
void REGEX_INTERNAL_automaton_traverse(const struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *start, REGEX_INTERNAL_traverse_check check, void *check_cls, REGEX_INTERNAL_traverse_action action, void *action_cls)
Traverses the given automaton using depth-first-search (DFS) from it's start state,...
static void mark_as_reachable(struct temporal_state_store *state, struct GNUNET_CONTAINER_MultiHashMap *hm)
Mark state as reachable and call recursively on all its edges.
void REGEX_INTERNAL_dfa_add_multi_strides(struct REGEX_INTERNAL_Context *regex_ctx, struct REGEX_INTERNAL_Automaton *dfa, const unsigned int stride_len)
Adds multi-strided transitions to the given 'dfa'.
int REGEX_INTERNAL_eval(struct REGEX_INTERNAL_Automaton *a, const char *string)
Evaluates the given 'string' against the given compiled regex.
static void nfa_add_question_op(struct REGEX_INTERNAL_Context *ctx)
Pops an NFA fragment (a) from the stack and adds a new fragment (a?)
static int nullstrcmp(const char *str1, const char *str2)
Compare two strings for equality.
static int dfa_merge_nondistinguishable_states(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a)
Merge all non distinguishable states in the DFA 'a'.
static int dfa_minimize(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a)
Minimize the given DFA 'a' by removing all unreachable states, removing all dead states and merging a...
static void sb_append_cstr(struct StringBuffer *ret, const char *cstr)
Append a C string.
const char * REGEX_INTERNAL_get_canonical_regex(struct REGEX_INTERNAL_Automaton *a)
Get the canonical regex of the given automaton.
static int sb_nullstrcmp(const struct StringBuffer *s1, const struct StringBuffer *s2)
Compare two strings for equality.
static int automaton_create_proofs(struct REGEX_INTERNAL_Automaton *a)
Create proofs for all states in the given automaton.
static int state_compare(const void *a, const void *b)
Compare two states.
static void dfa_remove_dead_states(struct REGEX_INTERNAL_Automaton *a)
Remove all dead states from the DFA 'a'.
static int state_set_compare(struct REGEX_INTERNAL_StateSet *sset1, struct REGEX_INTERNAL_StateSet *sset2)
Compare to state sets by comparing the id's of the states that are contained in each set.
void REGEX_INTERNAL_iterate_reachable_edges(struct REGEX_INTERNAL_Automaton *a, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
Iterate over all edges of automaton 'a' that are reachable from a state with a proof of at least GNUN...
static void remove_epsilon(const struct StringBuffer *str, struct StringBuffer *ret)
Remove an epsilon from the string str.
static void sb_wrap(struct StringBuffer *ret, const char *format, size_t extra_chars)
Wrap a string buffer, that is, set ret to the format string which contains an "%s" which is to be rep...
static void sb_strdup(struct StringBuffer *out, const struct StringBuffer *in)
Copy the given string buffer from 'in' to 'out'.
static int evaluate_dfa(struct REGEX_INTERNAL_Automaton *a, const char *string)
Evaluates the given string using the given DFA automaton.
static void nfa_closure_set_create(struct REGEX_INTERNAL_StateSet *ret, struct REGEX_INTERNAL_Automaton *nfa, struct REGEX_INTERNAL_StateSet *states, const char *label)
Calculates the closure set for the given set of states.
static int iterate_reachables(void *cls, const struct GNUNET_HashCode *key, void *value)
Iterator over hash map entries.
static void sb_printf2(struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg1, const struct StringBuffer *sarg2)
Format a string buffer.
static void dfa_compress_paths_helper(struct REGEX_INTERNAL_Automaton *dfa, struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *cur, char *label, unsigned int max_len, struct REGEX_INTERNAL_Transition **transitions_head, struct REGEX_INTERNAL_Transition **transitions_tail)
Recursive Helper function for DFA path compression.
static unsigned int state_get_edges(struct REGEX_INTERNAL_State *s, struct REGEX_BLOCK_Edge *edges)
Get all edges leaving state s.
static void automaton_create_proofs_simplify(const struct StringBuffer *R_last_ij, const struct StringBuffer *R_last_ik, const struct StringBuffer *R_last_kk, const struct StringBuffer *R_last_kj, struct StringBuffer *R_cur_ij, struct StringBuffer *R_cur_l, struct StringBuffer *R_cur_r)
Construct the regular expression given the inductive step, $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}...
static int sb_strcmp(const struct StringBuffer *s1, const struct StringBuffer *s2)
Compare two strings for equality.
static void store_all_states(void *cls, const struct GNUNET_HashCode *key, const char *proof, int accepting, unsigned int num_edges, const struct REGEX_BLOCK_Edge *edges)
Iterator over all edges of a dfa.
static int evaluate_nfa(struct REGEX_INTERNAL_Automaton *a, const char *string)
Evaluates the given string using the given NFA automaton.
static int sb_strncmp(const struct StringBuffer *str1, const struct StringBuffer *str2, size_t n)
Compare n bytes of 'str1' and 'str2'.
static int needs_parentheses(const struct StringBuffer *str)
Check if the given string str needs parentheses around it when using it to generate a regex.
static void state_remove_transition(struct REGEX_INTERNAL_State *state, struct REGEX_INTERNAL_Transition *transition)
Remove a 'transition' from 'state'.
static struct REGEX_INTERNAL_State * nfa_state_create(struct REGEX_INTERNAL_Context *ctx, int accepting)
Creates a new NFA state.
static void iterate_initial_edge(unsigned int min_len, unsigned int max_len, char *consumed_string, struct REGEX_INTERNAL_State *state, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
Recursive function that calls the iterator for each synthetic start state.
static void mark_states(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Set the given state 'marked' to GNUNET_YES.
static void automaton_add_state(struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s)
Add a state to the automaton 'a', always use this function to alter the states DLL of the automaton.
static void dfa_remove_unreachable_states(struct REGEX_INTERNAL_Automaton *a)
Remove all unreachable states from DFA 'a'.
void REGEX_INTERNAL_automaton_destroy(struct REGEX_INTERNAL_Automaton *a)
Free the memory allocated by constructing the REGEX_INTERNAL_Automaton.
struct REGEX_INTERNAL_Automaton * REGEX_INTERNAL_construct_nfa(const char *regex, const size_t len)
Construct an NFA by parsing the regex string of length 'len'.
unsigned int REGEX_INTERNAL_get_transition_count(struct REGEX_INTERNAL_Automaton *a)
Get the number of transitions that are contained in the given automaton.
static void nfa_add_star_op(struct REGEX_INTERNAL_Context *ctx)
Pops a NFA fragment from the stack (a) and adds a new fragment (a*)
static int has_epsilon(const struct StringBuffer *str)
Check if the string 'str' starts with an epsilon (empty string).
static void automaton_destroy_state(struct REGEX_INTERNAL_State *s)
Frees the memory used by State s.
static unsigned int dfa_move(struct REGEX_INTERNAL_State **s, const char *str)
Move from the given state 's' to the next state on transition 'str'.
static void nfa_add_alternation(struct REGEX_INTERNAL_Context *ctx)
Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment that alternates between a an...
static void sb_init(struct StringBuffer *sb, size_t n)
Initialize string buffer for storing strings of up to n characters.
static void nfa_add_label(struct REGEX_INTERNAL_Context *ctx, const char *label)
Adds a new nfa fragment to the stack.
static int sb_strkcmp(const struct StringBuffer *str1, const struct StringBuffer *str2, size_t k)
Compare 'str1', starting from position 'k', with whole 'str2'.
static void automaton_state_traverse(struct REGEX_INTERNAL_State *s, int *marks, unsigned int *count, REGEX_INTERNAL_traverse_check check, void *check_cls, REGEX_INTERNAL_traverse_action action, void *action_cls)
Depth-first traversal (DFS) of all states that are reachable from state 's'.
static void automaton_remove_state(struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s)
Remove a state from the given automaton 'a'.
static void dfa_compress_paths(struct REGEX_INTERNAL_Context *regex_ctx, struct REGEX_INTERNAL_Automaton *dfa, unsigned int max_len)
Compress paths in the given 'dfa'.
static void number_states(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Helper function used as 'action' in 'REGEX_INTERNAL_automaton_traverse' function to create the depth-...
static void REGEX_INTERNAL_context_init(struct REGEX_INTERNAL_Context *ctx)
Initialize a new context.
static void nfa_add_plus_op(struct REGEX_INTERNAL_Context *ctx)
Pops an NFA fragment (a) from the stack and adds a new fragment (a+)
static void automaton_fragment_clear(struct REGEX_INTERNAL_Automaton *a)
Clears an automaton fragment.
static int reachability_iterator(void *cls, const struct GNUNET_HashCode *key, void *value)
Iterator over hash map entries to mark the ones that are reachable.
int(* REGEX_INTERNAL_traverse_check)(void *cls, struct REGEX_INTERNAL_State *s, struct REGEX_INTERNAL_Transition *t)
Function that gets passed to automaton traversal and is called before each next traversal from state ...
void(* REGEX_INTERNAL_traverse_action)(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Function that is called with each state, when traversing an automaton.
@ NFA
@ DFA
library to parse regular expressions into dfa
void(* REGEX_INTERNAL_KeyIterator)(void *cls, const struct GNUNET_HashCode *key, const char *proof, int accepting, unsigned int num_edges, const struct REGEX_BLOCK_Edge *edges)
Iterator callback function.
Internal representation of the hash map.
A 512-bit hashcode.
struct GNUNET_SCHEDULER_Task * next
This is a linked list.
Definition: scheduler.c:140
Edge representation.
const char * label
Label of the edge.
struct GNUNET_HashCode destination
Destination of the edge.
Automaton representation.
struct REGEX_INTERNAL_State * start
First state of the automaton.
char * canonical_regex
Canonical regex (result of RX->NFA->DFA->RX)
int is_multistrided
GNUNET_YES, if multi strides have been added to the Automaton.
struct REGEX_INTERNAL_State * states_tail
DLL of states.
unsigned int state_count
Number of states in the automaton.
struct REGEX_INTERNAL_State * states_head
DLL of states.
struct REGEX_INTERNAL_State * end
End state of the partial NFA.
enum REGEX_INTERNAL_AutomatonType type
Type of the automaton.
Context that contains an id counter for states and transitions as well as a DLL of automatons used as...
Set of states using MDLL API.
struct REGEX_INTERNAL_State * head
MDLL of states.
struct REGEX_INTERNAL_State * tail
MDLL of states.
unsigned int len
Length of the MDLL.
unsigned int size
Length of the 'states' array.
struct REGEX_INTERNAL_State ** states
Array of states.
unsigned int off
Number of entries in use in the 'states' array.
unsigned int incoming_transition_count
Number of incoming transitions.
struct REGEX_INTERNAL_State * next
This is a linked list to keep states in an automaton.
char * proof
Proof for this state.
unsigned int transition_count
Number of transitions from this state to other states.
unsigned int traversal_id
Unique state id that is used for traversing the automaton.
struct REGEX_INTERNAL_StateSet nfa_set
Set of states on which this state is based on.
unsigned int dfs_id
Linear state ID acquired by depth-first-search.
char * name
Human readable name of the state.
int marked
Marking of the state.
unsigned int id
Unique state id.
struct REGEX_INTERNAL_Transition * transitions_tail
DLL of transitions.
int accepting
If this is an accepting state or not.
int lowlink
Used for SCC detection.
int contained
Marking the state as contained.
struct REGEX_INTERNAL_Transition * transitions_head
DLL of transitions.
int index
Used for SCC detection.
unsigned int scc_id
Marking the state as part of an SCC (Strongly Connected Component).
Context for adding strided transitions to a DFA.
struct REGEX_INTERNAL_Transition * transitions_tail
Strided transitions DLL.
struct REGEX_INTERNAL_Transition * transitions_head
Strided transitions DLL.
const unsigned int stride
Length of the strides.
Transition between two states.
struct REGEX_INTERNAL_State * to_state
State to which this transition leads.
struct REGEX_INTERNAL_Transition * next
This is a linked list.
struct REGEX_INTERNAL_State * from_state
State from which this transition origins.
char * label
Label for this transition.
String container for faster string operations.
int16_t null_flag
Buffer currently represents "NULL" (not the empty string!)
char * sbuf
Buffer holding the string (may start in the middle!); NOT 0-terminated!
size_t slen
Length of the string in the buffer.
unsigned int blen
Number of bytes allocated for sbuf.
char * abuf
Allocated buffer.
int16_t synced
If this entry is part of the last/current generation array, this flag is GNUNET_YES if the last and c...
Store regex iterator and cls in one place to pass to the hashmap iterator.
REGEX_INTERNAL_KeyIterator iterator
Struct to hold all the relevant state information in the HashMap.
struct REGEX_BLOCK_Edge * edges