GNUnet 0.22.0
regex_internal.c
Go to the documentation of this file.
1/*
2 This file is part of GNUnet
3 Copyright (C) 2012 GNUnet e.V.
4
5 GNUnet is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Affero General Public License as published
7 by the Free Software Foundation, either version 3 of the License,
8 or (at your option) any later version.
9
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Affero General Public License for more details.
14
15 You should have received a copy of the GNU Affero General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
17
18 SPDX-License-Identifier: AGPL3.0-or-later
19 */
26#include "platform.h"
27#include "gnunet_util_lib.h"
29#include "regex_internal_lib.h"
30#include "regex_internal.h"
31
32
37#define REGEX_DEBUG_DFA GNUNET_NO
38
43{
48
53
57 unsigned int len;
58};
59
60
67static void
70{
71 if (set->off == set->size)
72 GNUNET_array_grow (set->states, set->size, set->size * 2 + 4);
73 set->states[set->off++] = state;
74}
75
76
85static int
86nullstrcmp (const char *str1, const char *str2)
87{
88 if ((NULL == str1) != (NULL == str2))
89 return -1;
90 if ((NULL == str1) && (NULL == str2))
91 return 0;
92
93 return strcmp (str1, str2);
94}
95
96
106static void
108 struct REGEX_INTERNAL_State *from_state,
109 const char *label,
110 struct REGEX_INTERNAL_State *to_state)
111{
113 struct REGEX_INTERNAL_Transition *oth;
114
115 if (NULL == from_state)
116 {
117 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not create Transition.\n");
118 return;
119 }
120
121 /* Do not add duplicate state transitions */
122 for (t = from_state->transitions_head; NULL != t; t = t->next)
123 {
124 if ((t->to_state == to_state) && (0 == nullstrcmp (t->label, label)) &&
125 (t->from_state == from_state) )
126 return;
127 }
128
129 /* sort transitions by label */
130 for (oth = from_state->transitions_head; NULL != oth; oth = oth->next)
131 {
132 if (0 < nullstrcmp (oth->label, label))
133 break;
134 }
135
137 if (NULL != ctx)
138 t->id = ctx->transition_id++;
139 if (NULL != label)
140 t->label = GNUNET_strdup (label);
141 else
142 t->label = NULL;
143 t->to_state = to_state;
144 t->from_state = from_state;
145
146 /* Add outgoing transition to 'from_state' */
150 oth,
151 t);
152}
153
154
161static void
163 struct REGEX_INTERNAL_Transition *transition)
164{
165 if ((NULL == state) || (NULL == transition))
166 return;
167
168 if (transition->from_state != state)
169 return;
170
171 GNUNET_free (transition->label);
172
173 state->transition_count--;
174 GNUNET_CONTAINER_DLL_remove (state->transitions_head,
175 state->transitions_tail,
176 transition);
177
178 GNUNET_free (transition);
179}
180
181
192static int
193state_compare (const void *a, const void *b)
194{
195 struct REGEX_INTERNAL_State **s1 = (struct REGEX_INTERNAL_State **) a;
196 struct REGEX_INTERNAL_State **s2 = (struct REGEX_INTERNAL_State **) b;
197
198 return (*s1)->id - (*s2)->id;
199}
200
201
211static unsigned int
213{
215 unsigned int count;
216
217 if (NULL == s)
218 return 0;
219
220 count = 0;
221
222 for (t = s->transitions_head; NULL != t; t = t->next)
223 {
224 if (NULL != t->to_state)
225 {
226 edges[count].label = t->label;
227 edges[count].destination = t->to_state->hash;
228 count++;
229 }
230 }
231 return count;
232}
233
234
243static int
245 struct REGEX_INTERNAL_StateSet *sset2)
246{
247 int result;
248 unsigned int i;
249
250 if ((NULL == sset1) || (NULL == sset2))
251 return 1;
252
253 result = sset1->off - sset2->off;
254 if (result < 0)
255 return -1;
256 if (result > 0)
257 return 1;
258 for (i = 0; i < sset1->off; i++)
259 if (0 != (result = state_compare (&sset1->states[i], &sset2->states[i])))
260 break;
261 return result;
262}
263
264
270static void
272{
273 GNUNET_array_grow (set->states, set->size, 0);
274 set->off = 0;
275}
276
277
284static void
286{
287 if (NULL == a)
288 return;
289
290 a->start = NULL;
291 a->end = NULL;
292 a->states_head = NULL;
293 a->states_tail = NULL;
294 a->state_count = 0;
295 GNUNET_free (a);
296}
297
298
304static void
306{
308 struct REGEX_INTERNAL_Transition *next_t;
309
310 if (NULL == s)
311 return;
312
313 GNUNET_free (s->name);
314 GNUNET_free (s->proof);
316 for (t = s->transitions_head; NULL != t; t = next_t)
317 {
318 next_t = t->next;
320 }
321
322 GNUNET_free (s);
323}
324
325
334static void
336 struct REGEX_INTERNAL_State *s)
337{
338 struct REGEX_INTERNAL_State *s_check;
339 struct REGEX_INTERNAL_Transition *t_check;
340 struct REGEX_INTERNAL_Transition *t_check_next;
341
342 if ((NULL == a) || (NULL == s))
343 return;
344
345 /* remove all transitions leading to this state */
346 for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
347 {
348 for (t_check = s_check->transitions_head; NULL != t_check;
349 t_check = t_check_next)
350 {
351 t_check_next = t_check->next;
352 if (t_check->to_state == s)
353 state_remove_transition (s_check, t_check);
354 }
355 }
356
357 /* remove state */
359 a->state_count--;
360
362}
363
364
374static void
376 struct REGEX_INTERNAL_Automaton *a,
377 struct REGEX_INTERNAL_State *s1,
378 struct REGEX_INTERNAL_State *s2)
379{
380 struct REGEX_INTERNAL_State *s_check;
381 struct REGEX_INTERNAL_Transition *t_check;
383 struct REGEX_INTERNAL_Transition *t_next;
384 int is_dup;
385
386 if (s1 == s2)
387 return;
388
389 /* 1. Make all transitions pointing to s2 point to s1, unless this transition
390 * does not already exists, if it already exists remove transition. */
391 for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
392 {
393 for (t_check = s_check->transitions_head; NULL != t_check; t_check = t_next)
394 {
395 t_next = t_check->next;
396
397 if (s2 == t_check->to_state)
398 {
399 is_dup = GNUNET_NO;
400 for (t = t_check->from_state->transitions_head; NULL != t; t = t->next)
401 {
402 if ((t->to_state == s1) && (0 == strcmp (t_check->label, t->label)) )
403 is_dup = GNUNET_YES;
404 }
405 if (GNUNET_NO == is_dup)
406 t_check->to_state = s1;
407 else
408 state_remove_transition (t_check->from_state, t_check);
409 }
410 }
411 }
412
413 /* 2. Add all transitions from s2 to sX to s1 */
414 for (t_check = s2->transitions_head; NULL != t_check; t_check = t_check->next)
415 {
416 if (t_check->to_state != s1)
417 state_add_transition (ctx, s1, t_check->label, t_check->to_state);
418 }
419
420 /* 3. Rename s1 to {s1,s2} */
421#if REGEX_DEBUG_DFA
422 char *new_name;
423
424 new_name = s1->name;
425 GNUNET_asprintf (&s1->name, "{%s,%s}", new_name, s2->name);
426 GNUNET_free (new_name);
427#endif
428
429 /* remove state */
431 a->state_count--;
433}
434
435
443static void
445 struct REGEX_INTERNAL_State *s)
446{
448 a->state_count++;
449}
450
451
466static void
468 int *marks,
469 unsigned int *count,
471 void *check_cls,
473 void *action_cls)
474{
476
477 if (GNUNET_YES == marks[s->traversal_id])
478 return;
479
480 marks[s->traversal_id] = GNUNET_YES;
481
482 if (NULL != action)
483 action (action_cls, *count, s);
484
485 (*count)++;
486
487 for (t = s->transitions_head; NULL != t; t = t->next)
488 {
489 if ((NULL == check) ||
490 ((NULL != check) && (GNUNET_YES == check (check_cls, s, t)) ))
491 {
492 automaton_state_traverse (t->to_state,
493 marks,
494 count,
495 check,
496 check_cls,
497 action,
498 action_cls);
499 }
500 }
501}
502
503
504void
508 void *check_cls,
510 void *action_cls)
511{
512 unsigned int count;
513 struct REGEX_INTERNAL_State *s;
514
515 if ((NULL == a) || (0 == a->state_count))
516 return;
517
518 int marks[a->state_count];
519
520 for (count = 0, s = a->states_head; NULL != s && count < a->state_count;
521 s = s->next, count++)
522 {
523 s->traversal_id = count;
524 marks[s->traversal_id] = GNUNET_NO;
525 }
526
527 count = 0;
528
529 if (NULL == start)
530 s = a->start;
531 else
532 s = start;
533
535 marks,
536 &count,
537 check,
538 check_cls,
539 action,
540 action_cls);
541}
542
543
548{
553 char *sbuf;
554
558 char *abuf;
559
563 size_t slen;
564
568 unsigned int blen;
569
573 int16_t null_flag;
574
584 int16_t synced;
585};
586
587
596static int
597sb_nullstrcmp (const struct StringBuffer *s1, const struct StringBuffer *s2)
598{
599 if ((GNUNET_YES == s1->null_flag) && (GNUNET_YES == s2->null_flag))
600 return 0;
601 if ((GNUNET_YES == s1->null_flag) || (GNUNET_YES == s2->null_flag))
602 return -1;
603 if (s1->slen != s2->slen)
604 return -1;
605 if (0 == s1->slen)
606 return 0;
607 return memcmp (s1->sbuf, s2->sbuf, s1->slen);
608}
609
610
619static int
620sb_strcmp (const struct StringBuffer *s1, const struct StringBuffer *s2)
621{
622 if (s1->slen != s2->slen)
623 return -1;
624 if (0 == s1->slen)
625 return 0;
626 return memcmp (s1->sbuf, s2->sbuf, s1->slen);
627}
628
629
637static void
638sb_realloc (struct StringBuffer *ret, size_t nlen)
639{
640 char *old;
641
642 GNUNET_assert (nlen >= ret->slen);
643 old = ret->abuf;
644 ret->abuf = GNUNET_malloc (nlen);
645 ret->blen = nlen;
646 GNUNET_memcpy (ret->abuf, ret->sbuf, ret->slen);
647 ret->sbuf = ret->abuf;
648 GNUNET_free (old);
649}
650
651
658static void
659sb_append (struct StringBuffer *ret, const struct StringBuffer *sarg)
660{
661 if (GNUNET_YES == ret->null_flag)
662 ret->slen = 0;
663 ret->null_flag = GNUNET_NO;
664 if (ret->blen < sarg->slen + ret->slen)
665 sb_realloc (ret, ret->blen + sarg->slen + 128);
666 GNUNET_memcpy (&ret->sbuf[ret->slen], sarg->sbuf, sarg->slen);
667 ret->slen += sarg->slen;
668}
669
670
677static void
678sb_append_cstr (struct StringBuffer *ret, const char *cstr)
679{
680 size_t cstr_len = strlen (cstr);
681
682 if (GNUNET_YES == ret->null_flag)
683 ret->slen = 0;
684 ret->null_flag = GNUNET_NO;
685 if (ret->blen < cstr_len + ret->slen)
686 sb_realloc (ret, ret->blen + cstr_len + 128);
687 GNUNET_memcpy (&ret->sbuf[ret->slen], cstr, cstr_len);
688 ret->slen += cstr_len;
689}
690
691
702static void
703sb_wrap (struct StringBuffer *ret, const char *format, size_t extra_chars)
704{
705 char *temp;
706
707 if (GNUNET_YES == ret->null_flag)
708 ret->slen = 0;
709 ret->null_flag = GNUNET_NO;
710 temp = GNUNET_malloc (ret->slen + extra_chars + 1);
711 GNUNET_snprintf (temp,
712 ret->slen + extra_chars + 1,
713 format,
714 (int) ret->slen,
715 ret->sbuf);
716 GNUNET_free (ret->abuf);
717 ret->abuf = temp;
718 ret->sbuf = temp;
719 ret->blen = ret->slen + extra_chars + 1;
720 ret->slen = ret->slen + extra_chars;
721}
722
723
733static void
735 const char *format,
736 size_t extra_chars,
737 const struct StringBuffer *sarg)
738{
739 if (ret->blen < sarg->slen + extra_chars + 1)
740 sb_realloc (ret, sarg->slen + extra_chars + 1);
741 ret->null_flag = GNUNET_NO;
742 ret->sbuf = ret->abuf;
743 ret->slen = sarg->slen + extra_chars;
744 GNUNET_snprintf (ret->sbuf, ret->blen, format, (int) sarg->slen, sarg->sbuf);
745}
746
747
757static void
759 const char *format,
760 size_t extra_chars,
761 const struct StringBuffer *sarg1,
762 const struct StringBuffer *sarg2)
763{
764 if (ret->blen < sarg1->slen + sarg2->slen + extra_chars + 1)
765 sb_realloc (ret, sarg1->slen + sarg2->slen + extra_chars + 1);
766 ret->null_flag = GNUNET_NO;
767 ret->slen = sarg1->slen + sarg2->slen + extra_chars;
768 ret->sbuf = ret->abuf;
769 GNUNET_snprintf (ret->sbuf,
770 ret->blen,
771 format,
772 (int) sarg1->slen,
773 sarg1->sbuf,
774 (int) sarg2->slen,
775 sarg2->sbuf);
776}
777
778
790static void
792 const char *format,
793 size_t extra_chars,
794 const struct StringBuffer *sarg1,
795 const struct StringBuffer *sarg2,
796 const struct StringBuffer *sarg3)
797{
798 if (ret->blen < sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1)
799 sb_realloc (ret, sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1);
800 ret->null_flag = GNUNET_NO;
801 ret->slen = sarg1->slen + sarg2->slen + sarg3->slen + extra_chars;
802 ret->sbuf = ret->abuf;
803 GNUNET_snprintf (ret->sbuf,
804 ret->blen,
805 format,
806 (int) sarg1->slen,
807 sarg1->sbuf,
808 (int) sarg2->slen,
809 sarg2->sbuf,
810 (int) sarg3->slen,
811 sarg3->sbuf);
812}
813
814
821static void
823{
824 GNUNET_array_grow (sb->abuf, sb->blen, 0);
825 sb->slen = 0;
826 sb->sbuf = NULL;
827 sb->null_flag = GNUNET_YES;
828}
829
830
837static void
838sb_strdup (struct StringBuffer *out, const struct StringBuffer *in)
839
840{
841 out->null_flag = in->null_flag;
842 if (GNUNET_YES == out->null_flag)
843 return;
844 if (out->blen < in->slen)
845 {
846 GNUNET_array_grow (out->abuf, out->blen, in->slen);
847 }
848 out->sbuf = out->abuf;
849 out->slen = in->slen;
850 GNUNET_memcpy (out->sbuf, in->sbuf, out->slen);
851}
852
853
860static void
861sb_strdup_cstr (struct StringBuffer *out, const char *cstr)
862{
863 if (NULL == cstr)
864 {
865 out->null_flag = GNUNET_YES;
866 return;
867 }
868 out->null_flag = GNUNET_NO;
869 out->slen = strlen (cstr);
870 if (out->blen < out->slen)
871 {
872 GNUNET_array_grow (out->abuf, out->blen, out->slen);
873 }
874 out->sbuf = out->abuf;
875 GNUNET_memcpy (out->sbuf, cstr, out->slen);
876}
877
878
887static int
889{
890 size_t slen;
891 const char *op;
892 const char *cl;
893 const char *pos;
894 const char *end;
895 unsigned int cnt;
896
897 if ((GNUNET_YES == str->null_flag) || ((slen = str->slen) < 2))
898 return GNUNET_NO;
899 pos = str->sbuf;
900 if ('(' != pos[0])
901 return GNUNET_YES;
902 end = str->sbuf + slen;
903 cnt = 1;
904 pos++;
905 while (cnt > 0)
906 {
907 cl = memchr (pos, ')', end - pos);
908 if (NULL == cl)
909 {
910 GNUNET_break (0);
911 return GNUNET_YES;
912 }
913 /* while '(' before ')', count opening parens */
914 while ((NULL != (op = memchr (pos, '(', end - pos))) && (op < cl))
915 {
916 cnt++;
917 pos = op + 1;
918 }
919 /* got ')' first */
920 cnt--;
921 pos = cl + 1;
922 }
923 return (*pos == '\0') ? GNUNET_NO : GNUNET_YES;
924}
925
926
936static void
938{
939 size_t slen;
940 const char *pos;
941 const char *end;
942 const char *sbuf;
943 const char *op;
944 const char *cp;
945 unsigned int cnt;
946
947 if (0)
948 return;
949 sbuf = str->sbuf;
950 if ((GNUNET_YES == str->null_flag) || (1 >= (slen = str->slen)) ||
951 ('(' != str->sbuf[0]) || (')' != str->sbuf[slen - 1]))
952 return;
953 cnt = 0;
954 pos = &sbuf[1];
955 end = &sbuf[slen - 1];
956 op = memchr (pos, '(', end - pos);
957 cp = memchr (pos, ')', end - pos);
958 while (NULL != cp)
959 {
960 while ((NULL != op) && (op < cp))
961 {
962 cnt++;
963 pos = op + 1;
964 op = memchr (pos, '(', end - pos);
965 }
966 while ((NULL != cp) && ((NULL == op) || (cp < op)))
967 {
968 if (0 == cnt)
969 return; /* can't strip parens */
970 cnt--;
971 pos = cp + 1;
972 cp = memchr (pos, ')', end - pos);
973 }
974 }
975 if (0 != cnt)
976 {
977 GNUNET_break (0);
978 return;
979 }
980 str->sbuf++;
981 str->slen -= 2;
982}
983
984
993static int
994has_epsilon (const struct StringBuffer *str)
995{
996 return (GNUNET_YES != str->null_flag) && (0 < str->slen) &&
997 ('(' == str->sbuf[0]) && ('|' == str->sbuf[1]) &&
998 (')' == str->sbuf[str->slen - 1]);
999}
1000
1001
1011static void
1012remove_epsilon (const struct StringBuffer *str, struct StringBuffer *ret)
1013{
1014 if (GNUNET_YES == str->null_flag)
1015 {
1016 ret->null_flag = GNUNET_YES;
1017 return;
1018 }
1019 if ((str->slen > 1) && ('(' == str->sbuf[0]) && ('|' == str->sbuf[1]) &&
1020 (')' == str->sbuf[str->slen - 1]))
1021 {
1022 /* remove epsilon */
1023 if (ret->blen < str->slen - 3)
1024 {
1025 GNUNET_array_grow (ret->abuf, ret->blen, str->slen - 3);
1026 }
1027 ret->sbuf = ret->abuf;
1028 ret->slen = str->slen - 3;
1029 GNUNET_memcpy (ret->sbuf, &str->sbuf[2], ret->slen);
1030 return;
1031 }
1032 sb_strdup (ret, str);
1033}
1034
1035
1045static int
1046sb_strncmp (const struct StringBuffer *str1,
1047 const struct StringBuffer *str2,
1048 size_t n)
1049{
1050 size_t max;
1051
1052 if ((str1->slen != str2->slen) && ((str1->slen < n) || (str2->slen < n)))
1053 return -1;
1054 max = GNUNET_MAX (str1->slen, str2->slen);
1055 if (max > n)
1056 max = n;
1057 return memcmp (str1->sbuf, str2->sbuf, max);
1058}
1059
1060
1070static int
1071sb_strncmp_cstr (const struct StringBuffer *str1, const char *str2, size_t n)
1072{
1073 if (str1->slen < n)
1074 return -1;
1075 return memcmp (str1->sbuf, str2, n);
1076}
1077
1078
1086static void
1087sb_init (struct StringBuffer *sb, size_t n)
1088{
1089 sb->null_flag = GNUNET_NO;
1090 sb->abuf = sb->sbuf = (0 == n) ? NULL : GNUNET_malloc (n);
1091 sb->blen = n;
1092 sb->slen = 0;
1093}
1094
1095
1105static int
1106sb_strkcmp (const struct StringBuffer *str1,
1107 const struct StringBuffer *str2,
1108 size_t k)
1109{
1110 if ((GNUNET_YES == str1->null_flag) || (GNUNET_YES == str2->null_flag) ||
1111 (k > str1->slen) || (str1->slen - k != str2->slen))
1112 return -1;
1113 return memcmp (&str1->sbuf[k], str2->sbuf, str2->slen);
1114}
1115
1116
1125static void
1126number_states (void *cls,
1127 const unsigned int count,
1128 struct REGEX_INTERNAL_State *s)
1129{
1130 struct REGEX_INTERNAL_State **states = cls;
1131
1132 s->dfs_id = count;
1133 if (NULL != states)
1134 states[count] = s;
1135}
1136
1137
1138#define PRIS(a) \
1139 ((GNUNET_YES == a.null_flag) ? 6 : (int) a.slen), \
1140 ((GNUNET_YES == a.null_flag) ? "(null)" : a.sbuf)
1141
1142
1157static void
1159 const struct StringBuffer *R_last_ik,
1160 const struct StringBuffer *R_last_kk,
1161 const struct StringBuffer *R_last_kj,
1162 struct StringBuffer *R_cur_ij,
1163 struct StringBuffer *R_cur_l,
1164 struct StringBuffer *R_cur_r)
1165{
1166 struct StringBuffer R_temp_ij;
1167 struct StringBuffer R_temp_ik;
1168 struct StringBuffer R_temp_kj;
1169 struct StringBuffer R_temp_kk;
1170 int eps_check;
1171 int ij_ik_cmp;
1172 int ij_kj_cmp;
1173 int ik_kk_cmp;
1174 int kk_kj_cmp;
1175 int clean_ik_kk_cmp;
1176 int clean_kk_kj_cmp;
1177 size_t length;
1178 size_t length_l;
1179 size_t length_r;
1180
1181 /*
1182 * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1183 * R_last == R^{(k-1)}, R_cur == R^{(k)}
1184 * R_cur_ij = R_cur_l | R_cur_r
1185 * R_cur_l == R^{(k-1)}_{ij}
1186 * R_cur_r == R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1187 */if ((GNUNET_YES == R_last_ij->null_flag) &&
1188 ((GNUNET_YES == R_last_ik->null_flag) ||
1189 (GNUNET_YES == R_last_kj->null_flag)))
1190 {
1191 /* R^{(k)}_{ij} = N | N */
1192 R_cur_ij->null_flag = GNUNET_YES;
1193 R_cur_ij->synced = GNUNET_NO;
1194 return;
1195 }
1196
1197 if ((GNUNET_YES == R_last_ik->null_flag) ||
1198 (GNUNET_YES == R_last_kj->null_flag))
1199 {
1200 /* R^{(k)}_{ij} = R^{(k-1)}_{ij} | N */
1201 if (GNUNET_YES == R_last_ij->synced)
1202 {
1203 R_cur_ij->synced = GNUNET_YES;
1204 R_cur_ij->null_flag = GNUNET_NO;
1205 return;
1206 }
1207 R_cur_ij->synced = GNUNET_YES;
1208 sb_strdup (R_cur_ij, R_last_ij);
1209 return;
1210 }
1211 R_cur_ij->synced = GNUNET_NO;
1212
1213 /* $R^{(k)}_{ij} = N | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} OR
1214 * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} */
1215
1216 R_cur_r->null_flag = GNUNET_YES;
1217 R_cur_r->slen = 0;
1218 R_cur_l->null_flag = GNUNET_YES;
1219 R_cur_l->slen = 0;
1220
1221 /* cache results from strcmp, we might need these many times */
1222 ij_kj_cmp = sb_nullstrcmp (R_last_ij, R_last_kj);
1223 ij_ik_cmp = sb_nullstrcmp (R_last_ij, R_last_ik);
1224 ik_kk_cmp = sb_nullstrcmp (R_last_ik, R_last_kk);
1225 kk_kj_cmp = sb_nullstrcmp (R_last_kk, R_last_kj);
1226
1227 /* Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well
1228 * as parentheses, so we can better compare the contents */
1229
1230 memset (&R_temp_ij, 0, sizeof(struct StringBuffer));
1231 memset (&R_temp_ik, 0, sizeof(struct StringBuffer));
1232 memset (&R_temp_kk, 0, sizeof(struct StringBuffer));
1233 memset (&R_temp_kj, 0, sizeof(struct StringBuffer));
1234 remove_epsilon (R_last_ik, &R_temp_ik);
1235 remove_epsilon (R_last_kk, &R_temp_kk);
1236 remove_epsilon (R_last_kj, &R_temp_kj);
1237 remove_parentheses (&R_temp_ik);
1238 remove_parentheses (&R_temp_kk);
1239 remove_parentheses (&R_temp_kj);
1240 clean_ik_kk_cmp = sb_nullstrcmp (R_last_ik, &R_temp_kk);
1241 clean_kk_kj_cmp = sb_nullstrcmp (&R_temp_kk, R_last_kj);
1242
1243 /* construct R_cur_l (and, if necessary R_cur_r) */
1244 if (GNUNET_YES != R_last_ij->null_flag)
1245 {
1246 /* Assign R_temp_ij to R_last_ij and remove epsilon as well
1247 * as parentheses, so we can better compare the contents */
1248 remove_epsilon (R_last_ij, &R_temp_ij);
1249 remove_parentheses (&R_temp_ij);
1250
1251 if ((0 == sb_strcmp (&R_temp_ij, &R_temp_ik)) &&
1252 (0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) &&
1253 (0 == sb_strcmp (&R_temp_kk, &R_temp_kj)))
1254 {
1255 if (0 == R_temp_ij.slen)
1256 {
1257 R_cur_r->null_flag = GNUNET_NO;
1258 }
1259 else if ((0 == sb_strncmp_cstr (R_last_ij, "(|", 2)) ||
1260 ((0 == sb_strncmp_cstr (R_last_ik, "(|", 2)) &&
1261 (0 == sb_strncmp_cstr (R_last_kj, "(|", 2)) ))
1262 {
1263 /*
1264 * a|(e|a)a*(e|a) = a*
1265 * a|(e|a)(e|a)*(e|a) = a*
1266 * (e|a)|aa*a = a*
1267 * (e|a)|aa*(e|a) = a*
1268 * (e|a)|(e|a)a*a = a*
1269 * (e|a)|(e|a)a*(e|a) = a*
1270 * (e|a)|(e|a)(e|a)*(e|a) = a*
1271 */
1272 if (GNUNET_YES == needs_parentheses (&R_temp_ij))
1273 sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_ij);
1274 else
1275 sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_ij);
1276 }
1277 else
1278 {
1279 /*
1280 * a|aa*a = a+
1281 * a|(e|a)a*a = a+
1282 * a|aa*(e|a) = a+
1283 * a|(e|a)(e|a)*a = a+
1284 * a|a(e|a)*(e|a) = a+
1285 */
1286 if (GNUNET_YES == needs_parentheses (&R_temp_ij))
1287 sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_ij);
1288 else
1289 sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_ij);
1290 }
1291 }
1292 else if ((0 == ij_ik_cmp) && (0 == clean_kk_kj_cmp) &&
1293 (0 != clean_ik_kk_cmp))
1294 {
1295 /* a|ab*b = ab* */
1296 if (0 == R_last_kk->slen)
1297 sb_strdup (R_cur_r, R_last_ij);
1298 else if (GNUNET_YES == needs_parentheses (&R_temp_kk))
1299 sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
1300 else
1301 sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, R_last_kk);
1302 R_cur_l->null_flag = GNUNET_YES;
1303 }
1304 else if ((0 == ij_kj_cmp) && (0 == clean_ik_kk_cmp) &&
1305 (0 != clean_kk_kj_cmp))
1306 {
1307 /* a|bb*a = b*a */
1308 if (R_last_kk->slen < 1)
1309 {
1310 sb_strdup (R_cur_r, R_last_kj);
1311 }
1312 else if (GNUNET_YES == needs_parentheses (&R_temp_kk))
1313 sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
1314 else
1315 sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
1316
1317 R_cur_l->null_flag = GNUNET_YES;
1318 }
1319 else if ((0 == ij_ik_cmp) && (0 == kk_kj_cmp) &&
1320 (! has_epsilon (R_last_ij)) && has_epsilon (R_last_kk))
1321 {
1322 /* a|a(e|b)*(e|b) = a|ab* = a|a|ab|abb|abbb|... = ab* */
1323 if (needs_parentheses (&R_temp_kk))
1324 sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
1325 else
1326 sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, &R_temp_kk);
1327 R_cur_l->null_flag = GNUNET_YES;
1328 }
1329 else if ((0 == ij_kj_cmp) && (0 == ik_kk_cmp) &&
1330 (! has_epsilon (R_last_ij)) && has_epsilon (R_last_kk))
1331 {
1332 /* a|(e|b)(e|b)*a = a|b*a = a|a|ba|bba|bbba|... = b*a */
1333 if (needs_parentheses (&R_temp_kk))
1334 sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_ij);
1335 else
1336 sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_ij);
1337 R_cur_l->null_flag = GNUNET_YES;
1338 }
1339 else
1340 {
1341 sb_strdup (R_cur_l, R_last_ij);
1342 remove_parentheses (R_cur_l);
1343 }
1344 }
1345 else
1346 {
1347 /* we have no left side */
1348 R_cur_l->null_flag = GNUNET_YES;
1349 }
1350
1351 /* construct R_cur_r, if not already constructed */
1352 if (GNUNET_YES == R_cur_r->null_flag)
1353 {
1354 length = R_temp_kk.slen - R_last_ik->slen;
1355
1356 /* a(ba)*bx = (ab)+x */
1357 if ((length > 0) && (GNUNET_YES != R_last_kk->null_flag) &&
1358 (0 < R_last_kk->slen) && (GNUNET_YES != R_last_kj->null_flag) &&
1359 (0 < R_last_kj->slen) && (GNUNET_YES != R_last_ik->null_flag) &&
1360 (0 < R_last_ik->slen) &&
1361 (0 == sb_strkcmp (&R_temp_kk, R_last_ik, length)) &&
1362 (0 == sb_strncmp (&R_temp_kk, R_last_kj, length)))
1363 {
1364 struct StringBuffer temp_a;
1365 struct StringBuffer temp_b;
1366
1367 sb_init (&temp_a, length);
1368 sb_init (&temp_b, R_last_kj->slen - length);
1369
1370 length_l = length;
1371 temp_a.sbuf = temp_a.abuf;
1372 GNUNET_memcpy (temp_a.sbuf, R_last_kj->sbuf, length_l);
1373 temp_a.slen = length_l;
1374
1375 length_r = R_last_kj->slen - length;
1376 temp_b.sbuf = temp_b.abuf;
1377 GNUNET_memcpy (temp_b.sbuf, &R_last_kj->sbuf[length], length_r);
1378 temp_b.slen = length_r;
1379
1380 /* e|(ab)+ = (ab)* */
1381 if ((GNUNET_YES != R_cur_l->null_flag) && (0 == R_cur_l->slen) &&
1382 (0 == temp_b.slen))
1383 {
1384 sb_printf2 (R_cur_r, "(%.*s%.*s)*", 3, R_last_ik, &temp_a);
1385 sb_free (R_cur_l);
1386 R_cur_l->null_flag = GNUNET_YES;
1387 }
1388 else
1389 {
1390 sb_printf3 (R_cur_r, "(%.*s%.*s)+%.*s", 3, R_last_ik, &temp_a, &temp_b);
1391 }
1392 sb_free (&temp_a);
1393 sb_free (&temp_b);
1394 }
1395 else if ((0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) &&
1396 (0 == sb_strcmp (&R_temp_kk, &R_temp_kj)) )
1397 {
1398 /*
1399 * (e|a)a*(e|a) = a*
1400 * (e|a)(e|a)*(e|a) = a*
1401 */
1402 if (has_epsilon (R_last_ik) && has_epsilon (R_last_kj))
1403 {
1404 if (needs_parentheses (&R_temp_kk))
1405 sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_kk);
1406 else
1407 sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_kk);
1408 }
1409 /* aa*a = a+a */
1410 else if ((0 == clean_ik_kk_cmp) && (0 == clean_kk_kj_cmp) &&
1411 (! has_epsilon (R_last_ik)))
1412 {
1413 if (needs_parentheses (&R_temp_kk))
1414 sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, &R_temp_kk);
1415 else
1416 sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, &R_temp_kk);
1417 }
1418 /*
1419 * (e|a)a*a = a+
1420 * aa*(e|a) = a+
1421 * a(e|a)*(e|a) = a+
1422 * (e|a)a*a = a+
1423 */
1424 else
1425 {
1426 eps_check = (has_epsilon (R_last_ik) + has_epsilon (R_last_kk)
1427 + has_epsilon (R_last_kj));
1428
1429 if (1 == eps_check)
1430 {
1431 if (needs_parentheses (&R_temp_kk))
1432 sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_kk);
1433 else
1434 sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_kk);
1435 }
1436 }
1437 }
1438 /*
1439 * aa*b = a+b
1440 * (e|a)(e|a)*b = a*b
1441 */
1442 else if (0 == sb_strcmp (&R_temp_ik, &R_temp_kk))
1443 {
1444 if (has_epsilon (R_last_ik))
1445 {
1446 if (needs_parentheses (&R_temp_kk))
1447 sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
1448 else
1449 sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
1450 }
1451 else
1452 {
1453 if (needs_parentheses (&R_temp_kk))
1454 sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, R_last_kj);
1455 else
1456 sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, R_last_kj);
1457 }
1458 }
1459 /*
1460 * ba*a = ba+
1461 * b(e|a)*(e|a) = ba*
1462 */
1463 else if (0 == sb_strcmp (&R_temp_kk, &R_temp_kj))
1464 {
1465 if (has_epsilon (R_last_kj))
1466 {
1467 if (needs_parentheses (&R_temp_kk))
1468 sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ik, &R_temp_kk);
1469 else
1470 sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ik, &R_temp_kk);
1471 }
1472 else
1473 {
1474 if (needs_parentheses (&R_temp_kk))
1475 sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, R_last_ik, &R_temp_kk);
1476 else
1477 sb_printf2 (R_cur_r, "%.*s+%.*s", 1, R_last_ik, &R_temp_kk);
1478 }
1479 }
1480 else
1481 {
1482 if (0 < R_temp_kk.slen)
1483 {
1484 if (needs_parentheses (&R_temp_kk))
1485 {
1486 sb_printf3 (R_cur_r,
1487 "%.*s(%.*s)*%.*s",
1488 3,
1489 R_last_ik,
1490 &R_temp_kk,
1491 R_last_kj);
1492 }
1493 else
1494 {
1495 sb_printf3 (R_cur_r,
1496 "%.*s%.*s*%.*s",
1497 1,
1498 R_last_ik,
1499 &R_temp_kk,
1500 R_last_kj);
1501 }
1502 }
1503 else
1504 {
1505 sb_printf2 (R_cur_r, "%.*s%.*s", 0, R_last_ik, R_last_kj);
1506 }
1507 }
1508 }
1509 sb_free (&R_temp_ij);
1510 sb_free (&R_temp_ik);
1511 sb_free (&R_temp_kk);
1512 sb_free (&R_temp_kj);
1513
1514 if ((GNUNET_YES == R_cur_l->null_flag) && (GNUNET_YES == R_cur_r->null_flag))
1515 {
1516 R_cur_ij->null_flag = GNUNET_YES;
1517 return;
1518 }
1519
1520 if ((GNUNET_YES != R_cur_l->null_flag) && (GNUNET_YES == R_cur_r->null_flag))
1521 {
1522 struct StringBuffer tmp;
1523
1524 tmp = *R_cur_ij;
1525 *R_cur_ij = *R_cur_l;
1526 *R_cur_l = tmp;
1527 return;
1528 }
1529
1530 if ((GNUNET_YES == R_cur_l->null_flag) && (GNUNET_YES != R_cur_r->null_flag))
1531 {
1532 struct StringBuffer tmp;
1533
1534 tmp = *R_cur_ij;
1535 *R_cur_ij = *R_cur_r;
1536 *R_cur_r = tmp;
1537 return;
1538 }
1539
1540 if (0 == sb_nullstrcmp (R_cur_l, R_cur_r))
1541 {
1542 struct StringBuffer tmp;
1543
1544 tmp = *R_cur_ij;
1545 *R_cur_ij = *R_cur_l;
1546 *R_cur_l = tmp;
1547 return;
1548 }
1549 sb_printf2 (R_cur_ij, "(%.*s|%.*s)", 3, R_cur_l, R_cur_r);
1550}
1551
1552
1564static int
1566{
1567 unsigned int n = a->state_count;
1568 struct REGEX_INTERNAL_State *states[n];
1569 struct StringBuffer *R_last;
1570 struct StringBuffer *R_cur;
1571 struct StringBuffer R_cur_r;
1572 struct StringBuffer R_cur_l;
1573 struct StringBuffer *R_swap;
1575 struct StringBuffer complete_regex;
1576 unsigned int i;
1577 unsigned int j;
1578 unsigned int k;
1579
1580 R_last = GNUNET_malloc_large (sizeof(struct StringBuffer) * n * n);
1581 R_cur = GNUNET_malloc_large (sizeof(struct StringBuffer) * n * n);
1582 if ((NULL == R_last) || (NULL == R_cur))
1583 {
1585 GNUNET_free (R_cur);
1586 GNUNET_free (R_last);
1587 return GNUNET_SYSERR;
1588 }
1589
1590 /* create depth-first numbering of the states, initializes 'state' */
1592 a->start,
1593 NULL,
1594 NULL,
1596 states);
1597
1598 for (i = 0; i < n; i++)
1599 GNUNET_assert (NULL != states[i]);
1600 for (i = 0; i < n; i++)
1601 for (j = 0; j < n; j++)
1602 R_last[i * n + j].null_flag = GNUNET_YES;
1603
1604 /* Compute regular expressions of length "1" between each pair of states */
1605 for (i = 0; i < n; i++)
1606 {
1607 for (t = states[i]->transitions_head; NULL != t; t = t->next)
1608 {
1609 j = t->to_state->dfs_id;
1610 if (GNUNET_YES == R_last[i * n + j].null_flag)
1611 {
1612 sb_strdup_cstr (&R_last[i * n + j], t->label);
1613 }
1614 else
1615 {
1616 sb_append_cstr (&R_last[i * n + j], "|");
1617 sb_append_cstr (&R_last[i * n + j], t->label);
1618 }
1619 }
1620 /* add self-loop: i is reachable from i via epsilon-transition */
1621 if (GNUNET_YES == R_last[i * n + i].null_flag)
1622 {
1623 R_last[i * n + i].slen = 0;
1624 R_last[i * n + i].null_flag = GNUNET_NO;
1625 }
1626 else
1627 {
1628 sb_wrap (&R_last[i * n + i], "(|%.*s)", 3);
1629 }
1630 }
1631 for (i = 0; i < n; i++)
1632 for (j = 0; j < n; j++)
1633 if (needs_parentheses (&R_last[i * n + j]))
1634 sb_wrap (&R_last[i * n + j], "(%.*s)", 2);
1635 /* Compute regular expressions of length "k" between each pair of states per
1636 * induction */
1637 memset (&R_cur_l, 0, sizeof(struct StringBuffer));
1638 memset (&R_cur_r, 0, sizeof(struct StringBuffer));
1639 for (k = 0; k < n; k++)
1640 {
1641 for (i = 0; i < n; i++)
1642 {
1643 for (j = 0; j < n; j++)
1644 {
1645 /* Basis for the recursion:
1646 * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1647 * R_last == R^{(k-1)}, R_cur == R^{(k)}
1648 */
1649
1650 /* Create R_cur[i][j] and simplify the expression */
1651 automaton_create_proofs_simplify (&R_last[i * n + j],
1652 &R_last[i * n + k],
1653 &R_last[k * n + k],
1654 &R_last[k * n + j],
1655 &R_cur[i * n + j],
1656 &R_cur_l,
1657 &R_cur_r);
1658 }
1659 }
1660 /* set R_last = R_cur */
1661 R_swap = R_last;
1662 R_last = R_cur;
1663 R_cur = R_swap;
1664 /* clear 'R_cur' for next iteration */
1665 for (i = 0; i < n; i++)
1666 for (j = 0; j < n; j++)
1667 R_cur[i * n + j].null_flag = GNUNET_YES;
1668 }
1669 sb_free (&R_cur_l);
1670 sb_free (&R_cur_r);
1671 /* assign proofs and hashes */
1672 for (i = 0; i < n; i++)
1673 {
1674 if (GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag)
1675 {
1676 states[i]->proof = GNUNET_strndup (R_last[a->start->dfs_id * n + i].sbuf,
1677 R_last[a->start->dfs_id * n + i].slen);
1678 GNUNET_CRYPTO_hash (states[i]->proof,
1679 strlen (states[i]->proof),
1680 &states[i]->hash);
1681 }
1682 }
1683
1684 /* complete regex for whole DFA: union of all pairs (start state/accepting
1685 * state(s)). */
1686 sb_init (&complete_regex, 16 * n);
1687 for (i = 0; i < n; i++)
1688 {
1689 if (states[i]->accepting)
1690 {
1691 if ((0 == complete_regex.slen) &&
1692 (0 < R_last[a->start->dfs_id * n + i].slen))
1693 {
1694 sb_append (&complete_regex, &R_last[a->start->dfs_id * n + i]);
1695 }
1696 else if ((GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag) &&
1697 (0 < R_last[a->start->dfs_id * n + i].slen))
1698 {
1699 sb_append_cstr (&complete_regex, "|");
1700 sb_append (&complete_regex, &R_last[a->start->dfs_id * n + i]);
1701 }
1702 }
1703 }
1704 a->canonical_regex =
1705 GNUNET_strndup (complete_regex.sbuf, complete_regex.slen);
1706
1707 /* cleanup */
1708 sb_free (&complete_regex);
1709 for (i = 0; i < n; i++)
1710 for (j = 0; j < n; j++)
1711 {
1712 sb_free (&R_cur[i * n + j]);
1713 sb_free (&R_last[i * n + j]);
1714 }
1715 GNUNET_free (R_cur);
1716 GNUNET_free (R_last);
1717 return GNUNET_OK;
1718}
1719
1720
1730static struct REGEX_INTERNAL_State *
1732 struct REGEX_INTERNAL_StateSet *nfa_states)
1733{
1734 struct REGEX_INTERNAL_State *s;
1735 char *pos;
1736 size_t len;
1737 struct REGEX_INTERNAL_State *cstate;
1738 struct REGEX_INTERNAL_Transition *ctran;
1739 unsigned int i;
1740
1741 s = GNUNET_new (struct REGEX_INTERNAL_State);
1742 s->id = ctx->state_id++;
1743 s->index = -1;
1744 s->lowlink = -1;
1745
1746 if (NULL == nfa_states)
1747 {
1748 GNUNET_asprintf (&s->name, "s%i", s->id);
1749 return s;
1750 }
1751
1752 s->nfa_set = *nfa_states;
1753
1754 if (nfa_states->off < 1)
1755 return s;
1756
1757 /* Create a name based on 'nfa_states' */
1758 len = nfa_states->off * 14 + 4;
1759 s->name = GNUNET_malloc (len);
1760 strcat (s->name, "{");
1761 pos = s->name + 1;
1762
1763 for (i = 0; i < nfa_states->off; i++)
1764 {
1765 cstate = nfa_states->states[i];
1766 GNUNET_snprintf (pos, pos - s->name + len, "%i,", cstate->id);
1767 pos += strlen (pos);
1768
1769 /* Add a transition for each distinct label to NULL state */
1770 for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next)
1771 if (NULL != ctran->label)
1772 state_add_transition (ctx, s, ctran->label, NULL);
1773
1774 /* If the nfa_states contain an accepting state, the new dfa state is also
1775 * accepting. */
1776 if (cstate->accepting)
1777 s->accepting = 1;
1778 }
1779 pos[-1] = '}';
1780 s->name = GNUNET_realloc (s->name, strlen (s->name) + 1);
1781
1782 memset (nfa_states, 0, sizeof(struct REGEX_INTERNAL_StateSet));
1783 return s;
1784}
1785
1786
1800static unsigned int
1801dfa_move (struct REGEX_INTERNAL_State **s, const char *str)
1802{
1804 struct REGEX_INTERNAL_State *new_s;
1805 unsigned int len;
1806 unsigned int max_len;
1807
1808 if (NULL == s)
1809 return 0;
1810
1811 new_s = NULL;
1812 max_len = 0;
1813 for (t = (*s)->transitions_head; NULL != t; t = t->next)
1814 {
1815 len = strlen (t->label);
1816
1817 if (0 == strncmp (t->label, str, len))
1818 {
1819 if (len >= max_len)
1820 {
1821 max_len = len;
1822 new_s = t->to_state;
1823 }
1824 }
1825 }
1826
1827 *s = new_s;
1828 return max_len;
1829}
1830
1831
1841static void
1842mark_states (void *cls,
1843 const unsigned int count,
1844 struct REGEX_INTERNAL_State *s)
1845{
1846 s->marked = GNUNET_YES;
1847}
1848
1849
1856static void
1858{
1859 struct REGEX_INTERNAL_State *s;
1860 struct REGEX_INTERNAL_State *s_next;
1861
1862 /* 1. unmark all states */
1863 for (s = a->states_head; NULL != s; s = s->next)
1864 s->marked = GNUNET_NO;
1865
1866 /* 2. traverse dfa from start state and mark all visited states */
1868 a->start,
1869 NULL,
1870 NULL,
1871 &mark_states,
1872 NULL);
1873
1874 /* 3. delete all states that were not visited */
1875 for (s = a->states_head; NULL != s; s = s_next)
1876 {
1877 s_next = s->next;
1878 if (GNUNET_NO == s->marked)
1880 }
1881}
1882
1883
1890static void
1892{
1893 struct REGEX_INTERNAL_State *s;
1894 struct REGEX_INTERNAL_State *s_next;
1896 int dead;
1897
1898 GNUNET_assert (DFA == a->type);
1899
1900 for (s = a->states_head; NULL != s; s = s_next)
1901 {
1902 s_next = s->next;
1903
1904 if (s->accepting)
1905 continue;
1906
1907 dead = 1;
1908 for (t = s->transitions_head; NULL != t; t = t->next)
1909 {
1910 if ((NULL != t->to_state) && (t->to_state != s) )
1911 {
1912 dead = 0;
1913 break;
1914 }
1915 }
1916
1917 if (0 == dead)
1918 continue;
1919
1920 /* state s is dead, remove it */
1922 }
1923}
1924
1925
1933static int
1935 struct REGEX_INTERNAL_Automaton *a)
1936{
1937 uint32_t *table;
1938 struct REGEX_INTERNAL_State *s1;
1939 struct REGEX_INTERNAL_State *s2;
1940 struct REGEX_INTERNAL_Transition *t1;
1941 struct REGEX_INTERNAL_Transition *t2;
1942 struct REGEX_INTERNAL_State *s1_next;
1943 struct REGEX_INTERNAL_State *s2_next;
1944 int change;
1945 unsigned int num_equal_edges;
1946 unsigned int i;
1947 unsigned int state_cnt;
1948 unsigned long long idx;
1949 unsigned long long idx1;
1950
1951 if ((NULL == a) || (0 == a->state_count))
1952 {
1954 "Could not merge nondistinguishable states, automaton was NULL.\n");
1955 return GNUNET_SYSERR;
1956 }
1957
1958 state_cnt = a->state_count;
1960 (sizeof(uint32_t) * state_cnt * state_cnt / 32) + sizeof(uint32_t));
1961 if (NULL == table)
1962 {
1964 return GNUNET_SYSERR;
1965 }
1966
1967 for (i = 0, s1 = a->states_head; NULL != s1; s1 = s1->next)
1968 s1->marked = i++;
1969
1970 /* Mark all pairs of accepting/!accepting states */
1971 for (s1 = a->states_head; NULL != s1; s1 = s1->next)
1972 for (s2 = a->states_head; NULL != s2; s2 = s2->next)
1973 if ((s1->accepting && ! s2->accepting) ||
1974 (! s1->accepting && s2->accepting))
1975 {
1976 idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
1977 table[idx / 32] |= (1U << (idx % 32));
1978 }
1979
1980 /* Find all equal states */
1981 change = 1;
1982 while (0 != change)
1983 {
1984 change = 0;
1985 for (s1 = a->states_head; NULL != s1; s1 = s1->next)
1986 {
1987 for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2->next)
1988 {
1989 idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
1990 if (0 != (table[idx / 32] & (1U << (idx % 32))))
1991 continue;
1992 num_equal_edges = 0;
1993 for (t1 = s1->transitions_head; NULL != t1; t1 = t1->next)
1994 {
1995 for (t2 = s2->transitions_head; NULL != t2; t2 = t2->next)
1996 {
1997 if (0 == strcmp (t1->label, t2->label))
1998 {
1999 num_equal_edges++;
2000 /* same edge, but targets definitively different, so we're different
2001 as well */
2002 if (t1->to_state->marked > t2->to_state->marked)
2003 idx1 = (unsigned long long) t1->to_state->marked * state_cnt
2004 + t2->to_state->marked;
2005 else
2006 idx1 = (unsigned long long) t2->to_state->marked * state_cnt
2007 + t1->to_state->marked;
2008 if (0 != (table[idx1 / 32] & (1U << (idx1 % 32))))
2009 {
2010 table[idx / 32] |= (1U << (idx % 32));
2011 change = 1; /* changed a marker, need to run again */
2012 }
2013 }
2014 }
2015 }
2016 if ((num_equal_edges != s1->transition_count) ||
2017 (num_equal_edges != s2->transition_count))
2018 {
2019 /* Make sure ALL edges of possible equal states are the same */
2020 table[idx / 32] |= (1U << (idx % 32));
2021 change = 1; /* changed a marker, need to run again */
2022 }
2023 }
2024 }
2025 }
2026
2027 /* Merge states that are equal */
2028 for (s1 = a->states_head; NULL != s1; s1 = s1_next)
2029 {
2030 s1_next = s1->next;
2031 for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2_next)
2032 {
2033 s2_next = s2->next;
2034 idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
2035 if (0 == (table[idx / 32] & (1U << (idx % 32))))
2036 automaton_merge_states (ctx, a, s1, s2);
2037 }
2038 }
2039
2041 return GNUNET_OK;
2042}
2043
2044
2053static int
2055 struct REGEX_INTERNAL_Automaton *a)
2056{
2057 if (NULL == a)
2058 return GNUNET_SYSERR;
2059
2060 GNUNET_assert (DFA == a->type);
2061
2062 /* 1. remove unreachable states */
2064
2065 /* 2. remove dead states */
2067
2068 /* 3. Merge nondistinguishable states */
2070 return GNUNET_SYSERR;
2071 return GNUNET_OK;
2072}
2073
2074
2079{
2083 const unsigned int stride;
2084
2090
2095};
2096
2097
2108static void
2110 const unsigned int depth,
2111 char *label,
2113 struct REGEX_INTERNAL_State *s)
2114{
2115 struct REGEX_INTERNAL_Strided_Context *ctx = cls;
2117 char *new_label;
2118
2119 if (depth == ctx->stride)
2120 {
2122 t->label = GNUNET_strdup (label);
2123 t->to_state = s;
2124 t->from_state = start;
2125 GNUNET_CONTAINER_DLL_insert (ctx->transitions_head,
2126 ctx->transitions_tail,
2127 t);
2128 }
2129 else
2130 {
2131 for (t = s->transitions_head; NULL != t; t = t->next)
2132 {
2133 /* Do not consider self-loops, because it end's up in too many
2134 * transitions */
2135 if (t->to_state == t->from_state)
2136 continue;
2137
2138 if (NULL != label)
2139 {
2140 GNUNET_asprintf (&new_label, "%s%s", label, t->label);
2141 }
2142 else
2143 new_label = GNUNET_strdup (t->label);
2144
2146 (depth + 1),
2147 new_label,
2148 start,
2149 t->to_state);
2150 }
2151 }
2153}
2154
2155
2164static void
2166 const unsigned int count,
2167 struct REGEX_INTERNAL_State *s)
2168{
2169 dfa_add_multi_strides_helper (cls, 0, NULL, s, s);
2170}
2171
2172
2180void
2182 struct REGEX_INTERNAL_Automaton *dfa,
2183 const unsigned int stride_len)
2184{
2185 struct REGEX_INTERNAL_Strided_Context ctx = { stride_len, NULL, NULL };
2187 struct REGEX_INTERNAL_Transition *t_next;
2188
2189 if ((1 > stride_len) || (GNUNET_YES == dfa->is_multistrided))
2190 return;
2191
2192 /* Compute the new transitions of given stride_len */
2194 dfa->start,
2195 NULL,
2196 NULL,
2198 &ctx);
2199
2200 /* Add all the new transitions to the automaton. */
2201 for (t = ctx.transitions_head; NULL != t; t = t_next)
2202 {
2203 t_next = t->next;
2204 state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
2205 GNUNET_CONTAINER_DLL_remove (ctx.transitions_head, ctx.transitions_tail, t);
2206 GNUNET_free (t->label);
2207 GNUNET_free (t);
2208 }
2209
2210 /* Mark this automaton as multistrided */
2212}
2213
2214
2228static void
2231 struct REGEX_INTERNAL_State *cur,
2232 char *label,
2233 unsigned int max_len,
2234 struct REGEX_INTERNAL_Transition **transitions_head,
2235 struct REGEX_INTERNAL_Transition **transitions_tail)
2236{
2238 char *new_label;
2239
2240
2241 if ((NULL != label) &&
2242 (((cur->incoming_transition_count > 1) || (GNUNET_YES ==
2243 cur->accepting) ||
2244 (GNUNET_YES == cur->marked) ) ||
2245 ((start != dfa->start) && (max_len > 0) && (max_len == strlen (
2246 label))) ||
2247 ((start == dfa->start) && (GNUNET_REGEX_INITIAL_BYTES == strlen (
2248 label)))))
2249 {
2251 t->label = GNUNET_strdup (label);
2252 t->to_state = cur;
2253 t->from_state = start;
2254 GNUNET_CONTAINER_DLL_insert (*transitions_head, *transitions_tail, t);
2255
2256 if (GNUNET_NO == cur->marked)
2257 {
2259 cur,
2260 cur,
2261 NULL,
2262 max_len,
2263 transitions_head,
2264 transitions_tail);
2265 }
2266 return;
2267 }
2268 else if (cur != start)
2269 cur->contained = GNUNET_YES;
2270
2271 if ((GNUNET_YES == cur->marked) && (cur != start))
2272 return;
2273
2274 cur->marked = GNUNET_YES;
2275
2276
2277 for (t = cur->transitions_head; NULL != t; t = t->next)
2278 {
2279 if (NULL != label)
2280 GNUNET_asprintf (&new_label, "%s%s", label, t->label);
2281 else
2282 new_label = GNUNET_strdup (t->label);
2283
2284 if (t->to_state != cur)
2285 {
2287 start,
2288 t->to_state,
2289 new_label,
2290 max_len,
2291 transitions_head,
2292 transitions_tail);
2293 }
2294 GNUNET_free (new_label);
2295 }
2296}
2297
2298
2307static void
2309 struct REGEX_INTERNAL_Automaton *dfa,
2310 unsigned int max_len)
2311{
2312 struct REGEX_INTERNAL_State *s;
2313 struct REGEX_INTERNAL_State *s_next;
2315 struct REGEX_INTERNAL_Transition *t_next;
2316 struct REGEX_INTERNAL_Transition *transitions_head = NULL;
2317 struct REGEX_INTERNAL_Transition *transitions_tail = NULL;
2318
2319 if (NULL == dfa)
2320 return;
2321
2322 /* Count the incoming transitions on each state. */
2323 for (s = dfa->states_head; NULL != s; s = s->next)
2324 {
2325 for (t = s->transitions_head; NULL != t; t = t->next)
2326 {
2327 if (NULL != t->to_state)
2328 t->to_state->incoming_transition_count++;
2329 }
2330 }
2331
2332 /* Unmark all states. */
2333 for (s = dfa->states_head; NULL != s; s = s->next)
2334 {
2335 s->marked = GNUNET_NO;
2336 s->contained = GNUNET_NO;
2337 }
2338
2339 /* Add strides and mark states that can be deleted. */
2341 dfa->start,
2342 dfa->start,
2343 NULL,
2344 max_len,
2345 &transitions_head,
2346 &transitions_tail);
2347
2348 /* Add all the new transitions to the automaton. */
2349 for (t = transitions_head; NULL != t; t = t_next)
2350 {
2351 t_next = t->next;
2352 state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
2353 GNUNET_CONTAINER_DLL_remove (transitions_head, transitions_tail, t);
2354 GNUNET_free (t->label);
2355 GNUNET_free (t);
2356 }
2357
2358 /* Remove marked states (including their incoming and outgoing transitions). */
2359 for (s = dfa->states_head; NULL != s; s = s_next)
2360 {
2361 s_next = s->next;
2362 if (GNUNET_YES == s->contained)
2363 automaton_remove_state (dfa, s);
2364 }
2365}
2366
2367
2377static struct REGEX_INTERNAL_Automaton *
2379 struct REGEX_INTERNAL_State *end)
2380{
2381 struct REGEX_INTERNAL_Automaton *n;
2382
2384
2385 n->type = NFA;
2386 n->start = NULL;
2387 n->end = NULL;
2388 n->state_count = 0;
2389
2390 if ((NULL == start) || (NULL == end))
2391 return n;
2392
2395
2396 n->state_count = 2;
2397
2398 n->start = start;
2399 n->end = end;
2400
2401 return n;
2402}
2403
2404
2412static void
2416{
2417 struct REGEX_INTERNAL_State *s;
2418
2419 if ((NULL == n) || (NULL == states_head))
2420 {
2421 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not add states\n");
2422 return;
2423 }
2424
2425 if (NULL == n->states_head)
2426 {
2427 n->states_head = states_head;
2428 n->states_tail = states_tail;
2429 return;
2430 }
2431
2432 if (NULL != states_head)
2433 {
2434 n->states_tail->next = states_head;
2435 n->states_tail = states_tail;
2436 }
2437
2438 for (s = states_head; NULL != s; s = s->next)
2439 n->state_count++;
2440}
2441
2442
2451static struct REGEX_INTERNAL_State *
2453{
2454 struct REGEX_INTERNAL_State *s;
2455
2456 s = GNUNET_new (struct REGEX_INTERNAL_State);
2457 s->id = ctx->state_id++;
2458 s->accepting = accepting;
2459 s->marked = GNUNET_NO;
2460 s->contained = 0;
2461 s->index = -1;
2462 s->lowlink = -1;
2463 s->scc_id = 0;
2464 s->name = NULL;
2465 GNUNET_asprintf (&s->name, "s%i", s->id);
2466
2467 return s;
2468}
2469
2470
2480static void
2482 struct REGEX_INTERNAL_Automaton *nfa,
2483 struct REGEX_INTERNAL_StateSet *states,
2484 const char *label)
2485{
2486 struct REGEX_INTERNAL_State *s;
2487 unsigned int i;
2488 struct REGEX_INTERNAL_StateSet_MDLL cls_stack;
2489 struct REGEX_INTERNAL_State *clsstate;
2490 struct REGEX_INTERNAL_State *currentstate;
2491 struct REGEX_INTERNAL_Transition *ctran;
2492
2493 memset (ret, 0, sizeof(struct REGEX_INTERNAL_StateSet));
2494 if (NULL == states)
2495 return;
2496
2497 for (i = 0; i < states->off; i++)
2498 {
2499 s = states->states[i];
2500
2501 /* Add start state to closure only for epsilon closure */
2502 if (NULL == label)
2503 state_set_append (ret, s);
2504
2505 /* initialize work stack */
2506 cls_stack.head = NULL;
2507 cls_stack.tail = NULL;
2508 GNUNET_CONTAINER_MDLL_insert (ST, cls_stack.head, cls_stack.tail, s);
2509 cls_stack.len = 1;
2510
2511 while (NULL != (currentstate = cls_stack.tail))
2512 {
2514 cls_stack.head,
2515 cls_stack.tail,
2516 currentstate);
2517 cls_stack.len--;
2518 for (ctran = currentstate->transitions_head; NULL != ctran;
2519 ctran = ctran->next)
2520 {
2521 if (NULL == (clsstate = ctran->to_state))
2522 continue;
2523 if (0 != clsstate->contained)
2524 continue;
2525 if (0 != nullstrcmp (label, ctran->label))
2526 continue;
2527 state_set_append (ret, clsstate);
2529 cls_stack.head,
2530 cls_stack.tail,
2531 clsstate);
2532 cls_stack.len++;
2533 clsstate->contained = 1;
2534 }
2535 }
2536 }
2537 for (i = 0; i < ret->off; i++)
2538 ret->states[i]->contained = 0;
2539
2540 if (ret->off > 1)
2541 qsort (ret->states,
2542 ret->off,
2543 sizeof(struct REGEX_INTERNAL_State *),
2544 &state_compare);
2545}
2546
2547
2553static void
2555{
2556 struct REGEX_INTERNAL_Automaton *a;
2557 struct REGEX_INTERNAL_Automaton *b;
2558 struct REGEX_INTERNAL_Automaton *new_nfa;
2559
2560 b = ctx->stack_tail;
2561 GNUNET_assert (NULL != b);
2562 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
2563 a = ctx->stack_tail;
2564 GNUNET_assert (NULL != a);
2565 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2566
2567 state_add_transition (ctx, a->end, NULL, b->start);
2568 a->end->accepting = 0;
2569 b->end->accepting = 1;
2570
2571 new_nfa = nfa_fragment_create (NULL, NULL);
2572 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2573 nfa_add_states (new_nfa, b->states_head, b->states_tail);
2574 new_nfa->start = a->start;
2575 new_nfa->end = b->end;
2576 new_nfa->state_count += a->state_count + b->state_count;
2579
2580 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2581}
2582
2583
2589static void
2591{
2592 struct REGEX_INTERNAL_Automaton *a;
2593 struct REGEX_INTERNAL_Automaton *new_nfa;
2595 struct REGEX_INTERNAL_State *end;
2596
2597 a = ctx->stack_tail;
2598
2599 if (NULL == a)
2600 {
2601 GNUNET_log (
2603 "nfa_add_star_op failed, because there was no element on the stack");
2604 return;
2605 }
2606
2607 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2608
2609 start = nfa_state_create (ctx, 0);
2610 end = nfa_state_create (ctx, 1);
2611
2612 state_add_transition (ctx, start, NULL, a->start);
2614 state_add_transition (ctx, a->end, NULL, a->start);
2615 state_add_transition (ctx, a->end, NULL, end);
2616
2617 a->end->accepting = 0;
2618 end->accepting = 1;
2619
2620 new_nfa = nfa_fragment_create (start, end);
2621 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2623
2624 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2625}
2626
2627
2633static void
2635{
2636 struct REGEX_INTERNAL_Automaton *a;
2637
2638 a = ctx->stack_tail;
2639
2640 if (NULL == a)
2641 {
2642 GNUNET_log (
2644 "nfa_add_plus_op failed, because there was no element on the stack");
2645 return;
2646 }
2647
2648 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2649
2650 state_add_transition (ctx, a->end, NULL, a->start);
2651
2652 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, a);
2653}
2654
2655
2661static void
2663{
2664 struct REGEX_INTERNAL_Automaton *a;
2665 struct REGEX_INTERNAL_Automaton *new_nfa;
2667 struct REGEX_INTERNAL_State *end;
2668
2669 a = ctx->stack_tail;
2670 if (NULL == a)
2671 {
2672 GNUNET_log (
2674 "nfa_add_question_op failed, because there was no element on the stack");
2675 return;
2676 }
2677
2678 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2679
2680 start = nfa_state_create (ctx, 0);
2681 end = nfa_state_create (ctx, 1);
2682
2683 state_add_transition (ctx, start, NULL, a->start);
2685 state_add_transition (ctx, a->end, NULL, end);
2686
2687 a->end->accepting = 0;
2688
2689 new_nfa = nfa_fragment_create (start, end);
2690 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2691 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2693}
2694
2695
2702static void
2704{
2705 struct REGEX_INTERNAL_Automaton *a;
2706 struct REGEX_INTERNAL_Automaton *b;
2707 struct REGEX_INTERNAL_Automaton *new_nfa;
2709 struct REGEX_INTERNAL_State *end;
2710
2711 b = ctx->stack_tail;
2712 GNUNET_assert (NULL != b);
2713 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
2714 a = ctx->stack_tail;
2715 GNUNET_assert (NULL != a);
2716 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2717
2718 start = nfa_state_create (ctx, 0);
2719 end = nfa_state_create (ctx, 1);
2720 state_add_transition (ctx, start, NULL, a->start);
2721 state_add_transition (ctx, start, NULL, b->start);
2722
2723 state_add_transition (ctx, a->end, NULL, end);
2724 state_add_transition (ctx, b->end, NULL, end);
2725
2726 a->end->accepting = 0;
2727 b->end->accepting = 0;
2728 end->accepting = 1;
2729
2730 new_nfa = nfa_fragment_create (start, end);
2731 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2732 nfa_add_states (new_nfa, b->states_head, b->states_tail);
2735
2736 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2737}
2738
2739
2746static void
2747nfa_add_label (struct REGEX_INTERNAL_Context *ctx, const char *label)
2748{
2749 struct REGEX_INTERNAL_Automaton *n;
2751 struct REGEX_INTERNAL_State *end;
2752
2753 GNUNET_assert (NULL != ctx);
2754
2755 start = nfa_state_create (ctx, 0);
2756 end = nfa_state_create (ctx, 1);
2757 state_add_transition (ctx, start, label, end);
2759 GNUNET_assert (NULL != n);
2760 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, n);
2761}
2762
2763
2769static void
2771{
2772 if (NULL == ctx)
2773 {
2774 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Context was NULL!");
2775 return;
2776 }
2777 ctx->state_id = 0;
2778 ctx->transition_id = 0;
2779 ctx->stack_head = NULL;
2780 ctx->stack_tail = NULL;
2781}
2782
2783
2793REGEX_INTERNAL_construct_nfa (const char *regex, const size_t len)
2794{
2796 struct REGEX_INTERNAL_Automaton *nfa;
2797 const char *regexp;
2798 char curlabel[2];
2799 const char *error_msg;
2800 unsigned int count;
2801 unsigned int altcount;
2802 unsigned int atomcount;
2803 unsigned int poff;
2804 unsigned int psize;
2805
2806 struct
2807 {
2808 int altcount;
2809 int atomcount;
2810 } *p;
2811
2812 if ((NULL == regex) || (0 == strlen (regex)) || (0 == len))
2813 {
2815 "Could not parse regex. Empty regex string provided.\n");
2816
2817 return NULL;
2818 }
2820
2821 regexp = regex;
2822 curlabel[1] = '\0';
2823 p = NULL;
2824 error_msg = NULL;
2825 altcount = 0;
2826 atomcount = 0;
2827 poff = 0;
2828 psize = 0;
2829
2830 for (count = 0; count < len && *regexp; count++, regexp++)
2831 {
2832 switch (*regexp)
2833 {
2834 case '(':
2835 if (atomcount > 1)
2836 {
2837 --atomcount;
2839 }
2840 if (poff == psize)
2841 GNUNET_array_grow (p, psize, psize * 2 + 4); /* FIXME why *2 +4? */
2842 p[poff].altcount = altcount;
2843 p[poff].atomcount = atomcount;
2844 poff++;
2845 altcount = 0;
2846 atomcount = 0;
2847 break;
2848
2849 case '|':
2850 if (0 == atomcount)
2851 {
2852 error_msg = "Cannot append '|' to nothing";
2853 goto error;
2854 }
2855 while (--atomcount > 0)
2857 altcount++;
2858 break;
2859
2860 case ')':
2861 if (0 == poff)
2862 {
2863 error_msg = "Missing opening '('";
2864 goto error;
2865 }
2866 if (0 == atomcount)
2867 {
2868 /* Ignore this: "()" */
2869 poff--;
2870 altcount = p[poff].altcount;
2871 atomcount = p[poff].atomcount;
2872 break;
2873 }
2874 while (--atomcount > 0)
2876 for (; altcount > 0; altcount--)
2878 poff--;
2879 altcount = p[poff].altcount;
2880 atomcount = p[poff].atomcount;
2881 atomcount++;
2882 break;
2883
2884 case '*':
2885 if (atomcount == 0)
2886 {
2887 error_msg = "Cannot append '*' to nothing";
2888 goto error;
2889 }
2891 break;
2892
2893 case '+':
2894 if (atomcount == 0)
2895 {
2896 error_msg = "Cannot append '+' to nothing";
2897 goto error;
2898 }
2900 break;
2901
2902 case '?':
2903 if (atomcount == 0)
2904 {
2905 error_msg = "Cannot append '?' to nothing";
2906 goto error;
2907 }
2909 break;
2910
2911 default:
2912 if (atomcount > 1)
2913 {
2914 --atomcount;
2916 }
2917 curlabel[0] = *regexp;
2918 nfa_add_label (&ctx, curlabel);
2919 atomcount++;
2920 break;
2921 }
2922 }
2923 if (0 != poff)
2924 {
2925 error_msg = "Unbalanced parenthesis";
2926 goto error;
2927 }
2928 while (--atomcount > 0)
2930 for (; altcount > 0; altcount--)
2932
2933 GNUNET_array_grow (p, psize, 0);
2934
2935 nfa = ctx.stack_tail;
2936 GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa);
2937
2938 if (NULL != ctx.stack_head)
2939 {
2940 error_msg = "Creating the NFA failed. NFA stack was not empty!";
2941 goto error;
2942 }
2943
2944 /* Remember the regex that was used to generate this NFA */
2945 nfa->regex = GNUNET_strdup (regex);
2946
2947 /* create depth-first numbering of the states for pretty printing */
2949 NULL,
2950 NULL,
2951 NULL,
2953 NULL);
2954
2955 /* No multistriding added so far */
2957
2958 return nfa;
2959
2960error:
2961 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not parse regex: `%s'\n", regex);
2962 if (NULL != error_msg)
2963 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "%s\n", error_msg);
2964
2965 GNUNET_free (p);
2966
2967 while (NULL != (nfa = ctx.stack_head))
2968 {
2969 GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa);
2971 }
2972
2973 return NULL;
2974}
2975
2976
2986static void
2988 struct REGEX_INTERNAL_Automaton *nfa,
2989 struct REGEX_INTERNAL_Automaton *dfa,
2990 struct REGEX_INTERNAL_State *dfa_state)
2991{
2992 struct REGEX_INTERNAL_Transition *ctran;
2993 struct REGEX_INTERNAL_State *new_dfa_state;
2994 struct REGEX_INTERNAL_State *state_contains;
2995 struct REGEX_INTERNAL_State *state_iter;
2996 struct REGEX_INTERNAL_StateSet tmp;
2997 struct REGEX_INTERNAL_StateSet nfa_set;
2998
2999 for (ctran = dfa_state->transitions_head; NULL != ctran; ctran = ctran->next)
3000 {
3001 if ((NULL == ctran->label) || (NULL != ctran->to_state) )
3002 continue;
3003
3004 nfa_closure_set_create (&tmp, nfa, &dfa_state->nfa_set, ctran->label);
3005 nfa_closure_set_create (&nfa_set, nfa, &tmp, NULL);
3006 state_set_clear (&tmp);
3007
3008 state_contains = NULL;
3009 for (state_iter = dfa->states_head; NULL != state_iter;
3010 state_iter = state_iter->next)
3011 {
3012 if (0 == state_set_compare (&state_iter->nfa_set, &nfa_set))
3013 {
3014 state_contains = state_iter;
3015 break;
3016 }
3017 }
3018 if (NULL == state_contains)
3019 {
3020 new_dfa_state = dfa_state_create (ctx, &nfa_set);
3021 automaton_add_state (dfa, new_dfa_state);
3022 ctran->to_state = new_dfa_state;
3023 construct_dfa_states (ctx, nfa, dfa, new_dfa_state);
3024 }
3025 else
3026 {
3027 ctran->to_state = state_contains;
3028 state_set_clear (&nfa_set);
3029 }
3030 }
3031}
3032
3033
3036 const size_t len,
3037 unsigned int max_path_len)
3038{
3040 struct REGEX_INTERNAL_Automaton *dfa;
3041 struct REGEX_INTERNAL_Automaton *nfa;
3042 struct REGEX_INTERNAL_StateSet nfa_start_eps_cls;
3043 struct REGEX_INTERNAL_StateSet singleton_set;
3044
3046
3047 /* Create NFA */
3048 nfa = REGEX_INTERNAL_construct_nfa (regex, len);
3049
3050 if (NULL == nfa)
3051 {
3053 "Could not create DFA, because NFA creation failed\n");
3054 return NULL;
3055 }
3056
3057 dfa = GNUNET_new (struct REGEX_INTERNAL_Automaton);
3058 dfa->type = DFA;
3059 dfa->regex = GNUNET_strdup (regex);
3060
3061 /* Create DFA start state from epsilon closure */
3062 memset (&singleton_set, 0, sizeof(struct REGEX_INTERNAL_StateSet));
3063 state_set_append (&singleton_set, nfa->start);
3064 nfa_closure_set_create (&nfa_start_eps_cls, nfa, &singleton_set, NULL);
3065 state_set_clear (&singleton_set);
3066 dfa->start = dfa_state_create (&ctx, &nfa_start_eps_cls);
3067 automaton_add_state (dfa, dfa->start);
3068
3069 construct_dfa_states (&ctx, nfa, dfa, dfa->start);
3071
3072 /* Minimize DFA */
3073 if (GNUNET_OK != dfa_minimize (&ctx, dfa))
3074 {
3076 return NULL;
3077 }
3078
3079 /* Create proofs and hashes for all states */
3081 {
3083 return NULL;
3084 }
3085
3086 /* Compress linear DFA paths */
3087 if (1 != max_path_len)
3088 dfa_compress_paths (&ctx, dfa, max_path_len);
3089
3090 return dfa;
3091}
3092
3093
3094void
3096{
3097 struct REGEX_INTERNAL_State *s;
3098 struct REGEX_INTERNAL_State *next_state;
3099
3100 if (NULL == a)
3101 return;
3102
3103 GNUNET_free (a->regex);
3105
3106 for (s = a->states_head; NULL != s; s = next_state)
3107 {
3108 next_state = s->next;
3111 }
3112
3113 GNUNET_free (a);
3114}
3115
3116
3125static int
3126evaluate_dfa (struct REGEX_INTERNAL_Automaton *a, const char *string)
3127{
3128 const char *strp;
3129 struct REGEX_INTERNAL_State *s;
3130 unsigned int step_len;
3131
3132 if (DFA != a->type)
3133 {
3135 "Tried to evaluate DFA, but NFA automaton given");
3136 return -1;
3137 }
3138
3139 s = a->start;
3140
3141 /* If the string is empty but the starting state is accepting, we accept. */
3142 if (((NULL == string) || (0 == strlen (string))) && s->accepting)
3143 return 0;
3144
3145 for (strp = string; NULL != strp && *strp; strp += step_len)
3146 {
3147 step_len = dfa_move (&s, strp);
3148
3149 if (NULL == s)
3150 break;
3151 }
3152
3153 if ((NULL != s) && s->accepting)
3154 return 0;
3155
3156 return 1;
3157}
3158
3159
3167static int
3168evaluate_nfa (struct REGEX_INTERNAL_Automaton *a, const char *string)
3169{
3170 const char *strp;
3171 char str[2];
3172 struct REGEX_INTERNAL_State *s;
3173 struct REGEX_INTERNAL_StateSet sset;
3174 struct REGEX_INTERNAL_StateSet new_sset;
3175 struct REGEX_INTERNAL_StateSet singleton_set;
3176 unsigned int i;
3177 int result;
3178
3179 if (NFA != a->type)
3180 {
3182 "Tried to evaluate NFA, but DFA automaton given");
3183 return -1;
3184 }
3185
3186 /* If the string is empty but the starting state is accepting, we accept. */
3187 if (((NULL == string) || (0 == strlen (string))) && a->start->accepting)
3188 return 0;
3189
3190 result = 1;
3191 memset (&singleton_set, 0, sizeof(struct REGEX_INTERNAL_StateSet));
3192 state_set_append (&singleton_set, a->start);
3193 nfa_closure_set_create (&sset, a, &singleton_set, NULL);
3194 state_set_clear (&singleton_set);
3195
3196 str[1] = '\0';
3197 for (strp = string; NULL != strp && *strp; strp++)
3198 {
3199 str[0] = *strp;
3200 nfa_closure_set_create (&new_sset, a, &sset, str);
3201 state_set_clear (&sset);
3202 nfa_closure_set_create (&sset, a, &new_sset, 0);
3203 state_set_clear (&new_sset);
3204 }
3205
3206 for (i = 0; i < sset.off; i++)
3207 {
3208 s = sset.states[i];
3209 if ((NULL != s) && (s->accepting))
3210 {
3211 result = 0;
3212 break;
3213 }
3214 }
3215
3216 state_set_clear (&sset);
3217 return result;
3218}
3219
3220
3221int
3222REGEX_INTERNAL_eval (struct REGEX_INTERNAL_Automaton *a, const char *string)
3223{
3224 int result;
3225
3226 switch (a->type)
3227 {
3228 case DFA:
3229 result = evaluate_dfa (a, string);
3230 break;
3231
3232 case NFA:
3233 result = evaluate_nfa (a, string);
3234 break;
3235
3236 default:
3238 "Evaluating regex failed, automaton has no type!\n");
3240 break;
3241 }
3242
3243 return result;
3244}
3245
3246
3247const char *
3249{
3250 if (NULL == a)
3251 return NULL;
3252
3253 return a->canonical_regex;
3254}
3255
3256
3264unsigned int
3266{
3267 unsigned int t_count;
3268 struct REGEX_INTERNAL_State *s;
3269
3270 if (NULL == a)
3271 return 0;
3272
3273 t_count = 0;
3274 for (s = a->states_head; NULL != s; s = s->next)
3275 t_count += s->transition_count;
3276
3277 return t_count;
3278}
3279
3280
3291size_t
3292REGEX_INTERNAL_get_first_key (const char *input_string,
3293 size_t string_len,
3294 struct GNUNET_HashCode *key)
3295{
3296 size_t size;
3297
3298 size = string_len < GNUNET_REGEX_INITIAL_BYTES ? string_len
3300 if (NULL == input_string)
3301 {
3302 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Given input string was NULL!\n");
3303 return 0;
3304 }
3305 GNUNET_CRYPTO_hash (input_string, size, key);
3306
3307 return size;
3308}
3309
3310
3321static void
3322iterate_initial_edge (unsigned int min_len,
3323 unsigned int max_len,
3324 char *consumed_string,
3327 void *iterator_cls)
3328{
3329 char *temp;
3331 unsigned int num_edges = state->transition_count;
3332 struct REGEX_BLOCK_Edge edges[num_edges];
3333 struct REGEX_BLOCK_Edge edge[1];
3334 struct GNUNET_HashCode hash;
3335 struct GNUNET_HashCode hash_new;
3336 unsigned int cur_len;
3337
3338 if (NULL != consumed_string)
3339 cur_len = strlen (consumed_string);
3340 else
3341 cur_len = 0;
3342
3343 if (((cur_len >= min_len) || (GNUNET_YES == state->accepting)) &&
3344 (cur_len > 0) && (NULL != consumed_string))
3345 {
3346 if (cur_len <= max_len)
3347 {
3348 if ((NULL != state->proof) &&
3349 (0 != strcmp (consumed_string, state->proof)))
3350 {
3351 (void) state_get_edges (state, edges);
3352 GNUNET_CRYPTO_hash (consumed_string, strlen (consumed_string), &hash);
3354 "Start state for string `%s' is %s\n",
3355 consumed_string,
3356 GNUNET_h2s (&hash));
3357 iterator (iterator_cls,
3358 &hash,
3359 consumed_string,
3360 state->accepting,
3361 num_edges,
3362 edges);
3363 }
3364
3365 if ((GNUNET_YES == state->accepting) && (cur_len > 1) &&
3366 (state->transition_count < 1) && (cur_len < max_len))
3367 {
3368 /* Special case for regex consisting of just a string that is shorter than
3369 * max_len */
3370 edge[0].label = &consumed_string[cur_len - 1];
3371 edge[0].destination = state->hash;
3372 temp = GNUNET_strdup (consumed_string);
3373 temp[cur_len - 1] = '\0';
3374 GNUNET_CRYPTO_hash (temp, cur_len - 1, &hash_new);
3376 "Start state for short string `%s' is %s\n",
3377 temp,
3378 GNUNET_h2s (&hash_new));
3379 iterator (iterator_cls, &hash_new, temp, GNUNET_NO, 1, edge);
3380 GNUNET_free (temp);
3381 }
3382 }
3383 else /* cur_len > max_len */
3384 {
3385 /* Case where the concatenated labels are longer than max_len, then split. */
3386 edge[0].label = &consumed_string[max_len];
3387 edge[0].destination = state->hash;
3388 temp = GNUNET_strdup (consumed_string);
3389 temp[max_len] = '\0';
3390 GNUNET_CRYPTO_hash (temp, max_len, &hash);
3392 "Start state at split edge `%s'-`%s` is %s\n",
3393 temp,
3394 edge[0].label,
3395 GNUNET_h2s (&hash_new));
3396 iterator (iterator_cls, &hash, temp, GNUNET_NO, 1, edge);
3397 GNUNET_free (temp);
3398 }
3399 }
3400
3401 if (cur_len < max_len)
3402 {
3403 for (t = state->transitions_head; NULL != t; t = t->next)
3404 {
3405 if (NULL != strchr (t->label, (int) '.'))
3406 {
3407 /* Wildcards not allowed during starting states */
3408 GNUNET_break (0);
3409 continue;
3410 }
3411 if (NULL != consumed_string)
3412 GNUNET_asprintf (&temp, "%s%s", consumed_string, t->label);
3413 else
3414 GNUNET_asprintf (&temp, "%s", t->label);
3415 iterate_initial_edge (min_len,
3416 max_len,
3417 temp,
3418 t->to_state,
3419 iterator,
3420 iterator_cls);
3421 GNUNET_free (temp);
3422 }
3423 }
3424}
3425
3426
3435void
3438 void *iterator_cls)
3439{
3440 struct REGEX_INTERNAL_State *s;
3441
3442 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating over starting edges\n");
3445 NULL,
3446 a->start,
3447 iterator,
3448 iterator_cls);
3449 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating over DFA edges\n");
3450 for (s = a->states_head; NULL != s; s = s->next)
3451 {
3452 struct REGEX_BLOCK_Edge edges[s->transition_count];
3453 unsigned int num_edges;
3454
3455 num_edges = state_get_edges (s, edges);
3456 if (((NULL != s->proof) && (0 < strlen (s->proof))) || s->accepting)
3457 {
3459 "Creating DFA edges at `%s' under key %s\n",
3460 s->proof,
3461 GNUNET_h2s (&s->hash));
3462 iterator (iterator_cls,
3463 &s->hash,
3464 s->proof,
3465 s->accepting,
3466 num_edges,
3467 edges);
3468 }
3469 s->marked = GNUNET_NO;
3470 }
3471}
3472
3473
3481{
3483 char *proof;
3487};
3488
3489
3494{
3497};
3498
3499
3511static void
3513 const struct GNUNET_HashCode *key,
3514 const char *proof,
3515 int accepting,
3516 unsigned int num_edges,
3517 const struct REGEX_BLOCK_Edge *edges)
3518{
3519 struct GNUNET_CONTAINER_MultiHashMap *hm = cls;
3520 struct temporal_state_store *tmp;
3521 size_t edges_size;
3522
3523 tmp = GNUNET_new (struct temporal_state_store);
3524 tmp->reachable = GNUNET_NO;
3525 tmp->proof = GNUNET_strdup (proof);
3526 tmp->accepting = accepting;
3527 tmp->num_edges = num_edges;
3528 edges_size = sizeof(struct REGEX_BLOCK_Edge) * num_edges;
3529 tmp->edges = GNUNET_malloc (edges_size);
3530 GNUNET_memcpy (tmp->edges, edges, edges_size);
3533 hm,
3534 key,
3535 tmp,
3537}
3538
3539
3548static void
3551{
3553 unsigned int i;
3554
3555 if (GNUNET_YES == state->reachable)
3556 /* visited */
3557 return;
3558
3559 state->reachable = GNUNET_YES;
3560 for (i = 0; i < state->num_edges; i++)
3561 {
3562 child =
3563 GNUNET_CONTAINER_multihashmap_get (hm, &state->edges[i].destination);
3564 if (NULL == child)
3565 {
3566 GNUNET_break (0);
3567 continue;
3568 }
3570 }
3571}
3572
3573
3583static int
3585 const struct GNUNET_HashCode *key,
3586 void *value)
3587{
3588 struct GNUNET_CONTAINER_MultiHashMap *hm = cls;
3590
3591 if (GNUNET_YES == state->reachable)
3592 /* already visited and marked */
3593 return GNUNET_YES;
3594
3595 if ((GNUNET_REGEX_INITIAL_BYTES > strlen (state->proof)) &&
3596 (GNUNET_NO == state->accepting) )
3597 /* not directly reachable */
3598 return GNUNET_YES;
3599
3601 return GNUNET_YES;
3602}
3603
3604
3615static int
3616iterate_reachables (void *cls, const struct GNUNET_HashCode *key, void *value)
3617{
3618 struct client_iterator *ci = cls;
3620
3621 if (GNUNET_YES == state->reachable)
3622 {
3623 ci->iterator (ci->iterator_cls,
3624 key,
3625 state->proof,
3626 state->accepting,
3627 state->num_edges,
3628 state->edges);
3629 }
3630 GNUNET_free (state->edges);
3631 GNUNET_free (state->proof);
3633 return GNUNET_YES;
3634}
3635
3636
3637void
3640 void *iterator_cls)
3641{
3643 struct client_iterator ci;
3644
3646 ci.iterator = iterator;
3648
3652
3654}
3655
3656
3657/* end of regex_internal.c */
static struct GNUNET_ARM_Operation * op
Current operation.
Definition: gnunet-arm.c:143
static int start
Set if we are to start default services (including ARM).
Definition: gnunet-arm.c:38
static int ret
Final status code.
Definition: gnunet-arm.c:93
static int end
Set if we are to shutdown all services (including ARM).
Definition: gnunet-arm.c:33
struct GNUNET_HashCode key
The key used in the DHT.
static struct GNUNET_FS_Handle * ctx
static char * value
Value of the record to add/remove.
enum State state
current state of profiling
static int result
Global testing status.
static uint64_t proof
Definition: gnunet-scrypt.c:49
static pid_t child
static struct GNUNET_OS_Process * p
Helper process we started.
Definition: gnunet-uri.c:38
static struct GNUNET_SCHEDULER_Task * t
Main task.
API to access regex service to advertise capabilities via regex and discover respective peers using m...
#define GNUNET_CONTAINER_MDLL_remove(mdll, head, tail, element)
Remove an element from a MDLL.
#define GNUNET_CONTAINER_MDLL_insert_tail(mdll, head, tail, element)
Insert an element at the tail of a MDLL.
#define GNUNET_CONTAINER_DLL_remove(head, tail, element)
Remove an element from a DLL.
#define GNUNET_CONTAINER_DLL_insert_before(head, tail, other, element)
Insert an element into a DLL before the given other element.
#define GNUNET_CONTAINER_MDLL_insert(mdll, head, tail, element)
Insert an element at the head of a MDLL.
#define GNUNET_CONTAINER_DLL_insert_tail(head, tail, element)
Insert an element at the tail of a DLL.
#define GNUNET_CONTAINER_DLL_insert(head, tail, element)
Insert an element at the head of a DLL.
void GNUNET_CRYPTO_hash(const void *block, size_t size, struct GNUNET_HashCode *ret)
Compute hash of a given block.
Definition: crypto_hash.c:41
int GNUNET_CONTAINER_multihashmap_iterate(struct GNUNET_CONTAINER_MultiHashMap *map, GNUNET_CONTAINER_MultiHashMapIteratorCallback it, void *it_cls)
Iterate over all entries in the map.
void * GNUNET_CONTAINER_multihashmap_get(const struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key)
Given a key find a value in the map matching the key.
enum GNUNET_GenericReturnValue GNUNET_CONTAINER_multihashmap_put(struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key, void *value, enum GNUNET_CONTAINER_MultiHashMapOption opt)
Store a key-value pair in the map.
void GNUNET_CONTAINER_multihashmap_destroy(struct GNUNET_CONTAINER_MultiHashMap *map)
Destroy a hash map.
struct GNUNET_CONTAINER_MultiHashMap * GNUNET_CONTAINER_multihashmap_create(unsigned int len, int do_not_copy_keys)
Create a multi hash map.
@ GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST
, ' bother checking if a value already exists (faster than GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE...
#define GNUNET_log(kind,...)
#define GNUNET_MAX(a, b)
#define GNUNET_memcpy(dst, src, n)
Call memcpy() but check for n being 0 first.
@ GNUNET_OK
@ GNUNET_YES
@ GNUNET_NO
@ GNUNET_SYSERR
#define GNUNET_assert(cond)
Use this for fatal errors that cannot be handled.
#define GNUNET_break(cond)
Use this for internal assertion violations that are not fatal (can be handled) but should not occur.
const char * GNUNET_h2s(const struct GNUNET_HashCode *hc)
Convert a hash value to a string (for printing debug messages).
#define GNUNET_log_strerror(level, cmd)
Log an error message at log-level 'level' that indicates a failure of the command 'cmd' with the mess...
@ GNUNET_ERROR_TYPE_ERROR
@ GNUNET_ERROR_TYPE_DEBUG
int int GNUNET_asprintf(char **buf, const char *format,...) __attribute__((format(printf
Like asprintf, just portable.
#define GNUNET_strdup(a)
Wrapper around GNUNET_xstrdup_.
#define GNUNET_array_grow(arr, size, tsize)
Grow a well-typed (!) array.
#define GNUNET_malloc_large(size)
Wrapper around malloc.
#define GNUNET_strndup(a, length)
Wrapper around GNUNET_xstrndup_.
int GNUNET_snprintf(char *buf, size_t size, const char *format,...) __attribute__((format(printf
Like snprintf, just aborts if the buffer is of insufficient size.
#define GNUNET_new(type)
Allocate a struct or union of the given type.
#define GNUNET_malloc(size)
Wrapper around malloc.
#define GNUNET_realloc(ptr, size)
Wrapper around realloc.
#define GNUNET_free(ptr)
Wrapper around free.
#define GNUNET_REGEX_INITIAL_BYTES
Constant for how many bytes the initial string regex should have.
#define max(x, y)
static struct PeerEntry ** table
Table with our interned peer IDs.
Definition: peer.c:56
static unsigned int size
Size of the "table".
Definition: peer.c:68
static void sb_strdup_cstr(struct StringBuffer *out, const char *cstr)
Copy the given string buffer from 'in' to 'out'.
static struct REGEX_INTERNAL_State * dfa_state_create(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_StateSet *nfa_states)
Creates a new DFA state based on a set of NFA states.
static void state_add_transition(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_State *from_state, const char *label, struct REGEX_INTERNAL_State *to_state)
Adds a transition from one state to another on label.
static void sb_printf3(struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg1, const struct StringBuffer *sarg2, const struct StringBuffer *sarg3)
Format a string buffer.
static void construct_dfa_states(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *nfa, struct REGEX_INTERNAL_Automaton *dfa, struct REGEX_INTERNAL_State *dfa_state)
Create DFA states based on given 'nfa' and starting with 'dfa_state'.
static void state_set_clear(struct REGEX_INTERNAL_StateSet *set)
Clears the given StateSet 'set'.
static void nfa_add_concatenation(struct REGEX_INTERNAL_Context *ctx)
Pops two NFA fragments (a, b) from the stack and concatenates them (ab)
static void remove_parentheses(struct StringBuffer *str)
Remove parentheses surrounding string str.
static void sb_printf1(struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg)
Format a string buffer.
static struct REGEX_INTERNAL_Automaton * nfa_fragment_create(struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *end)
Creates a new NFA fragment.
static int sb_strncmp_cstr(const struct StringBuffer *str1, const char *str2, size_t n)
Compare n bytes of 'str1' and 'str2'.
static void automaton_merge_states(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s1, struct REGEX_INTERNAL_State *s2)
Merge two states into one.
static void dfa_add_multi_strides(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Function called for each state in the DFA.
static void sb_append(struct StringBuffer *ret, const struct StringBuffer *sarg)
Append a string.
void REGEX_INTERNAL_iterate_all_edges(struct REGEX_INTERNAL_Automaton *a, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
Iterate over all edges starting from start state of automaton 'a'.
static void sb_free(struct StringBuffer *sb)
Free resources of the given string buffer.
static void sb_realloc(struct StringBuffer *ret, size_t nlen)
Reallocate the buffer of 'ret' to fit 'nlen' characters; move the existing string to the beginning of...
static void nfa_add_states(struct REGEX_INTERNAL_Automaton *n, struct REGEX_INTERNAL_State *states_head, struct REGEX_INTERNAL_State *states_tail)
Adds a list of states to the given automaton 'n'.
size_t REGEX_INTERNAL_get_first_key(const char *input_string, size_t string_len, struct GNUNET_HashCode *key)
Get the first key for the given input_string.
static void state_set_append(struct REGEX_INTERNAL_StateSet *set, struct REGEX_INTERNAL_State *state)
Append state to the given StateSet.
struct REGEX_INTERNAL_Automaton * REGEX_INTERNAL_construct_dfa(const char *regex, const size_t len, unsigned int max_path_len)
Construct DFA for the given 'regex' of length 'len'.
static void dfa_add_multi_strides_helper(void *cls, const unsigned int depth, char *label, struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *s)
Recursive helper function to add strides to a DFA.
void REGEX_INTERNAL_automaton_traverse(const struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *start, REGEX_INTERNAL_traverse_check check, void *check_cls, REGEX_INTERNAL_traverse_action action, void *action_cls)
Traverses the given automaton using depth-first-search (DFS) from it's start state,...
static void mark_as_reachable(struct temporal_state_store *state, struct GNUNET_CONTAINER_MultiHashMap *hm)
Mark state as reachable and call recursively on all its edges.
void REGEX_INTERNAL_dfa_add_multi_strides(struct REGEX_INTERNAL_Context *regex_ctx, struct REGEX_INTERNAL_Automaton *dfa, const unsigned int stride_len)
Adds multi-strided transitions to the given 'dfa'.
int REGEX_INTERNAL_eval(struct REGEX_INTERNAL_Automaton *a, const char *string)
Evaluates the given 'string' against the given compiled regex.
static void nfa_add_question_op(struct REGEX_INTERNAL_Context *ctx)
Pops an NFA fragment (a) from the stack and adds a new fragment (a?)
static int nullstrcmp(const char *str1, const char *str2)
Compare two strings for equality.
static int dfa_merge_nondistinguishable_states(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a)
Merge all non distinguishable states in the DFA 'a'.
static int dfa_minimize(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a)
Minimize the given DFA 'a' by removing all unreachable states, removing all dead states and merging a...
static void sb_append_cstr(struct StringBuffer *ret, const char *cstr)
Append a C string.
const char * REGEX_INTERNAL_get_canonical_regex(struct REGEX_INTERNAL_Automaton *a)
Get the canonical regex of the given automaton.
static int sb_nullstrcmp(const struct StringBuffer *s1, const struct StringBuffer *s2)
Compare two strings for equality.
static int automaton_create_proofs(struct REGEX_INTERNAL_Automaton *a)
Create proofs for all states in the given automaton.
static int state_compare(const void *a, const void *b)
Compare two states.
static void dfa_remove_dead_states(struct REGEX_INTERNAL_Automaton *a)
Remove all dead states from the DFA 'a'.
static int state_set_compare(struct REGEX_INTERNAL_StateSet *sset1, struct REGEX_INTERNAL_StateSet *sset2)
Compare to state sets by comparing the id's of the states that are contained in each set.
void REGEX_INTERNAL_iterate_reachable_edges(struct REGEX_INTERNAL_Automaton *a, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
Iterate over all edges of automaton 'a' that are reachable from a state with a proof of at least GNUN...
static void remove_epsilon(const struct StringBuffer *str, struct StringBuffer *ret)
Remove an epsilon from the string str.
static void sb_wrap(struct StringBuffer *ret, const char *format, size_t extra_chars)
Wrap a string buffer, that is, set ret to the format string which contains an "%s" which is to be rep...
static void sb_strdup(struct StringBuffer *out, const struct StringBuffer *in)
Copy the given string buffer from 'in' to 'out'.
static int evaluate_dfa(struct REGEX_INTERNAL_Automaton *a, const char *string)
Evaluates the given string using the given DFA automaton.
static void nfa_closure_set_create(struct REGEX_INTERNAL_StateSet *ret, struct REGEX_INTERNAL_Automaton *nfa, struct REGEX_INTERNAL_StateSet *states, const char *label)
Calculates the closure set for the given set of states.
static int iterate_reachables(void *cls, const struct GNUNET_HashCode *key, void *value)
Iterator over hash map entries.
static void sb_printf2(struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg1, const struct StringBuffer *sarg2)
Format a string buffer.
static void dfa_compress_paths_helper(struct REGEX_INTERNAL_Automaton *dfa, struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *cur, char *label, unsigned int max_len, struct REGEX_INTERNAL_Transition **transitions_head, struct REGEX_INTERNAL_Transition **transitions_tail)
Recursive Helper function for DFA path compression.
static unsigned int state_get_edges(struct REGEX_INTERNAL_State *s, struct REGEX_BLOCK_Edge *edges)
Get all edges leaving state s.
static void automaton_create_proofs_simplify(const struct StringBuffer *R_last_ij, const struct StringBuffer *R_last_ik, const struct StringBuffer *R_last_kk, const struct StringBuffer *R_last_kj, struct StringBuffer *R_cur_ij, struct StringBuffer *R_cur_l, struct StringBuffer *R_cur_r)
Construct the regular expression given the inductive step, $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}...
static int sb_strcmp(const struct StringBuffer *s1, const struct StringBuffer *s2)
Compare two strings for equality.
static void store_all_states(void *cls, const struct GNUNET_HashCode *key, const char *proof, int accepting, unsigned int num_edges, const struct REGEX_BLOCK_Edge *edges)
Iterator over all edges of a dfa.
static int evaluate_nfa(struct REGEX_INTERNAL_Automaton *a, const char *string)
Evaluates the given string using the given NFA automaton.
static int sb_strncmp(const struct StringBuffer *str1, const struct StringBuffer *str2, size_t n)
Compare n bytes of 'str1' and 'str2'.
static int needs_parentheses(const struct StringBuffer *str)
Check if the given string str needs parentheses around it when using it to generate a regex.
static void state_remove_transition(struct REGEX_INTERNAL_State *state, struct REGEX_INTERNAL_Transition *transition)
Remove a 'transition' from 'state'.
static struct REGEX_INTERNAL_State * nfa_state_create(struct REGEX_INTERNAL_Context *ctx, int accepting)
Creates a new NFA state.
static void iterate_initial_edge(unsigned int min_len, unsigned int max_len, char *consumed_string, struct REGEX_INTERNAL_State *state, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
Recursive function that calls the iterator for each synthetic start state.
static void mark_states(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Set the given state 'marked' to GNUNET_YES.
static void automaton_add_state(struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s)
Add a state to the automaton 'a', always use this function to alter the states DLL of the automaton.
static void dfa_remove_unreachable_states(struct REGEX_INTERNAL_Automaton *a)
Remove all unreachable states from DFA 'a'.
void REGEX_INTERNAL_automaton_destroy(struct REGEX_INTERNAL_Automaton *a)
Free the memory allocated by constructing the REGEX_INTERNAL_Automaton.
struct REGEX_INTERNAL_Automaton * REGEX_INTERNAL_construct_nfa(const char *regex, const size_t len)
Construct an NFA by parsing the regex string of length 'len'.
unsigned int REGEX_INTERNAL_get_transition_count(struct REGEX_INTERNAL_Automaton *a)
Get the number of transitions that are contained in the given automaton.
static void nfa_add_star_op(struct REGEX_INTERNAL_Context *ctx)
Pops a NFA fragment from the stack (a) and adds a new fragment (a*)
static int has_epsilon(const struct StringBuffer *str)
Check if the string 'str' starts with an epsilon (empty string).
static void automaton_destroy_state(struct REGEX_INTERNAL_State *s)
Frees the memory used by State s.
static unsigned int dfa_move(struct REGEX_INTERNAL_State **s, const char *str)
Move from the given state 's' to the next state on transition 'str'.
static void nfa_add_alternation(struct REGEX_INTERNAL_Context *ctx)
Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment that alternates between a an...
static void sb_init(struct StringBuffer *sb, size_t n)
Initialize string buffer for storing strings of up to n characters.
static void nfa_add_label(struct REGEX_INTERNAL_Context *ctx, const char *label)
Adds a new nfa fragment to the stack.
static int sb_strkcmp(const struct StringBuffer *str1, const struct StringBuffer *str2, size_t k)
Compare 'str1', starting from position 'k', with whole 'str2'.
static void automaton_state_traverse(struct REGEX_INTERNAL_State *s, int *marks, unsigned int *count, REGEX_INTERNAL_traverse_check check, void *check_cls, REGEX_INTERNAL_traverse_action action, void *action_cls)
Depth-first traversal (DFS) of all states that are reachable from state 's'.
static void automaton_remove_state(struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s)
Remove a state from the given automaton 'a'.
static void dfa_compress_paths(struct REGEX_INTERNAL_Context *regex_ctx, struct REGEX_INTERNAL_Automaton *dfa, unsigned int max_len)
Compress paths in the given 'dfa'.
static void number_states(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Helper function used as 'action' in 'REGEX_INTERNAL_automaton_traverse' function to create the depth-...
static void REGEX_INTERNAL_context_init(struct REGEX_INTERNAL_Context *ctx)
Initialize a new context.
static void nfa_add_plus_op(struct REGEX_INTERNAL_Context *ctx)
Pops an NFA fragment (a) from the stack and adds a new fragment (a+)
static void automaton_fragment_clear(struct REGEX_INTERNAL_Automaton *a)
Clears an automaton fragment.
static int reachability_iterator(void *cls, const struct GNUNET_HashCode *key, void *value)
Iterator over hash map entries to mark the ones that are reachable.
int(* REGEX_INTERNAL_traverse_check)(void *cls, struct REGEX_INTERNAL_State *s, struct REGEX_INTERNAL_Transition *t)
Function that gets passed to automaton traversal and is called before each next traversal from state ...
void(* REGEX_INTERNAL_traverse_action)(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Function that is called with each state, when traversing an automaton.
@ NFA
@ DFA
library to parse regular expressions into dfa
void(* REGEX_INTERNAL_KeyIterator)(void *cls, const struct GNUNET_HashCode *key, const char *proof, int accepting, unsigned int num_edges, const struct REGEX_BLOCK_Edge *edges)
Iterator callback function.
Internal representation of the hash map.
A 512-bit hashcode.
struct GNUNET_SCHEDULER_Task * next
This is a linked list.
Definition: scheduler.c:139
Edge representation.
const char * label
Label of the edge.
struct GNUNET_HashCode destination
Destination of the edge.
Automaton representation.
struct REGEX_INTERNAL_State * start
First state of the automaton.
char * canonical_regex
Canonical regex (result of RX->NFA->DFA->RX)
int is_multistrided
GNUNET_YES, if multi strides have been added to the Automaton.
struct REGEX_INTERNAL_State * states_tail
DLL of states.
unsigned int state_count
Number of states in the automaton.
struct REGEX_INTERNAL_State * states_head
DLL of states.
struct REGEX_INTERNAL_State * end
End state of the partial NFA.
enum REGEX_INTERNAL_AutomatonType type
Type of the automaton.
Context that contains an id counter for states and transitions as well as a DLL of automatons used as...
Set of states using MDLL API.
struct REGEX_INTERNAL_State * head
MDLL of states.
struct REGEX_INTERNAL_State * tail
MDLL of states.
unsigned int len
Length of the MDLL.
unsigned int size
Length of the 'states' array.
struct REGEX_INTERNAL_State ** states
Array of states.
unsigned int off
Number of entries in use in the 'states' array.
unsigned int incoming_transition_count
Number of incoming transitions.
struct REGEX_INTERNAL_State * next
This is a linked list to keep states in an automaton.
char * proof
Proof for this state.
unsigned int transition_count
Number of transitions from this state to other states.
unsigned int traversal_id
Unique state id that is used for traversing the automaton.
struct REGEX_INTERNAL_StateSet nfa_set
Set of states on which this state is based on.
unsigned int dfs_id
Linear state ID acquired by depth-first-search.
char * name
Human readable name of the state.
int marked
Marking of the state.
unsigned int id
Unique state id.
struct REGEX_INTERNAL_Transition * transitions_tail
DLL of transitions.
int accepting
If this is an accepting state or not.
int lowlink
Used for SCC detection.
int contained
Marking the state as contained.
struct REGEX_INTERNAL_Transition * transitions_head
DLL of transitions.
int index
Used for SCC detection.
unsigned int scc_id
Marking the state as part of an SCC (Strongly Connected Component).
Context for adding strided transitions to a DFA.
struct REGEX_INTERNAL_Transition * transitions_tail
Strided transitions DLL.
struct REGEX_INTERNAL_Transition * transitions_head
Strided transitions DLL.
const unsigned int stride
Length of the strides.
Transition between two states.
struct REGEX_INTERNAL_State * to_state
State to which this transition leads.
struct REGEX_INTERNAL_Transition * next
This is a linked list.
struct REGEX_INTERNAL_State * from_state
State from which this transition origins.
char * label
Label for this transition.
String container for faster string operations.
int16_t null_flag
Buffer currently represents "NULL" (not the empty string!)
char * sbuf
Buffer holding the string (may start in the middle!); NOT 0-terminated!
size_t slen
Length of the string in the buffer.
unsigned int blen
Number of bytes allocated for sbuf.
char * abuf
Allocated buffer.
int16_t synced
If this entry is part of the last/current generation array, this flag is GNUNET_YES if the last and c...
Store regex iterator and cls in one place to pass to the hashmap iterator.
REGEX_INTERNAL_KeyIterator iterator
Struct to hold all the relevant state information in the HashMap.
struct REGEX_BLOCK_Edge * edges