GNUnet 0.21.1
regex_internal.c
Go to the documentation of this file.
1/*
2 This file is part of GNUnet
3 Copyright (C) 2012 GNUnet e.V.
4
5 GNUnet is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Affero General Public License as published
7 by the Free Software Foundation, either version 3 of the License,
8 or (at your option) any later version.
9
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Affero General Public License for more details.
14
15 You should have received a copy of the GNU Affero General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
17
18 SPDX-License-Identifier: AGPL3.0-or-later
19 */
26#include "platform.h"
27#include "gnunet_util_lib.h"
29#include "regex_internal_lib.h"
30#include "regex_internal.h"
31
32
37#define REGEX_DEBUG_DFA GNUNET_NO
38
43{
48
53
57 unsigned int len;
58};
59
60
67static void
70{
71 if (set->off == set->size)
72 GNUNET_array_grow (set->states, set->size, set->size * 2 + 4);
73 set->states[set->off++] = state;
74}
75
76
85static int
86nullstrcmp (const char *str1, const char *str2)
87{
88 if ((NULL == str1) != (NULL == str2))
89 return -1;
90 if ((NULL == str1) && (NULL == str2))
91 return 0;
92
93 return strcmp (str1, str2);
94}
95
96
106static void
108 struct REGEX_INTERNAL_State *from_state,
109 const char *label,
110 struct REGEX_INTERNAL_State *to_state)
111{
113 struct REGEX_INTERNAL_Transition *oth;
114
115 if (NULL == from_state)
116 {
117 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not create Transition.\n");
118 return;
119 }
120
121 /* Do not add duplicate state transitions */
122 for (t = from_state->transitions_head; NULL != t; t = t->next)
123 {
124 if ((t->to_state == to_state) && (0 == nullstrcmp (t->label, label)) &&
125 (t->from_state == from_state) )
126 return;
127 }
128
129 /* sort transitions by label */
130 for (oth = from_state->transitions_head; NULL != oth; oth = oth->next)
131 {
132 if (0 < nullstrcmp (oth->label, label))
133 break;
134 }
135
137 if (NULL != ctx)
138 t->id = ctx->transition_id++;
139 if (NULL != label)
140 t->label = GNUNET_strdup (label);
141 else
142 t->label = NULL;
143 t->to_state = to_state;
144 t->from_state = from_state;
145
146 /* Add outgoing transition to 'from_state' */
150 oth,
151 t);
152}
153
154
161static void
163 struct REGEX_INTERNAL_Transition *transition)
164{
165 if ((NULL == state) || (NULL == transition))
166 return;
167
168 if (transition->from_state != state)
169 return;
170
171 GNUNET_free (transition->label);
172
173 state->transition_count--;
174 GNUNET_CONTAINER_DLL_remove (state->transitions_head,
175 state->transitions_tail,
176 transition);
177
178 GNUNET_free (transition);
179}
180
181
192static int
193state_compare (const void *a, const void *b)
194{
195 struct REGEX_INTERNAL_State **s1 = (struct REGEX_INTERNAL_State **) a;
196 struct REGEX_INTERNAL_State **s2 = (struct REGEX_INTERNAL_State **) b;
197
198 return (*s1)->id - (*s2)->id;
199}
200
201
211static unsigned int
213{
215 unsigned int count;
216
217 if (NULL == s)
218 return 0;
219
220 count = 0;
221
222 for (t = s->transitions_head; NULL != t; t = t->next)
223 {
224 if (NULL != t->to_state)
225 {
226 edges[count].label = t->label;
227 edges[count].destination = t->to_state->hash;
228 count++;
229 }
230 }
231 return count;
232}
233
234
243static int
245 struct REGEX_INTERNAL_StateSet *sset2)
246{
247 int result;
248 unsigned int i;
249
250 if ((NULL == sset1) || (NULL == sset2))
251 return 1;
252
253 result = sset1->off - sset2->off;
254 if (result < 0)
255 return -1;
256 if (result > 0)
257 return 1;
258 for (i = 0; i < sset1->off; i++)
259 if (0 != (result = state_compare (&sset1->states[i], &sset2->states[i])))
260 break;
261 return result;
262}
263
264
270static void
272{
273 GNUNET_array_grow (set->states, set->size, 0);
274 set->off = 0;
275}
276
277
284static void
286{
287 if (NULL == a)
288 return;
289
290 a->start = NULL;
291 a->end = NULL;
292 a->states_head = NULL;
293 a->states_tail = NULL;
294 a->state_count = 0;
295 GNUNET_free (a);
296}
297
298
304static void
306{
308 struct REGEX_INTERNAL_Transition *next_t;
309
310 if (NULL == s)
311 return;
312
313 GNUNET_free (s->name);
314 GNUNET_free (s->proof);
316 for (t = s->transitions_head; NULL != t; t = next_t)
317 {
318 next_t = t->next;
320 }
321
322 GNUNET_free (s);
323}
324
325
334static void
336 struct REGEX_INTERNAL_State *s)
337{
338 struct REGEX_INTERNAL_State *s_check;
339 struct REGEX_INTERNAL_Transition *t_check;
340 struct REGEX_INTERNAL_Transition *t_check_next;
341
342 if ((NULL == a) || (NULL == s))
343 return;
344
345 /* remove all transitions leading to this state */
346 for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
347 {
348 for (t_check = s_check->transitions_head; NULL != t_check;
349 t_check = t_check_next)
350 {
351 t_check_next = t_check->next;
352 if (t_check->to_state == s)
353 state_remove_transition (s_check, t_check);
354 }
355 }
356
357 /* remove state */
359 a->state_count--;
360
362}
363
364
374static void
376 struct REGEX_INTERNAL_Automaton *a,
377 struct REGEX_INTERNAL_State *s1,
378 struct REGEX_INTERNAL_State *s2)
379{
380 struct REGEX_INTERNAL_State *s_check;
381 struct REGEX_INTERNAL_Transition *t_check;
383 struct REGEX_INTERNAL_Transition *t_next;
384 int is_dup;
385
386 if (s1 == s2)
387 return;
388
389 /* 1. Make all transitions pointing to s2 point to s1, unless this transition
390 * does not already exists, if it already exists remove transition. */
391 for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
392 {
393 for (t_check = s_check->transitions_head; NULL != t_check; t_check = t_next)
394 {
395 t_next = t_check->next;
396
397 if (s2 == t_check->to_state)
398 {
399 is_dup = GNUNET_NO;
400 for (t = t_check->from_state->transitions_head; NULL != t; t = t->next)
401 {
402 if ((t->to_state == s1) && (0 == strcmp (t_check->label, t->label)) )
403 is_dup = GNUNET_YES;
404 }
405 if (GNUNET_NO == is_dup)
406 t_check->to_state = s1;
407 else
408 state_remove_transition (t_check->from_state, t_check);
409 }
410 }
411 }
412
413 /* 2. Add all transitions from s2 to sX to s1 */
414 for (t_check = s2->transitions_head; NULL != t_check; t_check = t_check->next)
415 {
416 if (t_check->to_state != s1)
417 state_add_transition (ctx, s1, t_check->label, t_check->to_state);
418 }
419
420 /* 3. Rename s1 to {s1,s2} */
421#if REGEX_DEBUG_DFA
422 char *new_name;
423
424 new_name = s1->name;
425 GNUNET_asprintf (&s1->name, "{%s,%s}", new_name, s2->name);
426 GNUNET_free (new_name);
427#endif
428
429 /* remove state */
431 a->state_count--;
433}
434
435
443static void
445 struct REGEX_INTERNAL_State *s)
446{
448 a->state_count++;
449}
450
451
466static void
468 int *marks,
469 unsigned int *count,
471 void *check_cls,
473 void *action_cls)
474{
476
477 if (GNUNET_YES == marks[s->traversal_id])
478 return;
479
480 marks[s->traversal_id] = GNUNET_YES;
481
482 if (NULL != action)
483 action (action_cls, *count, s);
484
485 (*count)++;
486
487 for (t = s->transitions_head; NULL != t; t = t->next)
488 {
489 if ((NULL == check) ||
490 ((NULL != check) && (GNUNET_YES == check (check_cls, s, t)) ))
491 {
492 automaton_state_traverse (t->to_state,
493 marks,
494 count,
495 check,
496 check_cls,
497 action,
498 action_cls);
499 }
500 }
501}
502
503
504void
508 void *check_cls,
510 void *action_cls)
511{
512 unsigned int count;
513 struct REGEX_INTERNAL_State *s;
514
515 if ((NULL == a) || (0 == a->state_count))
516 return;
517
518 int marks[a->state_count];
519
520 for (count = 0, s = a->states_head; NULL != s && count < a->state_count;
521 s = s->next, count++)
522 {
523 s->traversal_id = count;
524 marks[s->traversal_id] = GNUNET_NO;
525 }
526
527 count = 0;
528
529 if (NULL == start)
530 s = a->start;
531 else
532 s = start;
533
535 marks,
536 &count,
537 check,
538 check_cls,
539 action,
540 action_cls);
541}
542
543
548{
553 char *sbuf;
554
558 char *abuf;
559
563 size_t slen;
564
568 unsigned int blen;
569
573 int16_t null_flag;
574
584 int16_t synced;
585};
586
587
596static int
597sb_nullstrcmp (const struct StringBuffer *s1, const struct StringBuffer *s2)
598{
599 if ((GNUNET_YES == s1->null_flag) && (GNUNET_YES == s2->null_flag))
600 return 0;
601 if ((GNUNET_YES == s1->null_flag) || (GNUNET_YES == s2->null_flag))
602 return -1;
603 if (s1->slen != s2->slen)
604 return -1;
605 if (0 == s1->slen)
606 return 0;
607 return memcmp (s1->sbuf, s2->sbuf, s1->slen);
608}
609
610
619static int
620sb_strcmp (const struct StringBuffer *s1, const struct StringBuffer *s2)
621{
622 if (s1->slen != s2->slen)
623 return -1;
624 if (0 == s1->slen)
625 return 0;
626 return memcmp (s1->sbuf, s2->sbuf, s1->slen);
627}
628
629
637static void
638sb_realloc (struct StringBuffer *ret, size_t nlen)
639{
640 char *old;
641
642 GNUNET_assert (nlen >= ret->slen);
643 old = ret->abuf;
644 ret->abuf = GNUNET_malloc (nlen);
645 ret->blen = nlen;
646 GNUNET_memcpy (ret->abuf, ret->sbuf, ret->slen);
647 ret->sbuf = ret->abuf;
648 GNUNET_free (old);
649}
650
651
658static void
659sb_append (struct StringBuffer *ret, const struct StringBuffer *sarg)
660{
661 if (GNUNET_YES == ret->null_flag)
662 ret->slen = 0;
663 ret->null_flag = GNUNET_NO;
664 if (ret->blen < sarg->slen + ret->slen)
665 sb_realloc (ret, ret->blen + sarg->slen + 128);
666 GNUNET_memcpy (&ret->sbuf[ret->slen], sarg->sbuf, sarg->slen);
667 ret->slen += sarg->slen;
668}
669
670
677static void
678sb_append_cstr (struct StringBuffer *ret, const char *cstr)
679{
680 size_t cstr_len = strlen (cstr);
681
682 if (GNUNET_YES == ret->null_flag)
683 ret->slen = 0;
684 ret->null_flag = GNUNET_NO;
685 if (ret->blen < cstr_len + ret->slen)
686 sb_realloc (ret, ret->blen + cstr_len + 128);
687 GNUNET_memcpy (&ret->sbuf[ret->slen], cstr, cstr_len);
688 ret->slen += cstr_len;
689}
690
691
702static void
703sb_wrap (struct StringBuffer *ret, const char *format, size_t extra_chars)
704{
705 char *temp;
706
707 if (GNUNET_YES == ret->null_flag)
708 ret->slen = 0;
709 ret->null_flag = GNUNET_NO;
710 temp = GNUNET_malloc (ret->slen + extra_chars + 1);
711 GNUNET_snprintf (temp,
712 ret->slen + extra_chars + 1,
713 format,
714 (int) ret->slen,
715 ret->sbuf);
716 GNUNET_free (ret->abuf);
717 ret->abuf = temp;
718 ret->sbuf = temp;
719 ret->blen = ret->slen + extra_chars + 1;
720 ret->slen = ret->slen + extra_chars;
721}
722
723
733static void
735 const char *format,
736 size_t extra_chars,
737 const struct StringBuffer *sarg)
738{
739 if (ret->blen < sarg->slen + extra_chars + 1)
740 sb_realloc (ret, sarg->slen + extra_chars + 1);
741 ret->null_flag = GNUNET_NO;
742 ret->sbuf = ret->abuf;
743 ret->slen = sarg->slen + extra_chars;
744 GNUNET_snprintf (ret->sbuf, ret->blen, format, (int) sarg->slen, sarg->sbuf);
745}
746
747
757static void
759 const char *format,
760 size_t extra_chars,
761 const struct StringBuffer *sarg1,
762 const struct StringBuffer *sarg2)
763{
764 if (ret->blen < sarg1->slen + sarg2->slen + extra_chars + 1)
765 sb_realloc (ret, sarg1->slen + sarg2->slen + extra_chars + 1);
766 ret->null_flag = GNUNET_NO;
767 ret->slen = sarg1->slen + sarg2->slen + extra_chars;
768 ret->sbuf = ret->abuf;
769 GNUNET_snprintf (ret->sbuf,
770 ret->blen,
771 format,
772 (int) sarg1->slen,
773 sarg1->sbuf,
774 (int) sarg2->slen,
775 sarg2->sbuf);
776}
777
778
790static void
792 const char *format,
793 size_t extra_chars,
794 const struct StringBuffer *sarg1,
795 const struct StringBuffer *sarg2,
796 const struct StringBuffer *sarg3)
797{
798 if (ret->blen < sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1)
799 sb_realloc (ret, sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1);
800 ret->null_flag = GNUNET_NO;
801 ret->slen = sarg1->slen + sarg2->slen + sarg3->slen + extra_chars;
802 ret->sbuf = ret->abuf;
803 GNUNET_snprintf (ret->sbuf,
804 ret->blen,
805 format,
806 (int) sarg1->slen,
807 sarg1->sbuf,
808 (int) sarg2->slen,
809 sarg2->sbuf,
810 (int) sarg3->slen,
811 sarg3->sbuf);
812}
813
814
821static void
823{
824 GNUNET_array_grow (sb->abuf, sb->blen, 0);
825 sb->slen = 0;
826 sb->sbuf = NULL;
827 sb->null_flag = GNUNET_YES;
828}
829
830
837static void
838sb_strdup (struct StringBuffer *out, const struct StringBuffer *in)
839
840{
841 out->null_flag = in->null_flag;
842 if (GNUNET_YES == out->null_flag)
843 return;
844 if (out->blen < in->slen)
845 {
846 GNUNET_array_grow (out->abuf, out->blen, in->slen);
847 }
848 out->sbuf = out->abuf;
849 out->slen = in->slen;
850 GNUNET_memcpy (out->sbuf, in->sbuf, out->slen);
851}
852
853
860static void
861sb_strdup_cstr (struct StringBuffer *out, const char *cstr)
862{
863 if (NULL == cstr)
864 {
865 out->null_flag = GNUNET_YES;
866 return;
867 }
868 out->null_flag = GNUNET_NO;
869 out->slen = strlen (cstr);
870 if (out->blen < out->slen)
871 {
872 GNUNET_array_grow (out->abuf, out->blen, out->slen);
873 }
874 out->sbuf = out->abuf;
875 GNUNET_memcpy (out->sbuf, cstr, out->slen);
876}
877
878
887static int
889{
890 size_t slen;
891 const char *op;
892 const char *cl;
893 const char *pos;
894 const char *end;
895 unsigned int cnt;
896
897 if ((GNUNET_YES == str->null_flag) || ((slen = str->slen) < 2))
898 return GNUNET_NO;
899 pos = str->sbuf;
900 if ('(' != pos[0])
901 return GNUNET_YES;
902 end = str->sbuf + slen;
903 cnt = 1;
904 pos++;
905 while (cnt > 0)
906 {
907 cl = memchr (pos, ')', end - pos);
908 if (NULL == cl)
909 {
910 GNUNET_break (0);
911 return GNUNET_YES;
912 }
913 /* while '(' before ')', count opening parens */
914 while ((NULL != (op = memchr (pos, '(', end - pos))) && (op < cl))
915 {
916 cnt++;
917 pos = op + 1;
918 }
919 /* got ')' first */
920 cnt--;
921 pos = cl + 1;
922 }
923 return (*pos == '\0') ? GNUNET_NO : GNUNET_YES;
924}
925
926
936static void
938{
939 size_t slen;
940 const char *pos;
941 const char *end;
942 const char *sbuf;
943 const char *op;
944 const char *cp;
945 unsigned int cnt;
946
947 if (0)
948 return;
949 sbuf = str->sbuf;
950 if ((GNUNET_YES == str->null_flag) || (1 >= (slen = str->slen)) ||
951 ('(' != str->sbuf[0]) || (')' != str->sbuf[slen - 1]))
952 return;
953 cnt = 0;
954 pos = &sbuf[1];
955 end = &sbuf[slen - 1];
956 op = memchr (pos, '(', end - pos);
957 cp = memchr (pos, ')', end - pos);
958 while (NULL != cp)
959 {
960 while ((NULL != op) && (op < cp))
961 {
962 cnt++;
963 pos = op + 1;
964 op = memchr (pos, '(', end - pos);
965 }
966 while ((NULL != cp) && ((NULL == op) || (cp < op)))
967 {
968 if (0 == cnt)
969 return; /* can't strip parens */
970 cnt--;
971 pos = cp + 1;
972 cp = memchr (pos, ')', end - pos);
973 }
974 }
975 if (0 != cnt)
976 {
977 GNUNET_break (0);
978 return;
979 }
980 str->sbuf++;
981 str->slen -= 2;
982}
983
984
993static int
994has_epsilon (const struct StringBuffer *str)
995{
996 return (GNUNET_YES != str->null_flag) && (0 < str->slen) &&
997 ('(' == str->sbuf[0]) && ('|' == str->sbuf[1]) &&
998 (')' == str->sbuf[str->slen - 1]);
999}
1000
1001
1011static void
1012remove_epsilon (const struct StringBuffer *str, struct StringBuffer *ret)
1013{
1014 if (GNUNET_YES == str->null_flag)
1015 {
1016 ret->null_flag = GNUNET_YES;
1017 return;
1018 }
1019 if ((str->slen > 1) && ('(' == str->sbuf[0]) && ('|' == str->sbuf[1]) &&
1020 (')' == str->sbuf[str->slen - 1]))
1021 {
1022 /* remove epsilon */
1023 if (ret->blen < str->slen - 3)
1024 {
1025 GNUNET_array_grow (ret->abuf, ret->blen, str->slen - 3);
1026 }
1027 ret->sbuf = ret->abuf;
1028 ret->slen = str->slen - 3;
1029 GNUNET_memcpy (ret->sbuf, &str->sbuf[2], ret->slen);
1030 return;
1031 }
1032 sb_strdup (ret, str);
1033}
1034
1035
1045static int
1046sb_strncmp (const struct StringBuffer *str1,
1047 const struct StringBuffer *str2,
1048 size_t n)
1049{
1050 size_t max;
1051
1052 if ((str1->slen != str2->slen) && ((str1->slen < n) || (str2->slen < n)))
1053 return -1;
1054 max = GNUNET_MAX (str1->slen, str2->slen);
1055 if (max > n)
1056 max = n;
1057 return memcmp (str1->sbuf, str2->sbuf, max);
1058}
1059
1060
1070static int
1071sb_strncmp_cstr (const struct StringBuffer *str1, const char *str2, size_t n)
1072{
1073 if (str1->slen < n)
1074 return -1;
1075 return memcmp (str1->sbuf, str2, n);
1076}
1077
1078
1086static void
1087sb_init (struct StringBuffer *sb, size_t n)
1088{
1089 sb->null_flag = GNUNET_NO;
1090 sb->abuf = sb->sbuf = (0 == n) ? NULL : GNUNET_malloc (n);
1091 sb->blen = n;
1092 sb->slen = 0;
1093}
1094
1095
1105static int
1106sb_strkcmp (const struct StringBuffer *str1,
1107 const struct StringBuffer *str2,
1108 size_t k)
1109{
1110 if ((GNUNET_YES == str1->null_flag) || (GNUNET_YES == str2->null_flag) ||
1111 (k > str1->slen) || (str1->slen - k != str2->slen))
1112 return -1;
1113 return memcmp (&str1->sbuf[k], str2->sbuf, str2->slen);
1114}
1115
1116
1125static void
1126number_states (void *cls,
1127 const unsigned int count,
1128 struct REGEX_INTERNAL_State *s)
1129{
1130 struct REGEX_INTERNAL_State **states = cls;
1131
1132 s->dfs_id = count;
1133 if (NULL != states)
1134 states[count] = s;
1135}
1136
1137
1138#define PRIS(a) \
1139 ((GNUNET_YES == a.null_flag) ? 6 : (int) a.slen), \
1140 ((GNUNET_YES == a.null_flag) ? "(null)" : a.sbuf)
1141
1142
1157static void
1159 const struct StringBuffer *R_last_ik,
1160 const struct StringBuffer *R_last_kk,
1161 const struct StringBuffer *R_last_kj,
1162 struct StringBuffer *R_cur_ij,
1163 struct StringBuffer *R_cur_l,
1164 struct StringBuffer *R_cur_r)
1165{
1166 struct StringBuffer R_temp_ij;
1167 struct StringBuffer R_temp_ik;
1168 struct StringBuffer R_temp_kj;
1169 struct StringBuffer R_temp_kk;
1170 int eps_check;
1171 int ij_ik_cmp;
1172 int ij_kj_cmp;
1173 int ik_kk_cmp;
1174 int kk_kj_cmp;
1175 int clean_ik_kk_cmp;
1176 int clean_kk_kj_cmp;
1177 size_t length;
1178 size_t length_l;
1179 size_t length_r;
1180
1181 /*
1182 * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1183 * R_last == R^{(k-1)}, R_cur == R^{(k)}
1184 * R_cur_ij = R_cur_l | R_cur_r
1185 * R_cur_l == R^{(k-1)}_{ij}
1186 * R_cur_r == R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1187 */if ((GNUNET_YES == R_last_ij->null_flag) &&
1188 ((GNUNET_YES == R_last_ik->null_flag) ||
1189 (GNUNET_YES == R_last_kj->null_flag)))
1190 {
1191 /* R^{(k)}_{ij} = N | N */
1192 R_cur_ij->null_flag = GNUNET_YES;
1193 R_cur_ij->synced = GNUNET_NO;
1194 return;
1195 }
1196
1197 if ((GNUNET_YES == R_last_ik->null_flag) ||
1198 (GNUNET_YES == R_last_kj->null_flag))
1199 {
1200 /* R^{(k)}_{ij} = R^{(k-1)}_{ij} | N */
1201 if (GNUNET_YES == R_last_ij->synced)
1202 {
1203 R_cur_ij->synced = GNUNET_YES;
1204 R_cur_ij->null_flag = GNUNET_NO;
1205 return;
1206 }
1207 R_cur_ij->synced = GNUNET_YES;
1208 sb_strdup (R_cur_ij, R_last_ij);
1209 return;
1210 }
1211 R_cur_ij->synced = GNUNET_NO;
1212
1213 /* $R^{(k)}_{ij} = N | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} OR
1214 * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} */
1215
1216 R_cur_r->null_flag = GNUNET_YES;
1217 R_cur_r->slen = 0;
1218 R_cur_l->null_flag = GNUNET_YES;
1219 R_cur_l->slen = 0;
1220
1221 /* cache results from strcmp, we might need these many times */
1222 ij_kj_cmp = sb_nullstrcmp (R_last_ij, R_last_kj);
1223 ij_ik_cmp = sb_nullstrcmp (R_last_ij, R_last_ik);
1224 ik_kk_cmp = sb_nullstrcmp (R_last_ik, R_last_kk);
1225 kk_kj_cmp = sb_nullstrcmp (R_last_kk, R_last_kj);
1226
1227 /* Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well
1228 * as parentheses, so we can better compare the contents */
1229
1230 memset (&R_temp_ij, 0, sizeof(struct StringBuffer));
1231 memset (&R_temp_ik, 0, sizeof(struct StringBuffer));
1232 memset (&R_temp_kk, 0, sizeof(struct StringBuffer));
1233 memset (&R_temp_kj, 0, sizeof(struct StringBuffer));
1234 remove_epsilon (R_last_ik, &R_temp_ik);
1235 remove_epsilon (R_last_kk, &R_temp_kk);
1236 remove_epsilon (R_last_kj, &R_temp_kj);
1237 remove_parentheses (&R_temp_ik);
1238 remove_parentheses (&R_temp_kk);
1239 remove_parentheses (&R_temp_kj);
1240 clean_ik_kk_cmp = sb_nullstrcmp (R_last_ik, &R_temp_kk);
1241 clean_kk_kj_cmp = sb_nullstrcmp (&R_temp_kk, R_last_kj);
1242
1243 /* construct R_cur_l (and, if necessary R_cur_r) */
1244 if (GNUNET_YES != R_last_ij->null_flag)
1245 {
1246 /* Assign R_temp_ij to R_last_ij and remove epsilon as well
1247 * as parentheses, so we can better compare the contents */
1248 remove_epsilon (R_last_ij, &R_temp_ij);
1249 remove_parentheses (&R_temp_ij);
1250
1251 if ((0 == sb_strcmp (&R_temp_ij, &R_temp_ik)) &&
1252 (0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) &&
1253 (0 == sb_strcmp (&R_temp_kk, &R_temp_kj)))
1254 {
1255 if (0 == R_temp_ij.slen)
1256 {
1257 R_cur_r->null_flag = GNUNET_NO;
1258 }
1259 else if ((0 == sb_strncmp_cstr (R_last_ij, "(|", 2)) ||
1260 ((0 == sb_strncmp_cstr (R_last_ik, "(|", 2)) &&
1261 (0 == sb_strncmp_cstr (R_last_kj, "(|", 2)) ))
1262 {
1263 /*
1264 * a|(e|a)a*(e|a) = a*
1265 * a|(e|a)(e|a)*(e|a) = a*
1266 * (e|a)|aa*a = a*
1267 * (e|a)|aa*(e|a) = a*
1268 * (e|a)|(e|a)a*a = a*
1269 * (e|a)|(e|a)a*(e|a) = a*
1270 * (e|a)|(e|a)(e|a)*(e|a) = a*
1271 */if (GNUNET_YES == needs_parentheses (&R_temp_ij))
1272 sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_ij);
1273 else
1274 sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_ij);
1275 }
1276 else
1277 {
1278 /*
1279 * a|aa*a = a+
1280 * a|(e|a)a*a = a+
1281 * a|aa*(e|a) = a+
1282 * a|(e|a)(e|a)*a = a+
1283 * a|a(e|a)*(e|a) = a+
1284 */if (GNUNET_YES == needs_parentheses (&R_temp_ij))
1285 sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_ij);
1286 else
1287 sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_ij);
1288 }
1289 }
1290 else if ((0 == ij_ik_cmp) && (0 == clean_kk_kj_cmp) &&
1291 (0 != clean_ik_kk_cmp))
1292 {
1293 /* a|ab*b = ab* */
1294 if (0 == R_last_kk->slen)
1295 sb_strdup (R_cur_r, R_last_ij);
1296 else if (GNUNET_YES == needs_parentheses (&R_temp_kk))
1297 sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
1298 else
1299 sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, R_last_kk);
1300 R_cur_l->null_flag = GNUNET_YES;
1301 }
1302 else if ((0 == ij_kj_cmp) && (0 == clean_ik_kk_cmp) &&
1303 (0 != clean_kk_kj_cmp))
1304 {
1305 /* a|bb*a = b*a */
1306 if (R_last_kk->slen < 1)
1307 {
1308 sb_strdup (R_cur_r, R_last_kj);
1309 }
1310 else if (GNUNET_YES == needs_parentheses (&R_temp_kk))
1311 sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
1312 else
1313 sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
1314
1315 R_cur_l->null_flag = GNUNET_YES;
1316 }
1317 else if ((0 == ij_ik_cmp) && (0 == kk_kj_cmp) &&
1318 (! has_epsilon (R_last_ij)) && has_epsilon (R_last_kk))
1319 {
1320 /* a|a(e|b)*(e|b) = a|ab* = a|a|ab|abb|abbb|... = ab* */
1321 if (needs_parentheses (&R_temp_kk))
1322 sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
1323 else
1324 sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, &R_temp_kk);
1325 R_cur_l->null_flag = GNUNET_YES;
1326 }
1327 else if ((0 == ij_kj_cmp) && (0 == ik_kk_cmp) &&
1328 (! has_epsilon (R_last_ij)) && has_epsilon (R_last_kk))
1329 {
1330 /* a|(e|b)(e|b)*a = a|b*a = a|a|ba|bba|bbba|... = b*a */
1331 if (needs_parentheses (&R_temp_kk))
1332 sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_ij);
1333 else
1334 sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_ij);
1335 R_cur_l->null_flag = GNUNET_YES;
1336 }
1337 else
1338 {
1339 sb_strdup (R_cur_l, R_last_ij);
1340 remove_parentheses (R_cur_l);
1341 }
1342 }
1343 else
1344 {
1345 /* we have no left side */
1346 R_cur_l->null_flag = GNUNET_YES;
1347 }
1348
1349 /* construct R_cur_r, if not already constructed */
1350 if (GNUNET_YES == R_cur_r->null_flag)
1351 {
1352 length = R_temp_kk.slen - R_last_ik->slen;
1353
1354 /* a(ba)*bx = (ab)+x */
1355 if ((length > 0) && (GNUNET_YES != R_last_kk->null_flag) &&
1356 (0 < R_last_kk->slen) && (GNUNET_YES != R_last_kj->null_flag) &&
1357 (0 < R_last_kj->slen) && (GNUNET_YES != R_last_ik->null_flag) &&
1358 (0 < R_last_ik->slen) &&
1359 (0 == sb_strkcmp (&R_temp_kk, R_last_ik, length)) &&
1360 (0 == sb_strncmp (&R_temp_kk, R_last_kj, length)))
1361 {
1362 struct StringBuffer temp_a;
1363 struct StringBuffer temp_b;
1364
1365 sb_init (&temp_a, length);
1366 sb_init (&temp_b, R_last_kj->slen - length);
1367
1368 length_l = length;
1369 temp_a.sbuf = temp_a.abuf;
1370 GNUNET_memcpy (temp_a.sbuf, R_last_kj->sbuf, length_l);
1371 temp_a.slen = length_l;
1372
1373 length_r = R_last_kj->slen - length;
1374 temp_b.sbuf = temp_b.abuf;
1375 GNUNET_memcpy (temp_b.sbuf, &R_last_kj->sbuf[length], length_r);
1376 temp_b.slen = length_r;
1377
1378 /* e|(ab)+ = (ab)* */
1379 if ((GNUNET_YES != R_cur_l->null_flag) && (0 == R_cur_l->slen) &&
1380 (0 == temp_b.slen))
1381 {
1382 sb_printf2 (R_cur_r, "(%.*s%.*s)*", 3, R_last_ik, &temp_a);
1383 sb_free (R_cur_l);
1384 R_cur_l->null_flag = GNUNET_YES;
1385 }
1386 else
1387 {
1388 sb_printf3 (R_cur_r, "(%.*s%.*s)+%.*s", 3, R_last_ik, &temp_a, &temp_b);
1389 }
1390 sb_free (&temp_a);
1391 sb_free (&temp_b);
1392 }
1393 else if ((0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) &&
1394 (0 == sb_strcmp (&R_temp_kk, &R_temp_kj)) )
1395 {
1396 /*
1397 * (e|a)a*(e|a) = a*
1398 * (e|a)(e|a)*(e|a) = a*
1399 */
1400 if (has_epsilon (R_last_ik) && has_epsilon (R_last_kj))
1401 {
1402 if (needs_parentheses (&R_temp_kk))
1403 sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_kk);
1404 else
1405 sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_kk);
1406 }
1407 /* aa*a = a+a */
1408 else if ((0 == clean_ik_kk_cmp) && (0 == clean_kk_kj_cmp) &&
1409 (! has_epsilon (R_last_ik)))
1410 {
1411 if (needs_parentheses (&R_temp_kk))
1412 sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, &R_temp_kk);
1413 else
1414 sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, &R_temp_kk);
1415 }
1416 /*
1417 * (e|a)a*a = a+
1418 * aa*(e|a) = a+
1419 * a(e|a)*(e|a) = a+
1420 * (e|a)a*a = a+
1421 */else
1422 {
1423 eps_check = (has_epsilon (R_last_ik) + has_epsilon (R_last_kk)
1424 + has_epsilon (R_last_kj));
1425
1426 if (1 == eps_check)
1427 {
1428 if (needs_parentheses (&R_temp_kk))
1429 sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_kk);
1430 else
1431 sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_kk);
1432 }
1433 }
1434 }
1435 /*
1436 * aa*b = a+b
1437 * (e|a)(e|a)*b = a*b
1438 */
1439 else if (0 == sb_strcmp (&R_temp_ik, &R_temp_kk))
1440 {
1441 if (has_epsilon (R_last_ik))
1442 {
1443 if (needs_parentheses (&R_temp_kk))
1444 sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
1445 else
1446 sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
1447 }
1448 else
1449 {
1450 if (needs_parentheses (&R_temp_kk))
1451 sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, R_last_kj);
1452 else
1453 sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, R_last_kj);
1454 }
1455 }
1456 /*
1457 * ba*a = ba+
1458 * b(e|a)*(e|a) = ba*
1459 */
1460 else if (0 == sb_strcmp (&R_temp_kk, &R_temp_kj))
1461 {
1462 if (has_epsilon (R_last_kj))
1463 {
1464 if (needs_parentheses (&R_temp_kk))
1465 sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ik, &R_temp_kk);
1466 else
1467 sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ik, &R_temp_kk);
1468 }
1469 else
1470 {
1471 if (needs_parentheses (&R_temp_kk))
1472 sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, R_last_ik, &R_temp_kk);
1473 else
1474 sb_printf2 (R_cur_r, "%.*s+%.*s", 1, R_last_ik, &R_temp_kk);
1475 }
1476 }
1477 else
1478 {
1479 if (0 < R_temp_kk.slen)
1480 {
1481 if (needs_parentheses (&R_temp_kk))
1482 {
1483 sb_printf3 (R_cur_r,
1484 "%.*s(%.*s)*%.*s",
1485 3,
1486 R_last_ik,
1487 &R_temp_kk,
1488 R_last_kj);
1489 }
1490 else
1491 {
1492 sb_printf3 (R_cur_r,
1493 "%.*s%.*s*%.*s",
1494 1,
1495 R_last_ik,
1496 &R_temp_kk,
1497 R_last_kj);
1498 }
1499 }
1500 else
1501 {
1502 sb_printf2 (R_cur_r, "%.*s%.*s", 0, R_last_ik, R_last_kj);
1503 }
1504 }
1505 }
1506 sb_free (&R_temp_ij);
1507 sb_free (&R_temp_ik);
1508 sb_free (&R_temp_kk);
1509 sb_free (&R_temp_kj);
1510
1511 if ((GNUNET_YES == R_cur_l->null_flag) && (GNUNET_YES == R_cur_r->null_flag))
1512 {
1513 R_cur_ij->null_flag = GNUNET_YES;
1514 return;
1515 }
1516
1517 if ((GNUNET_YES != R_cur_l->null_flag) && (GNUNET_YES == R_cur_r->null_flag))
1518 {
1519 struct StringBuffer tmp;
1520
1521 tmp = *R_cur_ij;
1522 *R_cur_ij = *R_cur_l;
1523 *R_cur_l = tmp;
1524 return;
1525 }
1526
1527 if ((GNUNET_YES == R_cur_l->null_flag) && (GNUNET_YES != R_cur_r->null_flag))
1528 {
1529 struct StringBuffer tmp;
1530
1531 tmp = *R_cur_ij;
1532 *R_cur_ij = *R_cur_r;
1533 *R_cur_r = tmp;
1534 return;
1535 }
1536
1537 if (0 == sb_nullstrcmp (R_cur_l, R_cur_r))
1538 {
1539 struct StringBuffer tmp;
1540
1541 tmp = *R_cur_ij;
1542 *R_cur_ij = *R_cur_l;
1543 *R_cur_l = tmp;
1544 return;
1545 }
1546 sb_printf2 (R_cur_ij, "(%.*s|%.*s)", 3, R_cur_l, R_cur_r);
1547}
1548
1549
1561static int
1563{
1564 unsigned int n = a->state_count;
1565 struct REGEX_INTERNAL_State *states[n];
1566 struct StringBuffer *R_last;
1567 struct StringBuffer *R_cur;
1568 struct StringBuffer R_cur_r;
1569 struct StringBuffer R_cur_l;
1570 struct StringBuffer *R_swap;
1572 struct StringBuffer complete_regex;
1573 unsigned int i;
1574 unsigned int j;
1575 unsigned int k;
1576
1577 R_last = GNUNET_malloc_large (sizeof(struct StringBuffer) * n * n);
1578 R_cur = GNUNET_malloc_large (sizeof(struct StringBuffer) * n * n);
1579 if ((NULL == R_last) || (NULL == R_cur))
1580 {
1582 GNUNET_free (R_cur);
1583 GNUNET_free (R_last);
1584 return GNUNET_SYSERR;
1585 }
1586
1587 /* create depth-first numbering of the states, initializes 'state' */
1589 a->start,
1590 NULL,
1591 NULL,
1593 states);
1594
1595 for (i = 0; i < n; i++)
1596 GNUNET_assert (NULL != states[i]);
1597 for (i = 0; i < n; i++)
1598 for (j = 0; j < n; j++)
1599 R_last[i * n + j].null_flag = GNUNET_YES;
1600
1601 /* Compute regular expressions of length "1" between each pair of states */
1602 for (i = 0; i < n; i++)
1603 {
1604 for (t = states[i]->transitions_head; NULL != t; t = t->next)
1605 {
1606 j = t->to_state->dfs_id;
1607 if (GNUNET_YES == R_last[i * n + j].null_flag)
1608 {
1609 sb_strdup_cstr (&R_last[i * n + j], t->label);
1610 }
1611 else
1612 {
1613 sb_append_cstr (&R_last[i * n + j], "|");
1614 sb_append_cstr (&R_last[i * n + j], t->label);
1615 }
1616 }
1617 /* add self-loop: i is reachable from i via epsilon-transition */
1618 if (GNUNET_YES == R_last[i * n + i].null_flag)
1619 {
1620 R_last[i * n + i].slen = 0;
1621 R_last[i * n + i].null_flag = GNUNET_NO;
1622 }
1623 else
1624 {
1625 sb_wrap (&R_last[i * n + i], "(|%.*s)", 3);
1626 }
1627 }
1628 for (i = 0; i < n; i++)
1629 for (j = 0; j < n; j++)
1630 if (needs_parentheses (&R_last[i * n + j]))
1631 sb_wrap (&R_last[i * n + j], "(%.*s)", 2);
1632 /* Compute regular expressions of length "k" between each pair of states per
1633 * induction */
1634 memset (&R_cur_l, 0, sizeof(struct StringBuffer));
1635 memset (&R_cur_r, 0, sizeof(struct StringBuffer));
1636 for (k = 0; k < n; k++)
1637 {
1638 for (i = 0; i < n; i++)
1639 {
1640 for (j = 0; j < n; j++)
1641 {
1642 /* Basis for the recursion:
1643 * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1644 * R_last == R^{(k-1)}, R_cur == R^{(k)}
1645 */
1646
1647 /* Create R_cur[i][j] and simplify the expression */
1648 automaton_create_proofs_simplify (&R_last[i * n + j],
1649 &R_last[i * n + k],
1650 &R_last[k * n + k],
1651 &R_last[k * n + j],
1652 &R_cur[i * n + j],
1653 &R_cur_l,
1654 &R_cur_r);
1655 }
1656 }
1657 /* set R_last = R_cur */
1658 R_swap = R_last;
1659 R_last = R_cur;
1660 R_cur = R_swap;
1661 /* clear 'R_cur' for next iteration */
1662 for (i = 0; i < n; i++)
1663 for (j = 0; j < n; j++)
1664 R_cur[i * n + j].null_flag = GNUNET_YES;
1665 }
1666 sb_free (&R_cur_l);
1667 sb_free (&R_cur_r);
1668 /* assign proofs and hashes */
1669 for (i = 0; i < n; i++)
1670 {
1671 if (GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag)
1672 {
1673 states[i]->proof = GNUNET_strndup (R_last[a->start->dfs_id * n + i].sbuf,
1674 R_last[a->start->dfs_id * n + i].slen);
1675 GNUNET_CRYPTO_hash (states[i]->proof,
1676 strlen (states[i]->proof),
1677 &states[i]->hash);
1678 }
1679 }
1680
1681 /* complete regex for whole DFA: union of all pairs (start state/accepting
1682 * state(s)). */
1683 sb_init (&complete_regex, 16 * n);
1684 for (i = 0; i < n; i++)
1685 {
1686 if (states[i]->accepting)
1687 {
1688 if ((0 == complete_regex.slen) &&
1689 (0 < R_last[a->start->dfs_id * n + i].slen))
1690 {
1691 sb_append (&complete_regex, &R_last[a->start->dfs_id * n + i]);
1692 }
1693 else if ((GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag) &&
1694 (0 < R_last[a->start->dfs_id * n + i].slen))
1695 {
1696 sb_append_cstr (&complete_regex, "|");
1697 sb_append (&complete_regex, &R_last[a->start->dfs_id * n + i]);
1698 }
1699 }
1700 }
1701 a->canonical_regex =
1702 GNUNET_strndup (complete_regex.sbuf, complete_regex.slen);
1703
1704 /* cleanup */
1705 sb_free (&complete_regex);
1706 for (i = 0; i < n; i++)
1707 for (j = 0; j < n; j++)
1708 {
1709 sb_free (&R_cur[i * n + j]);
1710 sb_free (&R_last[i * n + j]);
1711 }
1712 GNUNET_free (R_cur);
1713 GNUNET_free (R_last);
1714 return GNUNET_OK;
1715}
1716
1717
1727static struct REGEX_INTERNAL_State *
1729 struct REGEX_INTERNAL_StateSet *nfa_states)
1730{
1731 struct REGEX_INTERNAL_State *s;
1732 char *pos;
1733 size_t len;
1734 struct REGEX_INTERNAL_State *cstate;
1735 struct REGEX_INTERNAL_Transition *ctran;
1736 unsigned int i;
1737
1738 s = GNUNET_new (struct REGEX_INTERNAL_State);
1739 s->id = ctx->state_id++;
1740 s->index = -1;
1741 s->lowlink = -1;
1742
1743 if (NULL == nfa_states)
1744 {
1745 GNUNET_asprintf (&s->name, "s%i", s->id);
1746 return s;
1747 }
1748
1749 s->nfa_set = *nfa_states;
1750
1751 if (nfa_states->off < 1)
1752 return s;
1753
1754 /* Create a name based on 'nfa_states' */
1755 len = nfa_states->off * 14 + 4;
1756 s->name = GNUNET_malloc (len);
1757 strcat (s->name, "{");
1758 pos = s->name + 1;
1759
1760 for (i = 0; i < nfa_states->off; i++)
1761 {
1762 cstate = nfa_states->states[i];
1763 GNUNET_snprintf (pos, pos - s->name + len, "%i,", cstate->id);
1764 pos += strlen (pos);
1765
1766 /* Add a transition for each distinct label to NULL state */
1767 for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next)
1768 if (NULL != ctran->label)
1769 state_add_transition (ctx, s, ctran->label, NULL);
1770
1771 /* If the nfa_states contain an accepting state, the new dfa state is also
1772 * accepting. */
1773 if (cstate->accepting)
1774 s->accepting = 1;
1775 }
1776 pos[-1] = '}';
1777 s->name = GNUNET_realloc (s->name, strlen (s->name) + 1);
1778
1779 memset (nfa_states, 0, sizeof(struct REGEX_INTERNAL_StateSet));
1780 return s;
1781}
1782
1783
1797static unsigned int
1798dfa_move (struct REGEX_INTERNAL_State **s, const char *str)
1799{
1801 struct REGEX_INTERNAL_State *new_s;
1802 unsigned int len;
1803 unsigned int max_len;
1804
1805 if (NULL == s)
1806 return 0;
1807
1808 new_s = NULL;
1809 max_len = 0;
1810 for (t = (*s)->transitions_head; NULL != t; t = t->next)
1811 {
1812 len = strlen (t->label);
1813
1814 if (0 == strncmp (t->label, str, len))
1815 {
1816 if (len >= max_len)
1817 {
1818 max_len = len;
1819 new_s = t->to_state;
1820 }
1821 }
1822 }
1823
1824 *s = new_s;
1825 return max_len;
1826}
1827
1828
1838static void
1839mark_states (void *cls,
1840 const unsigned int count,
1841 struct REGEX_INTERNAL_State *s)
1842{
1843 s->marked = GNUNET_YES;
1844}
1845
1846
1853static void
1855{
1856 struct REGEX_INTERNAL_State *s;
1857 struct REGEX_INTERNAL_State *s_next;
1858
1859 /* 1. unmark all states */
1860 for (s = a->states_head; NULL != s; s = s->next)
1861 s->marked = GNUNET_NO;
1862
1863 /* 2. traverse dfa from start state and mark all visited states */
1865 a->start,
1866 NULL,
1867 NULL,
1868 &mark_states,
1869 NULL);
1870
1871 /* 3. delete all states that were not visited */
1872 for (s = a->states_head; NULL != s; s = s_next)
1873 {
1874 s_next = s->next;
1875 if (GNUNET_NO == s->marked)
1877 }
1878}
1879
1880
1887static void
1889{
1890 struct REGEX_INTERNAL_State *s;
1891 struct REGEX_INTERNAL_State *s_next;
1893 int dead;
1894
1895 GNUNET_assert (DFA == a->type);
1896
1897 for (s = a->states_head; NULL != s; s = s_next)
1898 {
1899 s_next = s->next;
1900
1901 if (s->accepting)
1902 continue;
1903
1904 dead = 1;
1905 for (t = s->transitions_head; NULL != t; t = t->next)
1906 {
1907 if ((NULL != t->to_state) && (t->to_state != s) )
1908 {
1909 dead = 0;
1910 break;
1911 }
1912 }
1913
1914 if (0 == dead)
1915 continue;
1916
1917 /* state s is dead, remove it */
1919 }
1920}
1921
1922
1930static int
1932 struct REGEX_INTERNAL_Automaton *a)
1933{
1934 uint32_t *table;
1935 struct REGEX_INTERNAL_State *s1;
1936 struct REGEX_INTERNAL_State *s2;
1937 struct REGEX_INTERNAL_Transition *t1;
1938 struct REGEX_INTERNAL_Transition *t2;
1939 struct REGEX_INTERNAL_State *s1_next;
1940 struct REGEX_INTERNAL_State *s2_next;
1941 int change;
1942 unsigned int num_equal_edges;
1943 unsigned int i;
1944 unsigned int state_cnt;
1945 unsigned long long idx;
1946 unsigned long long idx1;
1947
1948 if ((NULL == a) || (0 == a->state_count))
1949 {
1951 "Could not merge nondistinguishable states, automaton was NULL.\n");
1952 return GNUNET_SYSERR;
1953 }
1954
1955 state_cnt = a->state_count;
1957 (sizeof(uint32_t) * state_cnt * state_cnt / 32) + sizeof(uint32_t));
1958 if (NULL == table)
1959 {
1961 return GNUNET_SYSERR;
1962 }
1963
1964 for (i = 0, s1 = a->states_head; NULL != s1; s1 = s1->next)
1965 s1->marked = i++;
1966
1967 /* Mark all pairs of accepting/!accepting states */
1968 for (s1 = a->states_head; NULL != s1; s1 = s1->next)
1969 for (s2 = a->states_head; NULL != s2; s2 = s2->next)
1970 if ((s1->accepting && ! s2->accepting) ||
1971 (! s1->accepting && s2->accepting))
1972 {
1973 idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
1974 table[idx / 32] |= (1U << (idx % 32));
1975 }
1976
1977 /* Find all equal states */
1978 change = 1;
1979 while (0 != change)
1980 {
1981 change = 0;
1982 for (s1 = a->states_head; NULL != s1; s1 = s1->next)
1983 {
1984 for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2->next)
1985 {
1986 idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
1987 if (0 != (table[idx / 32] & (1U << (idx % 32))))
1988 continue;
1989 num_equal_edges = 0;
1990 for (t1 = s1->transitions_head; NULL != t1; t1 = t1->next)
1991 {
1992 for (t2 = s2->transitions_head; NULL != t2; t2 = t2->next)
1993 {
1994 if (0 == strcmp (t1->label, t2->label))
1995 {
1996 num_equal_edges++;
1997 /* same edge, but targets definitively different, so we're different
1998 as well */
1999 if (t1->to_state->marked > t2->to_state->marked)
2000 idx1 = (unsigned long long) t1->to_state->marked * state_cnt
2001 + t2->to_state->marked;
2002 else
2003 idx1 = (unsigned long long) t2->to_state->marked * state_cnt
2004 + t1->to_state->marked;
2005 if (0 != (table[idx1 / 32] & (1U << (idx1 % 32))))
2006 {
2007 table[idx / 32] |= (1U << (idx % 32));
2008 change = 1; /* changed a marker, need to run again */
2009 }
2010 }
2011 }
2012 }
2013 if ((num_equal_edges != s1->transition_count) ||
2014 (num_equal_edges != s2->transition_count))
2015 {
2016 /* Make sure ALL edges of possible equal states are the same */
2017 table[idx / 32] |= (1U << (idx % 32));
2018 change = 1; /* changed a marker, need to run again */
2019 }
2020 }
2021 }
2022 }
2023
2024 /* Merge states that are equal */
2025 for (s1 = a->states_head; NULL != s1; s1 = s1_next)
2026 {
2027 s1_next = s1->next;
2028 for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2_next)
2029 {
2030 s2_next = s2->next;
2031 idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
2032 if (0 == (table[idx / 32] & (1U << (idx % 32))))
2033 automaton_merge_states (ctx, a, s1, s2);
2034 }
2035 }
2036
2038 return GNUNET_OK;
2039}
2040
2041
2050static int
2052 struct REGEX_INTERNAL_Automaton *a)
2053{
2054 if (NULL == a)
2055 return GNUNET_SYSERR;
2056
2057 GNUNET_assert (DFA == a->type);
2058
2059 /* 1. remove unreachable states */
2061
2062 /* 2. remove dead states */
2064
2065 /* 3. Merge nondistinguishable states */
2067 return GNUNET_SYSERR;
2068 return GNUNET_OK;
2069}
2070
2071
2076{
2080 const unsigned int stride;
2081
2087
2092};
2093
2094
2105static void
2107 const unsigned int depth,
2108 char *label,
2110 struct REGEX_INTERNAL_State *s)
2111{
2112 struct REGEX_INTERNAL_Strided_Context *ctx = cls;
2114 char *new_label;
2115
2116 if (depth == ctx->stride)
2117 {
2119 t->label = GNUNET_strdup (label);
2120 t->to_state = s;
2121 t->from_state = start;
2122 GNUNET_CONTAINER_DLL_insert (ctx->transitions_head,
2123 ctx->transitions_tail,
2124 t);
2125 }
2126 else
2127 {
2128 for (t = s->transitions_head; NULL != t; t = t->next)
2129 {
2130 /* Do not consider self-loops, because it end's up in too many
2131 * transitions */
2132 if (t->to_state == t->from_state)
2133 continue;
2134
2135 if (NULL != label)
2136 {
2137 GNUNET_asprintf (&new_label, "%s%s", label, t->label);
2138 }
2139 else
2140 new_label = GNUNET_strdup (t->label);
2141
2143 (depth + 1),
2144 new_label,
2145 start,
2146 t->to_state);
2147 }
2148 }
2150}
2151
2152
2161static void
2163 const unsigned int count,
2164 struct REGEX_INTERNAL_State *s)
2165{
2166 dfa_add_multi_strides_helper (cls, 0, NULL, s, s);
2167}
2168
2169
2177void
2179 struct REGEX_INTERNAL_Automaton *dfa,
2180 const unsigned int stride_len)
2181{
2182 struct REGEX_INTERNAL_Strided_Context ctx = { stride_len, NULL, NULL };
2184 struct REGEX_INTERNAL_Transition *t_next;
2185
2186 if ((1 > stride_len) || (GNUNET_YES == dfa->is_multistrided))
2187 return;
2188
2189 /* Compute the new transitions of given stride_len */
2191 dfa->start,
2192 NULL,
2193 NULL,
2195 &ctx);
2196
2197 /* Add all the new transitions to the automaton. */
2198 for (t = ctx.transitions_head; NULL != t; t = t_next)
2199 {
2200 t_next = t->next;
2201 state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
2202 GNUNET_CONTAINER_DLL_remove (ctx.transitions_head, ctx.transitions_tail, t);
2203 GNUNET_free (t->label);
2204 GNUNET_free (t);
2205 }
2206
2207 /* Mark this automaton as multistrided */
2209}
2210
2211
2225void
2228 struct REGEX_INTERNAL_State *cur,
2229 char *label,
2230 unsigned int max_len,
2231 struct REGEX_INTERNAL_Transition **transitions_head,
2232 struct REGEX_INTERNAL_Transition **transitions_tail)
2233{
2235 char *new_label;
2236
2237
2238 if ((NULL != label) &&
2239 (((cur->incoming_transition_count > 1) || (GNUNET_YES ==
2240 cur->accepting) ||
2241 (GNUNET_YES == cur->marked) ) ||
2242 ((start != dfa->start) && (max_len > 0) && (max_len == strlen (
2243 label))) ||
2244 ((start == dfa->start) && (GNUNET_REGEX_INITIAL_BYTES == strlen (
2245 label)))))
2246 {
2248 t->label = GNUNET_strdup (label);
2249 t->to_state = cur;
2250 t->from_state = start;
2251 GNUNET_CONTAINER_DLL_insert (*transitions_head, *transitions_tail, t);
2252
2253 if (GNUNET_NO == cur->marked)
2254 {
2256 cur,
2257 cur,
2258 NULL,
2259 max_len,
2260 transitions_head,
2261 transitions_tail);
2262 }
2263 return;
2264 }
2265 else if (cur != start)
2266 cur->contained = GNUNET_YES;
2267
2268 if ((GNUNET_YES == cur->marked) && (cur != start))
2269 return;
2270
2271 cur->marked = GNUNET_YES;
2272
2273
2274 for (t = cur->transitions_head; NULL != t; t = t->next)
2275 {
2276 if (NULL != label)
2277 GNUNET_asprintf (&new_label, "%s%s", label, t->label);
2278 else
2279 new_label = GNUNET_strdup (t->label);
2280
2281 if (t->to_state != cur)
2282 {
2284 start,
2285 t->to_state,
2286 new_label,
2287 max_len,
2288 transitions_head,
2289 transitions_tail);
2290 }
2291 GNUNET_free (new_label);
2292 }
2293}
2294
2295
2304static void
2306 struct REGEX_INTERNAL_Automaton *dfa,
2307 unsigned int max_len)
2308{
2309 struct REGEX_INTERNAL_State *s;
2310 struct REGEX_INTERNAL_State *s_next;
2312 struct REGEX_INTERNAL_Transition *t_next;
2313 struct REGEX_INTERNAL_Transition *transitions_head = NULL;
2314 struct REGEX_INTERNAL_Transition *transitions_tail = NULL;
2315
2316 if (NULL == dfa)
2317 return;
2318
2319 /* Count the incoming transitions on each state. */
2320 for (s = dfa->states_head; NULL != s; s = s->next)
2321 {
2322 for (t = s->transitions_head; NULL != t; t = t->next)
2323 {
2324 if (NULL != t->to_state)
2325 t->to_state->incoming_transition_count++;
2326 }
2327 }
2328
2329 /* Unmark all states. */
2330 for (s = dfa->states_head; NULL != s; s = s->next)
2331 {
2332 s->marked = GNUNET_NO;
2333 s->contained = GNUNET_NO;
2334 }
2335
2336 /* Add strides and mark states that can be deleted. */
2338 dfa->start,
2339 dfa->start,
2340 NULL,
2341 max_len,
2342 &transitions_head,
2343 &transitions_tail);
2344
2345 /* Add all the new transitions to the automaton. */
2346 for (t = transitions_head; NULL != t; t = t_next)
2347 {
2348 t_next = t->next;
2349 state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
2350 GNUNET_CONTAINER_DLL_remove (transitions_head, transitions_tail, t);
2351 GNUNET_free (t->label);
2352 GNUNET_free (t);
2353 }
2354
2355 /* Remove marked states (including their incoming and outgoing transitions). */
2356 for (s = dfa->states_head; NULL != s; s = s_next)
2357 {
2358 s_next = s->next;
2359 if (GNUNET_YES == s->contained)
2360 automaton_remove_state (dfa, s);
2361 }
2362}
2363
2364
2374static struct REGEX_INTERNAL_Automaton *
2376 struct REGEX_INTERNAL_State *end)
2377{
2378 struct REGEX_INTERNAL_Automaton *n;
2379
2381
2382 n->type = NFA;
2383 n->start = NULL;
2384 n->end = NULL;
2385 n->state_count = 0;
2386
2387 if ((NULL == start) || (NULL == end))
2388 return n;
2389
2392
2393 n->state_count = 2;
2394
2395 n->start = start;
2396 n->end = end;
2397
2398 return n;
2399}
2400
2401
2409static void
2413{
2414 struct REGEX_INTERNAL_State *s;
2415
2416 if ((NULL == n) || (NULL == states_head))
2417 {
2418 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not add states\n");
2419 return;
2420 }
2421
2422 if (NULL == n->states_head)
2423 {
2424 n->states_head = states_head;
2425 n->states_tail = states_tail;
2426 return;
2427 }
2428
2429 if (NULL != states_head)
2430 {
2431 n->states_tail->next = states_head;
2432 n->states_tail = states_tail;
2433 }
2434
2435 for (s = states_head; NULL != s; s = s->next)
2436 n->state_count++;
2437}
2438
2439
2448static struct REGEX_INTERNAL_State *
2450{
2451 struct REGEX_INTERNAL_State *s;
2452
2453 s = GNUNET_new (struct REGEX_INTERNAL_State);
2454 s->id = ctx->state_id++;
2455 s->accepting = accepting;
2456 s->marked = GNUNET_NO;
2457 s->contained = 0;
2458 s->index = -1;
2459 s->lowlink = -1;
2460 s->scc_id = 0;
2461 s->name = NULL;
2462 GNUNET_asprintf (&s->name, "s%i", s->id);
2463
2464 return s;
2465}
2466
2467
2477static void
2479 struct REGEX_INTERNAL_Automaton *nfa,
2480 struct REGEX_INTERNAL_StateSet *states,
2481 const char *label)
2482{
2483 struct REGEX_INTERNAL_State *s;
2484 unsigned int i;
2485 struct REGEX_INTERNAL_StateSet_MDLL cls_stack;
2486 struct REGEX_INTERNAL_State *clsstate;
2487 struct REGEX_INTERNAL_State *currentstate;
2488 struct REGEX_INTERNAL_Transition *ctran;
2489
2490 memset (ret, 0, sizeof(struct REGEX_INTERNAL_StateSet));
2491 if (NULL == states)
2492 return;
2493
2494 for (i = 0; i < states->off; i++)
2495 {
2496 s = states->states[i];
2497
2498 /* Add start state to closure only for epsilon closure */
2499 if (NULL == label)
2500 state_set_append (ret, s);
2501
2502 /* initialize work stack */
2503 cls_stack.head = NULL;
2504 cls_stack.tail = NULL;
2505 GNUNET_CONTAINER_MDLL_insert (ST, cls_stack.head, cls_stack.tail, s);
2506 cls_stack.len = 1;
2507
2508 while (NULL != (currentstate = cls_stack.tail))
2509 {
2511 cls_stack.head,
2512 cls_stack.tail,
2513 currentstate);
2514 cls_stack.len--;
2515 for (ctran = currentstate->transitions_head; NULL != ctran;
2516 ctran = ctran->next)
2517 {
2518 if (NULL == (clsstate = ctran->to_state))
2519 continue;
2520 if (0 != clsstate->contained)
2521 continue;
2522 if (0 != nullstrcmp (label, ctran->label))
2523 continue;
2524 state_set_append (ret, clsstate);
2526 cls_stack.head,
2527 cls_stack.tail,
2528 clsstate);
2529 cls_stack.len++;
2530 clsstate->contained = 1;
2531 }
2532 }
2533 }
2534 for (i = 0; i < ret->off; i++)
2535 ret->states[i]->contained = 0;
2536
2537 if (ret->off > 1)
2538 qsort (ret->states,
2539 ret->off,
2540 sizeof(struct REGEX_INTERNAL_State *),
2541 &state_compare);
2542}
2543
2544
2550static void
2552{
2553 struct REGEX_INTERNAL_Automaton *a;
2554 struct REGEX_INTERNAL_Automaton *b;
2555 struct REGEX_INTERNAL_Automaton *new_nfa;
2556
2557 b = ctx->stack_tail;
2558 GNUNET_assert (NULL != b);
2559 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
2560 a = ctx->stack_tail;
2561 GNUNET_assert (NULL != a);
2562 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2563
2564 state_add_transition (ctx, a->end, NULL, b->start);
2565 a->end->accepting = 0;
2566 b->end->accepting = 1;
2567
2568 new_nfa = nfa_fragment_create (NULL, NULL);
2569 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2570 nfa_add_states (new_nfa, b->states_head, b->states_tail);
2571 new_nfa->start = a->start;
2572 new_nfa->end = b->end;
2573 new_nfa->state_count += a->state_count + b->state_count;
2576
2577 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2578}
2579
2580
2586static void
2588{
2589 struct REGEX_INTERNAL_Automaton *a;
2590 struct REGEX_INTERNAL_Automaton *new_nfa;
2592 struct REGEX_INTERNAL_State *end;
2593
2594 a = ctx->stack_tail;
2595
2596 if (NULL == a)
2597 {
2598 GNUNET_log (
2600 "nfa_add_star_op failed, because there was no element on the stack");
2601 return;
2602 }
2603
2604 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2605
2606 start = nfa_state_create (ctx, 0);
2607 end = nfa_state_create (ctx, 1);
2608
2609 state_add_transition (ctx, start, NULL, a->start);
2611 state_add_transition (ctx, a->end, NULL, a->start);
2612 state_add_transition (ctx, a->end, NULL, end);
2613
2614 a->end->accepting = 0;
2615 end->accepting = 1;
2616
2617 new_nfa = nfa_fragment_create (start, end);
2618 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2620
2621 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2622}
2623
2624
2630static void
2632{
2633 struct REGEX_INTERNAL_Automaton *a;
2634
2635 a = ctx->stack_tail;
2636
2637 if (NULL == a)
2638 {
2639 GNUNET_log (
2641 "nfa_add_plus_op failed, because there was no element on the stack");
2642 return;
2643 }
2644
2645 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2646
2647 state_add_transition (ctx, a->end, NULL, a->start);
2648
2649 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, a);
2650}
2651
2652
2658static void
2660{
2661 struct REGEX_INTERNAL_Automaton *a;
2662 struct REGEX_INTERNAL_Automaton *new_nfa;
2664 struct REGEX_INTERNAL_State *end;
2665
2666 a = ctx->stack_tail;
2667 if (NULL == a)
2668 {
2669 GNUNET_log (
2671 "nfa_add_question_op failed, because there was no element on the stack");
2672 return;
2673 }
2674
2675 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2676
2677 start = nfa_state_create (ctx, 0);
2678 end = nfa_state_create (ctx, 1);
2679
2680 state_add_transition (ctx, start, NULL, a->start);
2682 state_add_transition (ctx, a->end, NULL, end);
2683
2684 a->end->accepting = 0;
2685
2686 new_nfa = nfa_fragment_create (start, end);
2687 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2688 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2690}
2691
2692
2699static void
2701{
2702 struct REGEX_INTERNAL_Automaton *a;
2703 struct REGEX_INTERNAL_Automaton *b;
2704 struct REGEX_INTERNAL_Automaton *new_nfa;
2706 struct REGEX_INTERNAL_State *end;
2707
2708 b = ctx->stack_tail;
2709 GNUNET_assert (NULL != b);
2710 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
2711 a = ctx->stack_tail;
2712 GNUNET_assert (NULL != a);
2713 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2714
2715 start = nfa_state_create (ctx, 0);
2716 end = nfa_state_create (ctx, 1);
2717 state_add_transition (ctx, start, NULL, a->start);
2718 state_add_transition (ctx, start, NULL, b->start);
2719
2720 state_add_transition (ctx, a->end, NULL, end);
2721 state_add_transition (ctx, b->end, NULL, end);
2722
2723 a->end->accepting = 0;
2724 b->end->accepting = 0;
2725 end->accepting = 1;
2726
2727 new_nfa = nfa_fragment_create (start, end);
2728 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2729 nfa_add_states (new_nfa, b->states_head, b->states_tail);
2732
2733 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2734}
2735
2736
2743static void
2744nfa_add_label (struct REGEX_INTERNAL_Context *ctx, const char *label)
2745{
2746 struct REGEX_INTERNAL_Automaton *n;
2748 struct REGEX_INTERNAL_State *end;
2749
2750 GNUNET_assert (NULL != ctx);
2751
2752 start = nfa_state_create (ctx, 0);
2753 end = nfa_state_create (ctx, 1);
2754 state_add_transition (ctx, start, label, end);
2756 GNUNET_assert (NULL != n);
2757 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, n);
2758}
2759
2760
2766static void
2768{
2769 if (NULL == ctx)
2770 {
2771 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Context was NULL!");
2772 return;
2773 }
2774 ctx->state_id = 0;
2775 ctx->transition_id = 0;
2776 ctx->stack_head = NULL;
2777 ctx->stack_tail = NULL;
2778}
2779
2780
2790REGEX_INTERNAL_construct_nfa (const char *regex, const size_t len)
2791{
2793 struct REGEX_INTERNAL_Automaton *nfa;
2794 const char *regexp;
2795 char curlabel[2];
2796 char *error_msg;
2797 unsigned int count;
2798 unsigned int altcount;
2799 unsigned int atomcount;
2800 unsigned int poff;
2801 unsigned int psize;
2802
2803 struct
2804 {
2805 int altcount;
2806 int atomcount;
2807 } *p;
2808
2809 if ((NULL == regex) || (0 == strlen (regex)) || (0 == len))
2810 {
2812 "Could not parse regex. Empty regex string provided.\n");
2813
2814 return NULL;
2815 }
2817
2818 regexp = regex;
2819 curlabel[1] = '\0';
2820 p = NULL;
2821 error_msg = NULL;
2822 altcount = 0;
2823 atomcount = 0;
2824 poff = 0;
2825 psize = 0;
2826
2827 for (count = 0; count < len && *regexp; count++, regexp++)
2828 {
2829 switch (*regexp)
2830 {
2831 case '(':
2832 if (atomcount > 1)
2833 {
2834 --atomcount;
2836 }
2837 if (poff == psize)
2838 GNUNET_array_grow (p, psize, psize * 2 + 4); /* FIXME why *2 +4? */
2839 p[poff].altcount = altcount;
2840 p[poff].atomcount = atomcount;
2841 poff++;
2842 altcount = 0;
2843 atomcount = 0;
2844 break;
2845
2846 case '|':
2847 if (0 == atomcount)
2848 {
2849 error_msg = "Cannot append '|' to nothing";
2850 goto error;
2851 }
2852 while (--atomcount > 0)
2854 altcount++;
2855 break;
2856
2857 case ')':
2858 if (0 == poff)
2859 {
2860 error_msg = "Missing opening '('";
2861 goto error;
2862 }
2863 if (0 == atomcount)
2864 {
2865 /* Ignore this: "()" */
2866 poff--;
2867 altcount = p[poff].altcount;
2868 atomcount = p[poff].atomcount;
2869 break;
2870 }
2871 while (--atomcount > 0)
2873 for (; altcount > 0; altcount--)
2875 poff--;
2876 altcount = p[poff].altcount;
2877 atomcount = p[poff].atomcount;
2878 atomcount++;
2879 break;
2880
2881 case '*':
2882 if (atomcount == 0)
2883 {
2884 error_msg = "Cannot append '*' to nothing";
2885 goto error;
2886 }
2888 break;
2889
2890 case '+':
2891 if (atomcount == 0)
2892 {
2893 error_msg = "Cannot append '+' to nothing";
2894 goto error;
2895 }
2897 break;
2898
2899 case '?':
2900 if (atomcount == 0)
2901 {
2902 error_msg = "Cannot append '?' to nothing";
2903 goto error;
2904 }
2906 break;
2907
2908 default:
2909 if (atomcount > 1)
2910 {
2911 --atomcount;
2913 }
2914 curlabel[0] = *regexp;
2915 nfa_add_label (&ctx, curlabel);
2916 atomcount++;
2917 break;
2918 }
2919 }
2920 if (0 != poff)
2921 {
2922 error_msg = "Unbalanced parenthesis";
2923 goto error;
2924 }
2925 while (--atomcount > 0)
2927 for (; altcount > 0; altcount--)
2929
2930 GNUNET_array_grow (p, psize, 0);
2931
2932 nfa = ctx.stack_tail;
2933 GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa);
2934
2935 if (NULL != ctx.stack_head)
2936 {
2937 error_msg = "Creating the NFA failed. NFA stack was not empty!";
2938 goto error;
2939 }
2940
2941 /* Remember the regex that was used to generate this NFA */
2942 nfa->regex = GNUNET_strdup (regex);
2943
2944 /* create depth-first numbering of the states for pretty printing */
2946 NULL,
2947 NULL,
2948 NULL,
2950 NULL);
2951
2952 /* No multistriding added so far */
2954
2955 return nfa;
2956
2957error:
2958 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not parse regex: `%s'\n", regex);
2959 if (NULL != error_msg)
2960 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "%s\n", error_msg);
2961
2962 GNUNET_free (p);
2963
2964 while (NULL != (nfa = ctx.stack_head))
2965 {
2966 GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa);
2968 }
2969
2970 return NULL;
2971}
2972
2973
2983static void
2985 struct REGEX_INTERNAL_Automaton *nfa,
2986 struct REGEX_INTERNAL_Automaton *dfa,
2987 struct REGEX_INTERNAL_State *dfa_state)
2988{
2989 struct REGEX_INTERNAL_Transition *ctran;
2990 struct REGEX_INTERNAL_State *new_dfa_state;
2991 struct REGEX_INTERNAL_State *state_contains;
2992 struct REGEX_INTERNAL_State *state_iter;
2993 struct REGEX_INTERNAL_StateSet tmp;
2994 struct REGEX_INTERNAL_StateSet nfa_set;
2995
2996 for (ctran = dfa_state->transitions_head; NULL != ctran; ctran = ctran->next)
2997 {
2998 if ((NULL == ctran->label) || (NULL != ctran->to_state) )
2999 continue;
3000
3001 nfa_closure_set_create (&tmp, nfa, &dfa_state->nfa_set, ctran->label);
3002 nfa_closure_set_create (&nfa_set, nfa, &tmp, NULL);
3003 state_set_clear (&tmp);
3004
3005 state_contains = NULL;
3006 for (state_iter = dfa->states_head; NULL != state_iter;
3007 state_iter = state_iter->next)
3008 {
3009 if (0 == state_set_compare (&state_iter->nfa_set, &nfa_set))
3010 {
3011 state_contains = state_iter;
3012 break;
3013 }
3014 }
3015 if (NULL == state_contains)
3016 {
3017 new_dfa_state = dfa_state_create (ctx, &nfa_set);
3018 automaton_add_state (dfa, new_dfa_state);
3019 ctran->to_state = new_dfa_state;
3020 construct_dfa_states (ctx, nfa, dfa, new_dfa_state);
3021 }
3022 else
3023 {
3024 ctran->to_state = state_contains;
3025 state_set_clear (&nfa_set);
3026 }
3027 }
3028}
3029
3030
3033 const size_t len,
3034 unsigned int max_path_len)
3035{
3037 struct REGEX_INTERNAL_Automaton *dfa;
3038 struct REGEX_INTERNAL_Automaton *nfa;
3039 struct REGEX_INTERNAL_StateSet nfa_start_eps_cls;
3040 struct REGEX_INTERNAL_StateSet singleton_set;
3041
3043
3044 /* Create NFA */
3045 nfa = REGEX_INTERNAL_construct_nfa (regex, len);
3046
3047 if (NULL == nfa)
3048 {
3050 "Could not create DFA, because NFA creation failed\n");
3051 return NULL;
3052 }
3053
3054 dfa = GNUNET_new (struct REGEX_INTERNAL_Automaton);
3055 dfa->type = DFA;
3056 dfa->regex = GNUNET_strdup (regex);
3057
3058 /* Create DFA start state from epsilon closure */
3059 memset (&singleton_set, 0, sizeof(struct REGEX_INTERNAL_StateSet));
3060 state_set_append (&singleton_set, nfa->start);
3061 nfa_closure_set_create (&nfa_start_eps_cls, nfa, &singleton_set, NULL);
3062 state_set_clear (&singleton_set);
3063 dfa->start = dfa_state_create (&ctx, &nfa_start_eps_cls);
3064 automaton_add_state (dfa, dfa->start);
3065
3066 construct_dfa_states (&ctx, nfa, dfa, dfa->start);
3068
3069 /* Minimize DFA */
3070 if (GNUNET_OK != dfa_minimize (&ctx, dfa))
3071 {
3073 return NULL;
3074 }
3075
3076 /* Create proofs and hashes for all states */
3078 {
3080 return NULL;
3081 }
3082
3083 /* Compress linear DFA paths */
3084 if (1 != max_path_len)
3085 dfa_compress_paths (&ctx, dfa, max_path_len);
3086
3087 return dfa;
3088}
3089
3090
3091void
3093{
3094 struct REGEX_INTERNAL_State *s;
3095 struct REGEX_INTERNAL_State *next_state;
3096
3097 if (NULL == a)
3098 return;
3099
3100 GNUNET_free (a->regex);
3102
3103 for (s = a->states_head; NULL != s; s = next_state)
3104 {
3105 next_state = s->next;
3108 }
3109
3110 GNUNET_free (a);
3111}
3112
3113
3122static int
3123evaluate_dfa (struct REGEX_INTERNAL_Automaton *a, const char *string)
3124{
3125 const char *strp;
3126 struct REGEX_INTERNAL_State *s;
3127 unsigned int step_len;
3128
3129 if (DFA != a->type)
3130 {
3132 "Tried to evaluate DFA, but NFA automaton given");
3133 return -1;
3134 }
3135
3136 s = a->start;
3137
3138 /* If the string is empty but the starting state is accepting, we accept. */
3139 if (((NULL == string) || (0 == strlen (string))) && s->accepting)
3140 return 0;
3141
3142 for (strp = string; NULL != strp && *strp; strp += step_len)
3143 {
3144 step_len = dfa_move (&s, strp);
3145
3146 if (NULL == s)
3147 break;
3148 }
3149
3150 if ((NULL != s) && s->accepting)
3151 return 0;
3152
3153 return 1;
3154}
3155
3156
3164static int
3165evaluate_nfa (struct REGEX_INTERNAL_Automaton *a, const char *string)
3166{
3167 const char *strp;
3168 char str[2];
3169 struct REGEX_INTERNAL_State *s;
3170 struct REGEX_INTERNAL_StateSet sset;
3171 struct REGEX_INTERNAL_StateSet new_sset;
3172 struct REGEX_INTERNAL_StateSet singleton_set;
3173 unsigned int i;
3174 int result;
3175
3176 if (NFA != a->type)
3177 {
3179 "Tried to evaluate NFA, but DFA automaton given");
3180 return -1;
3181 }
3182
3183 /* If the string is empty but the starting state is accepting, we accept. */
3184 if (((NULL == string) || (0 == strlen (string))) && a->start->accepting)
3185 return 0;
3186
3187 result = 1;
3188 memset (&singleton_set, 0, sizeof(struct REGEX_INTERNAL_StateSet));
3189 state_set_append (&singleton_set, a->start);
3190 nfa_closure_set_create (&sset, a, &singleton_set, NULL);
3191 state_set_clear (&singleton_set);
3192
3193 str[1] = '\0';
3194 for (strp = string; NULL != strp && *strp; strp++)
3195 {
3196 str[0] = *strp;
3197 nfa_closure_set_create (&new_sset, a, &sset, str);
3198 state_set_clear (&sset);
3199 nfa_closure_set_create (&sset, a, &new_sset, 0);
3200 state_set_clear (&new_sset);
3201 }
3202
3203 for (i = 0; i < sset.off; i++)
3204 {
3205 s = sset.states[i];
3206 if ((NULL != s) && (s->accepting))
3207 {
3208 result = 0;
3209 break;
3210 }
3211 }
3212
3213 state_set_clear (&sset);
3214 return result;
3215}
3216
3217
3218int
3219REGEX_INTERNAL_eval (struct REGEX_INTERNAL_Automaton *a, const char *string)
3220{
3221 int result;
3222
3223 switch (a->type)
3224 {
3225 case DFA:
3226 result = evaluate_dfa (a, string);
3227 break;
3228
3229 case NFA:
3230 result = evaluate_nfa (a, string);
3231 break;
3232
3233 default:
3235 "Evaluating regex failed, automaton has no type!\n");
3237 break;
3238 }
3239
3240 return result;
3241}
3242
3243
3244const char *
3246{
3247 if (NULL == a)
3248 return NULL;
3249
3250 return a->canonical_regex;
3251}
3252
3253
3261unsigned int
3263{
3264 unsigned int t_count;
3265 struct REGEX_INTERNAL_State *s;
3266
3267 if (NULL == a)
3268 return 0;
3269
3270 t_count = 0;
3271 for (s = a->states_head; NULL != s; s = s->next)
3272 t_count += s->transition_count;
3273
3274 return t_count;
3275}
3276
3277
3288size_t
3289REGEX_INTERNAL_get_first_key (const char *input_string,
3290 size_t string_len,
3291 struct GNUNET_HashCode *key)
3292{
3293 size_t size;
3294
3295 size = string_len < GNUNET_REGEX_INITIAL_BYTES ? string_len
3297 if (NULL == input_string)
3298 {
3299 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Given input string was NULL!\n");
3300 return 0;
3301 }
3302 GNUNET_CRYPTO_hash (input_string, size, key);
3303
3304 return size;
3305}
3306
3307
3318static void
3319iterate_initial_edge (unsigned int min_len,
3320 unsigned int max_len,
3321 char *consumed_string,
3324 void *iterator_cls)
3325{
3326 char *temp;
3328 unsigned int num_edges = state->transition_count;
3329 struct REGEX_BLOCK_Edge edges[num_edges];
3330 struct REGEX_BLOCK_Edge edge[1];
3331 struct GNUNET_HashCode hash;
3332 struct GNUNET_HashCode hash_new;
3333 unsigned int cur_len;
3334
3335 if (NULL != consumed_string)
3336 cur_len = strlen (consumed_string);
3337 else
3338 cur_len = 0;
3339
3340 if (((cur_len >= min_len) || (GNUNET_YES == state->accepting)) &&
3341 (cur_len > 0) && (NULL != consumed_string))
3342 {
3343 if (cur_len <= max_len)
3344 {
3345 if ((NULL != state->proof) &&
3346 (0 != strcmp (consumed_string, state->proof)))
3347 {
3348 (void) state_get_edges (state, edges);
3349 GNUNET_CRYPTO_hash (consumed_string, strlen (consumed_string), &hash);
3351 "Start state for string `%s' is %s\n",
3352 consumed_string,
3353 GNUNET_h2s (&hash));
3354 iterator (iterator_cls,
3355 &hash,
3356 consumed_string,
3357 state->accepting,
3358 num_edges,
3359 edges);
3360 }
3361
3362 if ((GNUNET_YES == state->accepting) && (cur_len > 1) &&
3363 (state->transition_count < 1) && (cur_len < max_len))
3364 {
3365 /* Special case for regex consisting of just a string that is shorter than
3366 * max_len */
3367 edge[0].label = &consumed_string[cur_len - 1];
3368 edge[0].destination = state->hash;
3369 temp = GNUNET_strdup (consumed_string);
3370 temp[cur_len - 1] = '\0';
3371 GNUNET_CRYPTO_hash (temp, cur_len - 1, &hash_new);
3373 "Start state for short string `%s' is %s\n",
3374 temp,
3375 GNUNET_h2s (&hash_new));
3376 iterator (iterator_cls, &hash_new, temp, GNUNET_NO, 1, edge);
3377 GNUNET_free (temp);
3378 }
3379 }
3380 else /* cur_len > max_len */
3381 {
3382 /* Case where the concatenated labels are longer than max_len, then split. */
3383 edge[0].label = &consumed_string[max_len];
3384 edge[0].destination = state->hash;
3385 temp = GNUNET_strdup (consumed_string);
3386 temp[max_len] = '\0';
3387 GNUNET_CRYPTO_hash (temp, max_len, &hash);
3389 "Start state at split edge `%s'-`%s` is %s\n",
3390 temp,
3391 edge[0].label,
3392 GNUNET_h2s (&hash_new));
3393 iterator (iterator_cls, &hash, temp, GNUNET_NO, 1, edge);
3394 GNUNET_free (temp);
3395 }
3396 }
3397
3398 if (cur_len < max_len)
3399 {
3400 for (t = state->transitions_head; NULL != t; t = t->next)
3401 {
3402 if (NULL != strchr (t->label, (int) '.'))
3403 {
3404 /* Wildcards not allowed during starting states */
3405 GNUNET_break (0);
3406 continue;
3407 }
3408 if (NULL != consumed_string)
3409 GNUNET_asprintf (&temp, "%s%s", consumed_string, t->label);
3410 else
3411 GNUNET_asprintf (&temp, "%s", t->label);
3412 iterate_initial_edge (min_len,
3413 max_len,
3414 temp,
3415 t->to_state,
3416 iterator,
3417 iterator_cls);
3418 GNUNET_free (temp);
3419 }
3420 }
3421}
3422
3423
3432void
3435 void *iterator_cls)
3436{
3437 struct REGEX_INTERNAL_State *s;
3438
3439 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating over starting edges\n");
3442 NULL,
3443 a->start,
3444 iterator,
3445 iterator_cls);
3446 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating over DFA edges\n");
3447 for (s = a->states_head; NULL != s; s = s->next)
3448 {
3449 struct REGEX_BLOCK_Edge edges[s->transition_count];
3450 unsigned int num_edges;
3451
3452 num_edges = state_get_edges (s, edges);
3453 if (((NULL != s->proof) && (0 < strlen (s->proof))) || s->accepting)
3454 {
3456 "Creating DFA edges at `%s' under key %s\n",
3457 s->proof,
3458 GNUNET_h2s (&s->hash));
3459 iterator (iterator_cls,
3460 &s->hash,
3461 s->proof,
3462 s->accepting,
3463 num_edges,
3464 edges);
3465 }
3466 s->marked = GNUNET_NO;
3467 }
3468}
3469
3470
3478{
3480 char *proof;
3484};
3485
3486
3491{
3494};
3495
3496
3508static void
3510 const struct GNUNET_HashCode *key,
3511 const char *proof,
3512 int accepting,
3513 unsigned int num_edges,
3514 const struct REGEX_BLOCK_Edge *edges)
3515{
3516 struct GNUNET_CONTAINER_MultiHashMap *hm = cls;
3517 struct temporal_state_store *tmp;
3518 size_t edges_size;
3519
3520 tmp = GNUNET_new (struct temporal_state_store);
3521 tmp->reachable = GNUNET_NO;
3522 tmp->proof = GNUNET_strdup (proof);
3523 tmp->accepting = accepting;
3524 tmp->num_edges = num_edges;
3525 edges_size = sizeof(struct REGEX_BLOCK_Edge) * num_edges;
3526 tmp->edges = GNUNET_malloc (edges_size);
3527 GNUNET_memcpy (tmp->edges, edges, edges_size);
3530 hm,
3531 key,
3532 tmp,
3534}
3535
3536
3545static void
3548{
3550 unsigned int i;
3551
3552 if (GNUNET_YES == state->reachable)
3553 /* visited */
3554 return;
3555
3556 state->reachable = GNUNET_YES;
3557 for (i = 0; i < state->num_edges; i++)
3558 {
3559 child =
3560 GNUNET_CONTAINER_multihashmap_get (hm, &state->edges[i].destination);
3561 if (NULL == child)
3562 {
3563 GNUNET_break (0);
3564 continue;
3565 }
3567 }
3568}
3569
3570
3580static int
3582 const struct GNUNET_HashCode *key,
3583 void *value)
3584{
3585 struct GNUNET_CONTAINER_MultiHashMap *hm = cls;
3587
3588 if (GNUNET_YES == state->reachable)
3589 /* already visited and marked */
3590 return GNUNET_YES;
3591
3592 if ((GNUNET_REGEX_INITIAL_BYTES > strlen (state->proof)) &&
3593 (GNUNET_NO == state->accepting) )
3594 /* not directly reachable */
3595 return GNUNET_YES;
3596
3598 return GNUNET_YES;
3599}
3600
3601
3612static int
3613iterate_reachables (void *cls, const struct GNUNET_HashCode *key, void *value)
3614{
3615 struct client_iterator *ci = cls;
3617
3618 if (GNUNET_YES == state->reachable)
3619 {
3620 ci->iterator (ci->iterator_cls,
3621 key,
3622 state->proof,
3623 state->accepting,
3624 state->num_edges,
3625 state->edges);
3626 }
3627 GNUNET_free (state->edges);
3628 GNUNET_free (state->proof);
3630 return GNUNET_YES;
3631}
3632
3633
3634void
3637 void *iterator_cls)
3638{
3640 struct client_iterator ci;
3641
3643 ci.iterator = iterator;
3645
3649
3651}
3652
3653
3654/* end of regex_internal.c */
static struct GNUNET_ARM_Operation * op
Current operation.
Definition: gnunet-arm.c:144
static int start
Set if we are to start default services (including ARM).
Definition: gnunet-arm.c:39
static int ret
Final status code.
Definition: gnunet-arm.c:94
static int end
Set if we are to shutdown all services (including ARM).
Definition: gnunet-arm.c:34
struct GNUNET_HashCode key
The key used in the DHT.
static struct GNUNET_FS_Handle * ctx
static char * value
Value of the record to add/remove.
enum State state
current state of profiling
static int result
Global testing status.
static uint64_t proof
Definition: gnunet-scrypt.c:49
static pid_t child
static struct GNUNET_OS_Process * p
Helper process we started.
Definition: gnunet-uri.c:38
static struct GNUNET_SCHEDULER_Task * t
Main task.
API to access regex service to advertise capabilities via regex and discover respective peers using m...
#define GNUNET_CONTAINER_MDLL_remove(mdll, head, tail, element)
Remove an element from a MDLL.
#define GNUNET_CONTAINER_MDLL_insert_tail(mdll, head, tail, element)
Insert an element at the tail of a MDLL.
#define GNUNET_CONTAINER_DLL_remove(head, tail, element)
Remove an element from a DLL.
#define GNUNET_CONTAINER_DLL_insert_before(head, tail, other, element)
Insert an element into a DLL before the given other element.
#define GNUNET_CONTAINER_MDLL_insert(mdll, head, tail, element)
Insert an element at the head of a MDLL.
#define GNUNET_CONTAINER_DLL_insert_tail(head, tail, element)
Insert an element at the tail of a DLL.
#define GNUNET_CONTAINER_DLL_insert(head, tail, element)
Insert an element at the head of a DLL.
void GNUNET_CRYPTO_hash(const void *block, size_t size, struct GNUNET_HashCode *ret)
Compute hash of a given block.
Definition: crypto_hash.c:41
int GNUNET_CONTAINER_multihashmap_iterate(struct GNUNET_CONTAINER_MultiHashMap *map, GNUNET_CONTAINER_MultiHashMapIteratorCallback it, void *it_cls)
Iterate over all entries in the map.
void * GNUNET_CONTAINER_multihashmap_get(const struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key)
Given a key find a value in the map matching the key.
enum GNUNET_GenericReturnValue GNUNET_CONTAINER_multihashmap_put(struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key, void *value, enum GNUNET_CONTAINER_MultiHashMapOption opt)
Store a key-value pair in the map.
void GNUNET_CONTAINER_multihashmap_destroy(struct GNUNET_CONTAINER_MultiHashMap *map)
Destroy a hash map.
struct GNUNET_CONTAINER_MultiHashMap * GNUNET_CONTAINER_multihashmap_create(unsigned int len, int do_not_copy_keys)
Create a multi hash map.
@ GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST
, ' bother checking if a value already exists (faster than GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE...
#define GNUNET_log(kind,...)
#define GNUNET_MAX(a, b)
#define GNUNET_memcpy(dst, src, n)
Call memcpy() but check for n being 0 first.
@ GNUNET_OK
@ GNUNET_YES
@ GNUNET_NO
@ GNUNET_SYSERR
#define GNUNET_assert(cond)
Use this for fatal errors that cannot be handled.
#define GNUNET_break(cond)
Use this for internal assertion violations that are not fatal (can be handled) but should not occur.
const char * GNUNET_h2s(const struct GNUNET_HashCode *hc)
Convert a hash value to a string (for printing debug messages).
#define GNUNET_log_strerror(level, cmd)
Log an error message at log-level 'level' that indicates a failure of the command 'cmd' with the mess...
@ GNUNET_ERROR_TYPE_ERROR
@ GNUNET_ERROR_TYPE_DEBUG
int int GNUNET_asprintf(char **buf, const char *format,...) __attribute__((format(printf
Like asprintf, just portable.
#define GNUNET_strdup(a)
Wrapper around GNUNET_xstrdup_.
#define GNUNET_array_grow(arr, size, tsize)
Grow a well-typed (!) array.
#define GNUNET_malloc_large(size)
Wrapper around malloc.
#define GNUNET_strndup(a, length)
Wrapper around GNUNET_xstrndup_.
int GNUNET_snprintf(char *buf, size_t size, const char *format,...) __attribute__((format(printf
Like snprintf, just aborts if the buffer is of insufficient size.
#define GNUNET_new(type)
Allocate a struct or union of the given type.
#define GNUNET_malloc(size)
Wrapper around malloc.
#define GNUNET_realloc(ptr, size)
Wrapper around realloc.
#define GNUNET_free(ptr)
Wrapper around free.
#define GNUNET_REGEX_INITIAL_BYTES
Constant for how many bytes the initial string regex should have.
#define max(x, y)
static struct PeerEntry ** table
Table with our interned peer IDs.
Definition: peer.c:56
static unsigned int size
Size of the "table".
Definition: peer.c:68
static void sb_strdup_cstr(struct StringBuffer *out, const char *cstr)
Copy the given string buffer from 'in' to 'out'.
static struct REGEX_INTERNAL_State * dfa_state_create(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_StateSet *nfa_states)
Creates a new DFA state based on a set of NFA states.
static void state_add_transition(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_State *from_state, const char *label, struct REGEX_INTERNAL_State *to_state)
Adds a transition from one state to another on label.
static void sb_printf3(struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg1, const struct StringBuffer *sarg2, const struct StringBuffer *sarg3)
Format a string buffer.
static void construct_dfa_states(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *nfa, struct REGEX_INTERNAL_Automaton *dfa, struct REGEX_INTERNAL_State *dfa_state)
Create DFA states based on given 'nfa' and starting with 'dfa_state'.
static void state_set_clear(struct REGEX_INTERNAL_StateSet *set)
Clears the given StateSet 'set'.
static void nfa_add_concatenation(struct REGEX_INTERNAL_Context *ctx)
Pops two NFA fragments (a, b) from the stack and concatenates them (ab)
static void remove_parentheses(struct StringBuffer *str)
Remove parentheses surrounding string str.
static void sb_printf1(struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg)
Format a string buffer.
static struct REGEX_INTERNAL_Automaton * nfa_fragment_create(struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *end)
Creates a new NFA fragment.
static int sb_strncmp_cstr(const struct StringBuffer *str1, const char *str2, size_t n)
Compare n bytes of 'str1' and 'str2'.
static void automaton_merge_states(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s1, struct REGEX_INTERNAL_State *s2)
Merge two states into one.
static void dfa_add_multi_strides(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Function called for each state in the DFA.
static void sb_append(struct StringBuffer *ret, const struct StringBuffer *sarg)
Append a string.
void REGEX_INTERNAL_iterate_all_edges(struct REGEX_INTERNAL_Automaton *a, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
Iterate over all edges starting from start state of automaton 'a'.
static void sb_free(struct StringBuffer *sb)
Free resources of the given string buffer.
static void sb_realloc(struct StringBuffer *ret, size_t nlen)
Reallocate the buffer of 'ret' to fit 'nlen' characters; move the existing string to the beginning of...
static void nfa_add_states(struct REGEX_INTERNAL_Automaton *n, struct REGEX_INTERNAL_State *states_head, struct REGEX_INTERNAL_State *states_tail)
Adds a list of states to the given automaton 'n'.
size_t REGEX_INTERNAL_get_first_key(const char *input_string, size_t string_len, struct GNUNET_HashCode *key)
Get the first key for the given input_string.
static void state_set_append(struct REGEX_INTERNAL_StateSet *set, struct REGEX_INTERNAL_State *state)
Append state to the given StateSet.
struct REGEX_INTERNAL_Automaton * REGEX_INTERNAL_construct_dfa(const char *regex, const size_t len, unsigned int max_path_len)
Construct DFA for the given 'regex' of length 'len'.
static void dfa_add_multi_strides_helper(void *cls, const unsigned int depth, char *label, struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *s)
Recursive helper function to add strides to a DFA.
void REGEX_INTERNAL_automaton_traverse(const struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *start, REGEX_INTERNAL_traverse_check check, void *check_cls, REGEX_INTERNAL_traverse_action action, void *action_cls)
Traverses the given automaton using depth-first-search (DFS) from it's start state,...
static void mark_as_reachable(struct temporal_state_store *state, struct GNUNET_CONTAINER_MultiHashMap *hm)
Mark state as reachable and call recursively on all its edges.
void REGEX_INTERNAL_dfa_add_multi_strides(struct REGEX_INTERNAL_Context *regex_ctx, struct REGEX_INTERNAL_Automaton *dfa, const unsigned int stride_len)
Adds multi-strided transitions to the given 'dfa'.
int REGEX_INTERNAL_eval(struct REGEX_INTERNAL_Automaton *a, const char *string)
Evaluates the given 'string' against the given compiled regex.
static void nfa_add_question_op(struct REGEX_INTERNAL_Context *ctx)
Pops an NFA fragment (a) from the stack and adds a new fragment (a?)
static int nullstrcmp(const char *str1, const char *str2)
Compare two strings for equality.
static int dfa_merge_nondistinguishable_states(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a)
Merge all non distinguishable states in the DFA 'a'.
static int dfa_minimize(struct REGEX_INTERNAL_Context *ctx, struct REGEX_INTERNAL_Automaton *a)
Minimize the given DFA 'a' by removing all unreachable states, removing all dead states and merging a...
static void sb_append_cstr(struct StringBuffer *ret, const char *cstr)
Append a C string.
const char * REGEX_INTERNAL_get_canonical_regex(struct REGEX_INTERNAL_Automaton *a)
Get the canonical regex of the given automaton.
static int sb_nullstrcmp(const struct StringBuffer *s1, const struct StringBuffer *s2)
Compare two strings for equality.
static int automaton_create_proofs(struct REGEX_INTERNAL_Automaton *a)
Create proofs for all states in the given automaton.
static int state_compare(const void *a, const void *b)
Compare two states.
static void dfa_remove_dead_states(struct REGEX_INTERNAL_Automaton *a)
Remove all dead states from the DFA 'a'.
static int state_set_compare(struct REGEX_INTERNAL_StateSet *sset1, struct REGEX_INTERNAL_StateSet *sset2)
Compare to state sets by comparing the id's of the states that are contained in each set.
void REGEX_INTERNAL_iterate_reachable_edges(struct REGEX_INTERNAL_Automaton *a, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
Iterate over all edges of automaton 'a' that are reachable from a state with a proof of at least GNUN...
static void remove_epsilon(const struct StringBuffer *str, struct StringBuffer *ret)
Remove an epsilon from the string str.
static void sb_wrap(struct StringBuffer *ret, const char *format, size_t extra_chars)
Wrap a string buffer, that is, set ret to the format string which contains an "%s" which is to be rep...
void dfa_compress_paths_helper(struct REGEX_INTERNAL_Automaton *dfa, struct REGEX_INTERNAL_State *start, struct REGEX_INTERNAL_State *cur, char *label, unsigned int max_len, struct REGEX_INTERNAL_Transition **transitions_head, struct REGEX_INTERNAL_Transition **transitions_tail)
Recursive Helper function for DFA path compression.
static void sb_strdup(struct StringBuffer *out, const struct StringBuffer *in)
Copy the given string buffer from 'in' to 'out'.
static int evaluate_dfa(struct REGEX_INTERNAL_Automaton *a, const char *string)
Evaluates the given string using the given DFA automaton.
static void nfa_closure_set_create(struct REGEX_INTERNAL_StateSet *ret, struct REGEX_INTERNAL_Automaton *nfa, struct REGEX_INTERNAL_StateSet *states, const char *label)
Calculates the closure set for the given set of states.
static int iterate_reachables(void *cls, const struct GNUNET_HashCode *key, void *value)
Iterator over hash map entries.
static void sb_printf2(struct StringBuffer *ret, const char *format, size_t extra_chars, const struct StringBuffer *sarg1, const struct StringBuffer *sarg2)
Format a string buffer.
static unsigned int state_get_edges(struct REGEX_INTERNAL_State *s, struct REGEX_BLOCK_Edge *edges)
Get all edges leaving state s.
static void automaton_create_proofs_simplify(const struct StringBuffer *R_last_ij, const struct StringBuffer *R_last_ik, const struct StringBuffer *R_last_kk, const struct StringBuffer *R_last_kj, struct StringBuffer *R_cur_ij, struct StringBuffer *R_cur_l, struct StringBuffer *R_cur_r)
Construct the regular expression given the inductive step, $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}...
static int sb_strcmp(const struct StringBuffer *s1, const struct StringBuffer *s2)
Compare two strings for equality.
static void store_all_states(void *cls, const struct GNUNET_HashCode *key, const char *proof, int accepting, unsigned int num_edges, const struct REGEX_BLOCK_Edge *edges)
Iterator over all edges of a dfa.
static int evaluate_nfa(struct REGEX_INTERNAL_Automaton *a, const char *string)
Evaluates the given string using the given NFA automaton.
static int sb_strncmp(const struct StringBuffer *str1, const struct StringBuffer *str2, size_t n)
Compare n bytes of 'str1' and 'str2'.
static int needs_parentheses(const struct StringBuffer *str)
Check if the given string str needs parentheses around it when using it to generate a regex.
static void state_remove_transition(struct REGEX_INTERNAL_State *state, struct REGEX_INTERNAL_Transition *transition)
Remove a 'transition' from 'state'.
static struct REGEX_INTERNAL_State * nfa_state_create(struct REGEX_INTERNAL_Context *ctx, int accepting)
Creates a new NFA state.
static void iterate_initial_edge(unsigned int min_len, unsigned int max_len, char *consumed_string, struct REGEX_INTERNAL_State *state, REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
Recursive function that calls the iterator for each synthetic start state.
static void mark_states(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Set the given state 'marked' to GNUNET_YES.
static void automaton_add_state(struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s)
Add a state to the automaton 'a', always use this function to alter the states DLL of the automaton.
static void dfa_remove_unreachable_states(struct REGEX_INTERNAL_Automaton *a)
Remove all unreachable states from DFA 'a'.
void REGEX_INTERNAL_automaton_destroy(struct REGEX_INTERNAL_Automaton *a)
Free the memory allocated by constructing the REGEX_INTERNAL_Automaton.
struct REGEX_INTERNAL_Automaton * REGEX_INTERNAL_construct_nfa(const char *regex, const size_t len)
Construct an NFA by parsing the regex string of length 'len'.
unsigned int REGEX_INTERNAL_get_transition_count(struct REGEX_INTERNAL_Automaton *a)
Get the number of transitions that are contained in the given automaton.
static void nfa_add_star_op(struct REGEX_INTERNAL_Context *ctx)
Pops a NFA fragment from the stack (a) and adds a new fragment (a*)
static int has_epsilon(const struct StringBuffer *str)
Check if the string 'str' starts with an epsilon (empty string).
static void automaton_destroy_state(struct REGEX_INTERNAL_State *s)
Frees the memory used by State s.
static unsigned int dfa_move(struct REGEX_INTERNAL_State **s, const char *str)
Move from the given state 's' to the next state on transition 'str'.
static void nfa_add_alternation(struct REGEX_INTERNAL_Context *ctx)
Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment that alternates between a an...
static void sb_init(struct StringBuffer *sb, size_t n)
Initialize string buffer for storing strings of up to n characters.
static void nfa_add_label(struct REGEX_INTERNAL_Context *ctx, const char *label)
Adds a new nfa fragment to the stack.
static int sb_strkcmp(const struct StringBuffer *str1, const struct StringBuffer *str2, size_t k)
Compare 'str1', starting from position 'k', with whole 'str2'.
static void automaton_state_traverse(struct REGEX_INTERNAL_State *s, int *marks, unsigned int *count, REGEX_INTERNAL_traverse_check check, void *check_cls, REGEX_INTERNAL_traverse_action action, void *action_cls)
Depth-first traversal (DFS) of all states that are reachable from state 's'.
static void automaton_remove_state(struct REGEX_INTERNAL_Automaton *a, struct REGEX_INTERNAL_State *s)
Remove a state from the given automaton 'a'.
static void dfa_compress_paths(struct REGEX_INTERNAL_Context *regex_ctx, struct REGEX_INTERNAL_Automaton *dfa, unsigned int max_len)
Compress paths in the given 'dfa'.
static void number_states(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Helper function used as 'action' in 'REGEX_INTERNAL_automaton_traverse' function to create the depth-...
static void REGEX_INTERNAL_context_init(struct REGEX_INTERNAL_Context *ctx)
Initialize a new context.
static void nfa_add_plus_op(struct REGEX_INTERNAL_Context *ctx)
Pops an NFA fragment (a) from the stack and adds a new fragment (a+)
static void automaton_fragment_clear(struct REGEX_INTERNAL_Automaton *a)
Clears an automaton fragment.
static int reachability_iterator(void *cls, const struct GNUNET_HashCode *key, void *value)
Iterator over hash map entries to mark the ones that are reachable.
int(* REGEX_INTERNAL_traverse_check)(void *cls, struct REGEX_INTERNAL_State *s, struct REGEX_INTERNAL_Transition *t)
Function that gets passed to automaton traversal and is called before each next traversal from state ...
void(* REGEX_INTERNAL_traverse_action)(void *cls, const unsigned int count, struct REGEX_INTERNAL_State *s)
Function that is called with each state, when traversing an automaton.
@ NFA
@ DFA
library to parse regular expressions into dfa
void(* REGEX_INTERNAL_KeyIterator)(void *cls, const struct GNUNET_HashCode *key, const char *proof, int accepting, unsigned int num_edges, const struct REGEX_BLOCK_Edge *edges)
Iterator callback function.
Internal representation of the hash map.
A 512-bit hashcode.
struct GNUNET_SCHEDULER_Task * next
This is a linked list.
Definition: scheduler.c:140
Edge representation.
const char * label
Label of the edge.
struct GNUNET_HashCode destination
Destination of the edge.
Automaton representation.
struct REGEX_INTERNAL_State * start
First state of the automaton.
char * canonical_regex
Canonical regex (result of RX->NFA->DFA->RX)
int is_multistrided
GNUNET_YES, if multi strides have been added to the Automaton.
struct REGEX_INTERNAL_State * states_tail
DLL of states.
unsigned int state_count
Number of states in the automaton.
struct REGEX_INTERNAL_State * states_head
DLL of states.
struct REGEX_INTERNAL_State * end
End state of the partial NFA.
enum REGEX_INTERNAL_AutomatonType type
Type of the automaton.
Context that contains an id counter for states and transitions as well as a DLL of automatons used as...
Set of states using MDLL API.
struct REGEX_INTERNAL_State * head
MDLL of states.
struct REGEX_INTERNAL_State * tail
MDLL of states.
unsigned int len
Length of the MDLL.
unsigned int size
Length of the 'states' array.
struct REGEX_INTERNAL_State ** states
Array of states.
unsigned int off
Number of entries in use in the 'states' array.
unsigned int incoming_transition_count
Number of incoming transitions.
struct REGEX_INTERNAL_State * next
This is a linked list to keep states in an automaton.
char * proof
Proof for this state.
unsigned int transition_count
Number of transitions from this state to other states.
unsigned int traversal_id
Unique state id that is used for traversing the automaton.
struct REGEX_INTERNAL_StateSet nfa_set
Set of states on which this state is based on.
unsigned int dfs_id
Linear state ID acquired by depth-first-search.
char * name
Human readable name of the state.
int marked
Marking of the state.
unsigned int id
Unique state id.
struct REGEX_INTERNAL_Transition * transitions_tail
DLL of transitions.
int accepting
If this is an accepting state or not.
int lowlink
Used for SCC detection.
int contained
Marking the state as contained.
struct REGEX_INTERNAL_Transition * transitions_head
DLL of transitions.
int index
Used for SCC detection.
unsigned int scc_id
Marking the state as part of an SCC (Strongly Connected Component).
Context for adding strided transitions to a DFA.
struct REGEX_INTERNAL_Transition * transitions_tail
Strided transitions DLL.
struct REGEX_INTERNAL_Transition * transitions_head
Strided transitions DLL.
const unsigned int stride
Length of the strides.
Transition between two states.
struct REGEX_INTERNAL_State * to_state
State to which this transition leads.
struct REGEX_INTERNAL_Transition * next
This is a linked list.
struct REGEX_INTERNAL_State * from_state
State from which this transition origins.
char * label
Label for this transition.
String container for faster string operations.
int16_t null_flag
Buffer currently represents "NULL" (not the empty string!)
char * sbuf
Buffer holding the string (may start in the middle!); NOT 0-terminated!
size_t slen
Length of the string in the buffer.
unsigned int blen
Number of bytes allocated for sbuf.
char * abuf
Allocated buffer.
int16_t synced
If this entry is part of the last/current generation array, this flag is GNUNET_YES if the last and c...
Store regex iterator and cls in one place to pass to the hashmap iterator.
REGEX_INTERNAL_KeyIterator iterator
Struct to hold all the relevant state information in the HashMap.
struct REGEX_BLOCK_Edge * edges