GNUnet 0.22.2
regex_test_lib.c
Go to the documentation of this file.
1/*
2 * This file is part of GNUnet
3 * Copyright (C) 2012-2017 GNUnet e.V.
4 *
5 * GNUnet is free software: you can redistribute it and/or modify it
6 * under the terms of the GNU Affero General Public License as published
7 * by the Free Software Foundation, either version 3 of the License,
8 * or (at your option) any later version.
9 *
10 * GNUnet is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Affero General Public License for more details.
14 *
15 * You should have received a copy of the GNU Affero General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17
18 SPDX-License-Identifier: AGPL3.0-or-later
19 */
28#include "platform.h"
29#include "gnunet_util_lib.h"
30#include "regex_test_lib.h"
31
36{
41
45 unsigned int size;
46
50 char *s;
51};
52
53
64static int
65c2i (char c, int size)
66{
67 switch (size)
68 {
69 case 2:
70 case 8:
71 return c - '0';
72 break;
73
74 case 16:
75 if ((c >= '0') && (c <= '9') )
76 return c - '0';
77 else if ((c >= 'A') && (c <= 'F') )
78 return c - 'A' + 10;
79 else if ((c >= 'a') && (c <= 'f') )
80 return c - 'a' + 10;
81 else
82 {
84 "Cannot convert char %c in base %u\n",
85 c, size);
86 GNUNET_assert (0);
87 }
88 break;
89
90 default:
91 GNUNET_assert (0);
92 }
93}
94
95
96#if DEBUG_REGEX
102static void
103space (int n)
104{
105 for (int i = 0; i < n; i++)
106 fprintf (stderr, "| ");
107}
108
109
110#endif
111
112
119static void
120debugctx (struct RegexCombineCtx *ctx, int level)
121{
122#if DEBUG_REGEX
123 if (NULL != ctx->s)
124 {
125 space (level - 1);
126 fprintf (stderr, "%u:'%s'\n", c2i (ctx->s[0], ctx->size), ctx->s);
127 }
128 else
129 fprintf (stderr, "ROOT (base %u)\n", ctx->size);
130 for (unsigned int i = 0; i < ctx->size; i++)
131 {
132 if (NULL != ctx->children[i])
133 {
134 space (level);
135 debugctx (ctx->children[i], level + 1);
136 }
137 }
138 fflush (stderr);
139#endif
140}
141
142
149static void
151 const char *regex);
152
153
159static struct RegexCombineCtx *
160new_regex_ctx (unsigned int alphabet_size)
161{
162 struct RegexCombineCtx *ctx;
163 size_t array_size;
164
165 array_size = sizeof(struct RegexCombineCtx *) * alphabet_size;
166 ctx = GNUNET_new (struct RegexCombineCtx);
167 ctx->children = GNUNET_malloc (array_size);
168 ctx->size = alphabet_size;
169
170 return ctx;
171}
172
173
174static void
176 const struct RegexCombineCtx *src)
177{
178 size_t array_size;
179
180 array_size = sizeof(struct RegexCombineCtx *) * src->size;
182 src->children,
183 array_size);
184 for (unsigned int i = 0; i < src->size; i++)
185 {
186 src->children[i] = NULL;
187 }
188}
189
190
198static char *
200{
201 struct RegexCombineCtx *p;
202 unsigned int i;
203 size_t len;
204 char *regex;
205 char *tmp;
206 char *s;
207 int opt;
208
209 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "new combine %s\n", ctx->s);
210 regex = GNUNET_strdup ("");
211 opt = GNUNET_NO;
212 for (i = 0; i < ctx->size; i++)
213 {
214 p = ctx->children[i];
215 if (NULL == p)
216 continue;
218 "adding '%s' to innner %s\n",
219 p->s, ctx->s);
220 s = regex_combine (p);
221 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " total '%s'\n", s);
222 if (strlen (s) == 0)
223 {
224 opt = GNUNET_YES;
225 }
226 else
227 {
228 GNUNET_asprintf (&tmp, "%s%s|", regex, s);
229 GNUNET_free (regex);
230 regex = tmp;
231 }
232 GNUNET_free (s);
233 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " so far '%s' for inner %s\n", regex,
234 ctx->s);
235 }
236
237 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "opt: %d, innner: '%s'\n", opt, regex);
238 len = strlen (regex);
239 if (0 == len)
240 {
241 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "empty, returning ''\n");
242 GNUNET_free (regex);
243 return NULL == ctx->s ? NULL : GNUNET_strdup (ctx->s);
244 }
245
246 if ('|' == regex[len - 1])
247 regex[len - 1] = '\0';
248
249 if (NULL != ctx->s)
250 {
251 if (opt)
252 GNUNET_asprintf (&s, "%s(%s)?", ctx->s, regex);
253 else
254 GNUNET_asprintf (&s, "%s(%s)", ctx->s, regex);
255 GNUNET_free (regex);
256 regex = s;
257 }
258
259 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "partial: %s\n", regex);
260 return regex;
261}
262
263
272static unsigned int
273get_prefix_length (const char *s1, const char *s2)
274{
275 unsigned int l1;
276 unsigned int l2;
277 unsigned int limit;
278 unsigned int i;
279
280 l1 = strlen (s1);
281 l2 = strlen (s2);
282 limit = l1 > l2 ? l2 : l1;
283
284 for (i = 0; i < limit; i++)
285 {
286 if (s1[i] != s2[i])
287 return i;
288 }
289 return limit;
290}
291
292
302static struct RegexCombineCtx *
303get_longest_prefix (struct RegexCombineCtx *ctx, const char *regex)
304{
305 struct RegexCombineCtx *p;
306 struct RegexCombineCtx *best;
307 unsigned int i;
308 unsigned int l;
309 unsigned int best_l;
310
311 best_l = 0;
312 best = NULL;
313
314 for (i = 0; i < ctx->size; i++)
315 {
316 p = ctx->children[i];
317 if (NULL == p)
318 continue;
319
320 l = get_prefix_length (p->s, regex);
321 if (l > best_l)
322 {
323 GNUNET_break (0 == best_l);
324 best = p;
325 best_l = l;
326 }
327 }
328 return best;
329}
330
331
332static void
334 const char *regex,
335 struct RegexCombineCtx **children)
336{
337 char tmp[2];
338 long unsigned int i;
339 size_t l;
340 struct RegexCombineCtx *newctx;
341 unsigned int count;
342
343 if ('(' != regex[0])
344 {
345 GNUNET_assert (0);
346 }
347
348 /* Does the regex cover *all* possible children? Then don't add any,
349 * as it will be covered by the post-regex "(a-z)*"
350 */
351 l = strlen (regex);
352 count = 0;
353 for (i = 1UL; i < l; i++)
354 {
355 if ((regex[i] != '|') && (regex[i] != ')') )
356 {
357 count++;
358 }
359 }
360 if (count == ctx->size)
361 {
362 return;
363 }
364
365 /* Add every component as a child node */
366 tmp[1] = '\0';
367 for (i = 1UL; i < l; i++)
368 {
369 if ((regex[i] != '|') && (regex[i] != ')') )
370 {
371 tmp[0] = regex[i];
372 newctx = new_regex_ctx (ctx->size);
373 newctx->s = GNUNET_strdup (tmp);
374 if (children != NULL)
375 GNUNET_memcpy (newctx->children,
376 children,
377 sizeof(*children) * ctx->size);
378 ctx->children[c2i (tmp[0], ctx->size)] = newctx;
379 }
380 }
381}
382
383
394static void
396 unsigned int len,
397 unsigned int prefix_l)
398{
399 struct RegexCombineCtx *newctx;
400 unsigned int idx;
401 char *suffix;
402
403 suffix = GNUNET_malloc (len - prefix_l + 1);
404 /*
405 * We can use GNUNET_strlcpy because ctx->s is null-terminated
406 */
407 GNUNET_strlcpy (suffix, &ctx->s[prefix_l], len - prefix_l + 1);
408
409 /* Suffix saved, truncate current node so it only contains the prefix,
410 * copy any children nodes to put as grandchildren and initialize new empty
411 * children array.
412 */
413 ctx->s[prefix_l] = '\0';
414
415 /* If the suffix is an OR expression, add multiple children */
416 if ('(' == suffix[0])
417 {
418 struct RegexCombineCtx **tmp;
419
420 tmp = ctx->children;
421 ctx->children = GNUNET_malloc (sizeof(*tmp) * ctx->size);
422 regex_add_multiple (ctx, suffix, tmp);
423 GNUNET_free (suffix);
424 GNUNET_free (tmp);
425 return;
426 }
427
428 /* The suffix is a normal string, add as one node */
429 newctx = new_regex_ctx (ctx->size);
430 newctx->s = suffix;
431 move_children (newctx, ctx);
432 idx = c2i (suffix[0], ctx->size);
433 ctx->children[idx] = newctx;
434}
435
436
443static void
444regex_add (struct RegexCombineCtx *ctx, const char *regex)
445{
446 struct RegexCombineCtx *p;
447 struct RegexCombineCtx *newctx;
448 long unsigned int l;
449 unsigned int prefix_l;
450 const char *rest_r;
451 const char *rest_s;
452 size_t len;
453 int idx;
454
456 "regex_add '%s' into '%s'\n",
457 regex, ctx->s);
458 l = strlen (regex);
459 if (0UL == l)
460 return;
461
462 /* If the regex is in the form of (a|b|c), add every character separately */
463 if ('(' == regex[0])
464 {
465 regex_add_multiple (ctx, regex, NULL);
466 return;
467 }
468
469 p = get_longest_prefix (ctx, regex);
470 if (NULL != p)
471 {
472 /* There is some prefix match, reduce regex and try again */
473 prefix_l = get_prefix_length (p->s, regex);
474 rest_s = &p->s[prefix_l];
475 rest_r = &regex[prefix_l];
476 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "chosen '%s' [%u]\n", p->s, prefix_l);
477 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "prefix r '%.*s'\n", prefix_l, p->s);
478 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest r '%s'\n", rest_r);
479 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest s '%s'\n", rest_s);
480 len = strlen (p->s);
481 if (prefix_l < len)
482 {
483 regex_split (p, len, prefix_l);
484 }
485 regex_add (p, rest_r);
486 return;
487 }
488
489 /* There is no prefix match, add new */
490 idx = c2i (regex[0], ctx->size);
491 if ((NULL == ctx->children[idx]) && (NULL != ctx->s))
492 {
493 /* this was the end before, add empty string */
494 newctx = new_regex_ctx (ctx->size);
495 newctx->s = GNUNET_strdup ("");
496 ctx->children[idx] = newctx;
497 }
498 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " no match\n");
499 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new state %s\n", regex);
500 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " under %s\n", ctx->s);
501 newctx = new_regex_ctx (ctx->size);
502 newctx->s = GNUNET_strdup (regex);
503 ctx->children[idx] = newctx;
504}
505
506
512static void
514{
515 unsigned int i;
516
517 if (NULL == ctx)
518 return;
519
520 for (i = 0; i < ctx->size; i++)
521 {
522 regex_ctx_destroy (ctx->children[i]);
523 }
524 GNUNET_free (ctx->s); /* 's' on root node is null */
525 GNUNET_free (ctx->children);
527}
528
529
544char *
545REGEX_TEST_combine (char *const regexes[], unsigned int alphabet_size)
546{
547 unsigned int i;
548 char *combined;
549 const char *current;
550 struct RegexCombineCtx *ctx;
551
552 ctx = new_regex_ctx (alphabet_size);
553 for (i = 0; regexes[i]; i++)
554 {
555 current = regexes[i];
556 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Regex %u: %s\n", i, current);
557 regex_add (ctx, current);
558 debugctx (ctx, 0);
559 }
560 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "\nCombining...\n");
561 debugctx (ctx, 0);
562
563 combined = regex_combine (ctx);
564
566
567 return combined;
568}
569
570
580char **
582{
584 unsigned int nr;
585 unsigned int offset;
586 off_t size;
587 size_t len;
588 char *buffer;
589 char *regex;
590 char **regexes;
591
595 if (NULL == f)
596 {
598 "Can't open file %s for reading\n", filename);
599 return NULL;
600 }
602 {
604 "Can't get size of file %s\n", filename);
606 return NULL;
607 }
609 "using file %s, size %llu\n",
610 filename, (unsigned long long) size);
611
612 buffer = GNUNET_malloc (size + 1);
613 GNUNET_DISK_file_read (f, buffer, size);
615 regexes = GNUNET_malloc (sizeof(char *));
616 nr = 1;
617 offset = 0;
618 regex = NULL;
619 do
620 {
621 if (NULL == regex)
622 regex = GNUNET_malloc (size + 1);
623 len = (size_t) sscanf (&buffer[offset], "%s", regex);
624 if (0 == len)
625 break;
626 len = strlen (regex);
627 offset += len + 1;
628 if (len < 1)
629 continue;
630 regex[len] = '\0';
631 regex = GNUNET_realloc (regex, len + 1);
632 GNUNET_array_grow (regexes, nr, nr + 1);
633 GNUNET_assert (NULL == regexes[nr - 2]);
634 regexes[nr - 2] = regex;
635 regexes[nr - 1] = NULL;
636 regex = NULL;
637 }
638 while (offset < size);
639 GNUNET_free (regex);
640 GNUNET_free (buffer);
641
642 return regexes;
643}
644
645
651void
653{
654 unsigned int i;
655
656 for (i = 0; regexes[i]; i++)
657 GNUNET_free (regexes[i]);
658 GNUNET_free (regexes);
659}
660
661
662/* end of regex_test_lib.c */
static struct GNUNET_FS_Handle * ctx
static char * filename
static struct GNUNET_OS_Process * p
Helper process we started.
Definition: gnunet-uri.c:38
struct GNUNET_DISK_FileHandle * GNUNET_DISK_file_open(const char *fn, enum GNUNET_DISK_OpenFlags flags, enum GNUNET_DISK_AccessPermissions perm)
Open a file.
Definition: disk.c:1238
enum GNUNET_GenericReturnValue GNUNET_DISK_file_close(struct GNUNET_DISK_FileHandle *h)
Close an open file.
Definition: disk.c:1309
ssize_t GNUNET_DISK_file_read(const struct GNUNET_DISK_FileHandle *h, void *result, size_t len)
Read the contents of a binary file into a buffer.
Definition: disk.c:623
enum GNUNET_GenericReturnValue GNUNET_DISK_file_handle_size(struct GNUNET_DISK_FileHandle *fh, off_t *size)
Get the size of an open file.
Definition: disk.c:193
@ GNUNET_DISK_OPEN_READ
Open the file for reading.
@ GNUNET_DISK_PERM_NONE
Nobody is allowed to do anything to the file.
#define GNUNET_log(kind,...)
#define GNUNET_memcpy(dst, src, n)
Call memcpy() but check for n being 0 first.
@ GNUNET_OK
@ GNUNET_YES
@ GNUNET_NO
#define GNUNET_assert(cond)
Use this for fatal errors that cannot be handled.
#define GNUNET_break(cond)
Use this for internal assertion violations that are not fatal (can be handled) but should not occur.
@ GNUNET_ERROR_TYPE_ERROR
@ GNUNET_ERROR_TYPE_DEBUG
int int GNUNET_asprintf(char **buf, const char *format,...) __attribute__((format(printf
Like asprintf, just portable.
#define GNUNET_strdup(a)
Wrapper around GNUNET_xstrdup_.
#define GNUNET_array_grow(arr, size, tsize)
Grow a well-typed (!) array.
#define GNUNET_new(type)
Allocate a struct or union of the given type.
#define GNUNET_malloc(size)
Wrapper around malloc.
#define GNUNET_realloc(ptr, size)
Wrapper around realloc.
#define GNUNET_free(ptr)
Wrapper around free.
size_t GNUNET_strlcpy(char *dst, const char *src, size_t n)
Like strlcpy but portable.
Definition: strings.c:137
static unsigned int size
Size of the "table".
Definition: peer.c:68
static void regex_split(struct RegexCombineCtx *ctx, unsigned int len, unsigned int prefix_l)
Add a single regex to a context, splitting the existing state.
static void move_children(struct RegexCombineCtx *dst, const struct RegexCombineCtx *src)
static char * regex_combine(struct RegexCombineCtx *ctx)
Extract a string from all prefix-combined regexes.
static void regex_ctx_destroy(struct RegexCombineCtx *ctx)
Free all resources used by the context node and all its children.
void REGEX_TEST_free_from_file(char **regexes)
Free all memory reserved for a set of regexes created by read_from_file.
static struct RegexCombineCtx * new_regex_ctx(unsigned int alphabet_size)
Create and initialize a new RegexCombineCtx.
static int c2i(char c, int size)
Char 2 int.
static void regex_add(struct RegexCombineCtx *ctx, const char *regex)
Add a single regex to a context, combining with existing regex by-prefix.
static void debugctx(struct RegexCombineCtx *ctx, int level)
Printf the combined regex ctx.
char * REGEX_TEST_combine(char *const regexes[], unsigned int alphabet_size)
Combine an array of regexes into a single prefix-shared regex.
static unsigned int get_prefix_length(const char *s1, const char *s2)
Get the number of matching characters on the prefix of both strings.
static struct RegexCombineCtx * get_longest_prefix(struct RegexCombineCtx *ctx, const char *regex)
Return the child context with the longest prefix match with the regex.
static void regex_add_multiple(struct RegexCombineCtx *ctx, const char *regex, struct RegexCombineCtx **children)
char ** REGEX_TEST_read_from_file(const char *filename)
Read a set of regexes from a file, one per line and return them in an array suitable for REGEX_TEST_c...
Handle used to access files (and pipes).
Struct to hold the tree formed by prefix-combining the regexes.
struct RegexCombineCtx ** children
Child nodes with same prefix and token.
char * s
Token.
unsigned int size
Alphabet size (how many children there are)