GNUnet  0.20.0
regex_test_lib.c
Go to the documentation of this file.
1 /*
2  * This file is part of GNUnet
3  * Copyright (C) 2012-2017 GNUnet e.V.
4  *
5  * GNUnet is free software: you can redistribute it and/or modify it
6  * under the terms of the GNU Affero General Public License as published
7  * by the Free Software Foundation, either version 3 of the License,
8  * or (at your option) any later version.
9  *
10  * GNUnet is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Affero General Public License for more details.
14  *
15  * You should have received a copy of the GNU Affero General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  SPDX-License-Identifier: AGPL3.0-or-later
19  */
28 #include "platform.h"
29 #include "gnunet_util_lib.h"
30 
31 
36 {
41 
45  unsigned int size;
46 
50  char *s;
51 };
52 
53 
64 static int
65 c2i (char c, int size)
66 {
67  switch (size)
68  {
69  case 2:
70  case 8:
71  return c - '0';
72  break;
73 
74  case 16:
75  if ((c >= '0') && (c <= '9') )
76  return c - '0';
77  else if ((c >= 'A') && (c <= 'F') )
78  return c - 'A' + 10;
79  else if ((c >= 'a') && (c <= 'f') )
80  return c - 'a' + 10;
81  else
82  {
84  "Cannot convert char %c in base %u\n",
85  c, size);
86  GNUNET_assert (0);
87  }
88  break;
89 
90  default:
91  GNUNET_assert (0);
92  }
93 }
94 
95 
96 #if DEBUG_REGEX
102 static void
103 space (int n)
104 {
105  for (int i = 0; i < n; i++)
106  fprintf (stderr, "| ");
107 }
108 
109 
110 #endif
111 
112 
119 static void
120 debugctx (struct RegexCombineCtx *ctx, int level)
121 {
122 #if DEBUG_REGEX
123  if (NULL != ctx->s)
124  {
125  space (level - 1);
126  fprintf (stderr, "%u:'%s'\n", c2i (ctx->s[0], ctx->size), ctx->s);
127  }
128  else
129  fprintf (stderr, "ROOT (base %u)\n", ctx->size);
130  for (unsigned int i = 0; i < ctx->size; i++)
131  {
132  if (NULL != ctx->children[i])
133  {
134  space (level);
135  debugctx (ctx->children[i], level + 1);
136  }
137  }
138  fflush (stderr);
139 #endif
140 }
141 
142 
149 static void
150 regex_add (struct RegexCombineCtx *ctx,
151  const char *regex);
152 
153 
159 static struct RegexCombineCtx *
160 new_regex_ctx (unsigned int alphabet_size)
161 {
162  struct RegexCombineCtx *ctx;
163  size_t array_size;
164 
165  array_size = sizeof(struct RegexCombineCtx *) * alphabet_size;
166  ctx = GNUNET_new (struct RegexCombineCtx);
167  ctx->children = GNUNET_malloc (array_size);
168  ctx->size = alphabet_size;
169 
170  return ctx;
171 }
172 
173 
174 static void
176  const struct RegexCombineCtx *src)
177 {
178  size_t array_size;
179 
180  array_size = sizeof(struct RegexCombineCtx *) * src->size;
181  GNUNET_memcpy (dst->children,
182  src->children,
183  array_size);
184  for (unsigned int i = 0; i < src->size; i++)
185  {
186  src->children[i] = NULL;
187  }
188 }
189 
190 
198 static char *
200 {
201  struct RegexCombineCtx *p;
202  unsigned int i;
203  size_t len;
204  char *regex;
205  char *tmp;
206  char *s;
207  int opt;
208 
209  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "new combine %s\n", ctx->s);
210  regex = GNUNET_strdup ("");
211  opt = GNUNET_NO;
212  for (i = 0; i < ctx->size; i++)
213  {
214  p = ctx->children[i];
215  if (NULL == p)
216  continue;
218  "adding '%s' to innner %s\n",
219  p->s, ctx->s);
220  s = regex_combine (p);
221  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " total '%s'\n", s);
222  if (strlen (s) == 0)
223  {
224  opt = GNUNET_YES;
225  }
226  else
227  {
228  GNUNET_asprintf (&tmp, "%s%s|", regex, s);
229  GNUNET_free (regex);
230  regex = tmp;
231  }
232  GNUNET_free (s);
233  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " so far '%s' for inner %s\n", regex,
234  ctx->s);
235  }
236 
237  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "opt: %d, innner: '%s'\n", opt, regex);
238  len = strlen (regex);
239  if (0 == len)
240  {
241  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "empty, returning ''\n");
242  GNUNET_free (regex);
243  return NULL == ctx->s ? NULL : GNUNET_strdup (ctx->s);
244  }
245 
246  if ('|' == regex[len - 1])
247  regex[len - 1] = '\0';
248 
249  if (NULL != ctx->s)
250  {
251  if (opt)
252  GNUNET_asprintf (&s, "%s(%s)?", ctx->s, regex);
253  else
254  GNUNET_asprintf (&s, "%s(%s)", ctx->s, regex);
255  GNUNET_free (regex);
256  regex = s;
257  }
258 
259  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "partial: %s\n", regex);
260  return regex;
261 }
262 
263 
272 static unsigned int
273 get_prefix_length (const char *s1, const char *s2)
274 {
275  unsigned int l1;
276  unsigned int l2;
277  unsigned int limit;
278  unsigned int i;
279 
280  l1 = strlen (s1);
281  l2 = strlen (s2);
282  limit = l1 > l2 ? l2 : l1;
283 
284  for (i = 0; i < limit; i++)
285  {
286  if (s1[i] != s2[i])
287  return i;
288  }
289  return limit;
290 }
291 
292 
302 static struct RegexCombineCtx *
303 get_longest_prefix (struct RegexCombineCtx *ctx, const char *regex)
304 {
305  struct RegexCombineCtx *p;
306  struct RegexCombineCtx *best;
307  unsigned int i;
308  unsigned int l;
309  unsigned int best_l;
310 
311  best_l = 0;
312  best = NULL;
313 
314  for (i = 0; i < ctx->size; i++)
315  {
316  p = ctx->children[i];
317  if (NULL == p)
318  continue;
319 
320  l = get_prefix_length (p->s, regex);
321  if (l > best_l)
322  {
323  GNUNET_break (0 == best_l);
324  best = p;
325  best_l = l;
326  }
327  }
328  return best;
329 }
330 
331 
332 static void
334  const char *regex,
335  struct RegexCombineCtx **children)
336 {
337  char tmp[2];
338  long unsigned int i;
339  size_t l;
340  struct RegexCombineCtx *newctx;
341  unsigned int count;
342 
343  if ('(' != regex[0])
344  {
345  GNUNET_assert (0);
346  }
347 
348  /* Does the regex cover *all* possible children? Then don't add any,
349  * as it will be covered by the post-regex "(a-z)*"
350  */
351  l = strlen (regex);
352  count = 0;
353  for (i = 1UL; i < l; i++)
354  {
355  if ((regex[i] != '|') && (regex[i] != ')') )
356  {
357  count++;
358  }
359  }
360  if (count == ctx->size)
361  {
362  return;
363  }
364 
365  /* Add every component as a child node */
366  tmp[1] = '\0';
367  for (i = 1UL; i < l; i++)
368  {
369  if ((regex[i] != '|') && (regex[i] != ')') )
370  {
371  tmp[0] = regex[i];
372  newctx = new_regex_ctx (ctx->size);
373  newctx->s = GNUNET_strdup (tmp);
374  if (children != NULL)
375  GNUNET_memcpy (newctx->children,
376  children,
377  sizeof(*children) * ctx->size);
378  ctx->children[c2i (tmp[0], ctx->size)] = newctx;
379  }
380  }
381 }
382 
383 
394 static void
396  unsigned int len,
397  unsigned int prefix_l)
398 {
399  struct RegexCombineCtx *newctx;
400  unsigned int idx;
401  char *suffix;
402 
403  suffix = GNUNET_malloc (len - prefix_l + 1);
404  /*
405  * We can use GNUNET_strlcpy because ctx->s is null-terminated
406  */
407  GNUNET_strlcpy (suffix, &ctx->s[prefix_l], len - prefix_l + 1);
408 
409  /* Suffix saved, truncate current node so it only contains the prefix,
410  * copy any children nodes to put as grandchildren and initialize new empty
411  * children array.
412  */
413  ctx->s[prefix_l] = '\0';
414 
415  /* If the suffix is an OR expression, add multiple children */
416  if ('(' == suffix[0])
417  {
418  struct RegexCombineCtx **tmp;
419 
420  tmp = ctx->children;
421  ctx->children = GNUNET_malloc (sizeof(*tmp) * ctx->size);
422  regex_add_multiple (ctx, suffix, tmp);
423  GNUNET_free (suffix);
424  GNUNET_free (tmp);
425  return;
426  }
427 
428  /* The suffix is a normal string, add as one node */
429  newctx = new_regex_ctx (ctx->size);
430  newctx->s = suffix;
431  move_children (newctx, ctx);
432  idx = c2i (suffix[0], ctx->size);
433  ctx->children[idx] = newctx;
434 }
435 
436 
443 static void
444 regex_add (struct RegexCombineCtx *ctx, const char *regex)
445 {
446  struct RegexCombineCtx *p;
447  struct RegexCombineCtx *newctx;
448  long unsigned int l;
449  unsigned int prefix_l;
450  const char *rest_r;
451  const char *rest_s;
452  size_t len;
453  int idx;
454 
456  "regex_add '%s' into '%s'\n",
457  regex, ctx->s);
458  l = strlen (regex);
459  if (0UL == l)
460  return;
461 
462  /* If the regex is in the form of (a|b|c), add every character separately */
463  if ('(' == regex[0])
464  {
465  regex_add_multiple (ctx, regex, NULL);
466  return;
467  }
468 
469  p = get_longest_prefix (ctx, regex);
470  if (NULL != p)
471  {
472  /* There is some prefix match, reduce regex and try again */
473  prefix_l = get_prefix_length (p->s, regex);
474  rest_s = &p->s[prefix_l];
475  rest_r = &regex[prefix_l];
476  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "chosen '%s' [%u]\n", p->s, prefix_l);
477  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "prefix r '%.*s'\n", prefix_l, p->s);
478  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest r '%s'\n", rest_r);
479  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest s '%s'\n", rest_s);
480  len = strlen (p->s);
481  if (prefix_l < len)
482  {
483  regex_split (p, len, prefix_l);
484  }
485  regex_add (p, rest_r);
486  return;
487  }
488 
489  /* There is no prefix match, add new */
490  idx = c2i (regex[0], ctx->size);
491  if ((NULL == ctx->children[idx]) && (NULL != ctx->s))
492  {
493  /* this was the end before, add empty string */
494  newctx = new_regex_ctx (ctx->size);
495  newctx->s = GNUNET_strdup ("");
496  ctx->children[idx] = newctx;
497  }
498  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " no match\n");
499  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new state %s\n", regex);
500  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " under %s\n", ctx->s);
501  newctx = new_regex_ctx (ctx->size);
502  newctx->s = GNUNET_strdup (regex);
503  ctx->children[idx] = newctx;
504 }
505 
506 
512 static void
514 {
515  unsigned int i;
516 
517  if (NULL == ctx)
518  return;
519 
520  for (i = 0; i < ctx->size; i++)
521  {
522  regex_ctx_destroy (ctx->children[i]);
523  }
524  GNUNET_free (ctx->s); /* 's' on root node is null */
525  GNUNET_free (ctx->children);
526  GNUNET_free (ctx);
527 }
528 
529 
544 char *
545 REGEX_TEST_combine (char *const regexes[], unsigned int alphabet_size)
546 {
547  unsigned int i;
548  char *combined;
549  const char *current;
550  struct RegexCombineCtx *ctx;
551 
552  ctx = new_regex_ctx (alphabet_size);
553  for (i = 0; regexes[i]; i++)
554  {
555  current = regexes[i];
556  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Regex %u: %s\n", i, current);
557  regex_add (ctx, current);
558  debugctx (ctx, 0);
559  }
560  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "\nCombining...\n");
561  debugctx (ctx, 0);
562 
563  combined = regex_combine (ctx);
564 
566 
567  return combined;
568 }
569 
570 
580 char **
582 {
583  struct GNUNET_DISK_FileHandle *f;
584  unsigned int nr;
585  unsigned int offset;
586  off_t size;
587  size_t len;
588  char *buffer;
589  char *regex;
590  char **regexes;
591 
595  if (NULL == f)
596  {
598  "Can't open file %s for reading\n", filename);
599  return NULL;
600  }
602  {
604  "Can't get size of file %s\n", filename);
606  return NULL;
607  }
609  "using file %s, size %llu\n",
610  filename, (unsigned long long) size);
611 
612  buffer = GNUNET_malloc (size + 1);
613  GNUNET_DISK_file_read (f, buffer, size);
615  regexes = GNUNET_malloc (sizeof(char *));
616  nr = 1;
617  offset = 0;
618  regex = NULL;
619  do
620  {
621  if (NULL == regex)
622  regex = GNUNET_malloc (size + 1);
623  len = (size_t) sscanf (&buffer[offset], "%s", regex);
624  if (0 == len)
625  break;
626  len = strlen (regex);
627  offset += len + 1;
628  if (len < 1)
629  continue;
630  regex[len] = '\0';
631  regex = GNUNET_realloc (regex, len + 1);
632  GNUNET_array_grow (regexes, nr, nr + 1);
633  GNUNET_assert (NULL == regexes[nr - 2]);
634  regexes[nr - 2] = regex;
635  regexes[nr - 1] = NULL;
636  regex = NULL;
637  }
638  while (offset < size);
639  GNUNET_free (regex);
640  GNUNET_free (buffer);
641 
642  return regexes;
643 }
644 
645 
651 void
653 {
654  unsigned int i;
655 
656  for (i = 0; regexes[i]; i++)
657  GNUNET_free (regexes[i]);
658  GNUNET_free (regexes);
659 }
660 
661 
662 /* end of regex_test_lib.c */
static struct LoggingHandle * l
static char * filename
uint16_t len
length of data (which is always a uint32_t, but presumably this can be used to specify that fewer byt...
static struct GNUNET_OS_Process * p
Helper process we started.
Definition: gnunet-uri.c:38
static struct GNUNET_DNSSTUB_Context * ctx
Context for DNS resolution.
struct GNUNET_DISK_FileHandle * GNUNET_DISK_file_open(const char *fn, enum GNUNET_DISK_OpenFlags flags, enum GNUNET_DISK_AccessPermissions perm)
Open a file.
Definition: disk.c:1237
enum GNUNET_GenericReturnValue GNUNET_DISK_file_close(struct GNUNET_DISK_FileHandle *h)
Close an open file.
Definition: disk.c:1308
ssize_t GNUNET_DISK_file_read(const struct GNUNET_DISK_FileHandle *h, void *result, size_t len)
Read the contents of a binary file into a buffer.
Definition: disk.c:622
enum GNUNET_GenericReturnValue GNUNET_DISK_file_handle_size(struct GNUNET_DISK_FileHandle *fh, off_t *size)
Get the size of an open file.
Definition: disk.c:192
@ GNUNET_DISK_OPEN_READ
Open the file for reading.
@ GNUNET_DISK_PERM_NONE
Nobody is allowed to do anything to the file.
#define GNUNET_log(kind,...)
#define GNUNET_memcpy(dst, src, n)
Call memcpy() but check for n being 0 first.
@ GNUNET_OK
@ GNUNET_YES
@ GNUNET_NO
#define GNUNET_assert(cond)
Use this for fatal errors that cannot be handled.
#define GNUNET_break(cond)
Use this for internal assertion violations that are not fatal (can be handled) but should not occur.
@ GNUNET_ERROR_TYPE_ERROR
@ GNUNET_ERROR_TYPE_DEBUG
int int GNUNET_asprintf(char **buf, const char *format,...) __attribute__((format(printf
Like asprintf, just portable.
#define GNUNET_strdup(a)
Wrapper around GNUNET_xstrdup_.
#define GNUNET_array_grow(arr, size, tsize)
Grow a well-typed (!) array.
#define GNUNET_new(type)
Allocate a struct or union of the given type.
#define GNUNET_malloc(size)
Wrapper around malloc.
#define GNUNET_realloc(ptr, size)
Wrapper around realloc.
#define GNUNET_free(ptr)
Wrapper around free.
size_t GNUNET_strlcpy(char *dst, const char *src, size_t n)
Like strlcpy but portable.
Definition: strings.c:138
static unsigned int size
Size of the "table".
Definition: peer.c:68
static void regex_split(struct RegexCombineCtx *ctx, unsigned int len, unsigned int prefix_l)
Add a single regex to a context, splitting the existing state.
static void move_children(struct RegexCombineCtx *dst, const struct RegexCombineCtx *src)
static struct RegexCombineCtx * new_regex_ctx(unsigned int alphabet_size)
Create and initialize a new RegexCombineCtx.
char * REGEX_TEST_combine(char *const regexes[], unsigned int alphabet_size)
Combine an array of regexes into a single prefix-shared regex.
static void regex_ctx_destroy(struct RegexCombineCtx *ctx)
Free all resources used by the context node and all its children.
void REGEX_TEST_free_from_file(char **regexes)
Free all memory reserved for a set of regexes created by read_from_file.
static int c2i(char c, int size)
Char 2 int.
static void regex_add(struct RegexCombineCtx *ctx, const char *regex)
Add a single regex to a context, combining with existing regex by-prefix.
static void debugctx(struct RegexCombineCtx *ctx, int level)
Printf the combined regex ctx.
static struct RegexCombineCtx * get_longest_prefix(struct RegexCombineCtx *ctx, const char *regex)
Return the child context with the longest prefix match with the regex.
char ** REGEX_TEST_read_from_file(const char *filename)
Read a set of regexes from a file, one per line and return them in an array suitable for REGEX_TEST_c...
static unsigned int get_prefix_length(const char *s1, const char *s2)
Get the number of matching characters on the prefix of both strings.
static char * regex_combine(struct RegexCombineCtx *ctx)
Extract a string from all prefix-combined regexes.
static void regex_add_multiple(struct RegexCombineCtx *ctx, const char *regex, struct RegexCombineCtx **children)
Handle used to access files (and pipes).
Struct to hold the tree formed by prefix-combining the regexes.
struct RegexCombineCtx ** children
Child nodes with same prefix and token.
char * s
Token.
unsigned int size
Alphabet size (how many children there are)