GNUnet  0.10.x
regex_test_lib.c
Go to the documentation of this file.
1 /*
2  * This file is part of GNUnet
3  * Copyright (C) 2012-2017 GNUnet e.V.
4  *
5  * GNUnet is free software: you can redistribute it and/or modify it
6  * under the terms of the GNU Affero General Public License as published
7  * by the Free Software Foundation, either version 3 of the License,
8  * or (at your option) any later version.
9  *
10  * GNUnet is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Affero General Public License for more details.
14  *
15  * You should have received a copy of the GNU Affero General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  SPDX-License-Identifier: AGPL3.0-or-later
19  */
28 #include "platform.h"
29 #include "gnunet_util_lib.h"
30 
31 
36 {
41 
45  unsigned int size;
46 
50  char *s;
51 };
52 
53 
64 static int
65 c2i (char c, int size)
66 {
67  switch (size)
68  {
69  case 2:
70  case 8:
71  return c - '0';
72  break;
73  case 16:
74  if (c >= '0' && c <= '9')
75  return c - '0';
76  else if (c >= 'A' && c <= 'F')
77  return c - 'A' + 10;
78  else if (c >= 'a' && c <= 'f')
79  return c - 'a' + 10;
80  else
81  {
83  "Cannot convert char %c in base %u\n",
84  c, size);
85  GNUNET_assert (0);
86  }
87  break;
88  default:
89  GNUNET_assert (0);
90  }
91 }
92 
93 
99 static void
100 space (int n)
101 {
102  for (int i = 0; i < n; i++)
103  fprintf (stderr, "| ");
104 }
105 
106 
113 static void
114 debugctx (struct RegexCombineCtx *ctx, int level)
115 {
116 #if DEBUG_REGEX
117  if (NULL != ctx->s)
118  {
119  space (level - 1);
120  fprintf (stderr, "%u:'%s'\n", c2i(ctx->s[0], ctx->size), ctx->s);
121  }
122  else
123  fprintf (stderr, "ROOT (base %u)\n", ctx->size);
124  for (unsigned int i = 0; i < ctx->size; i++)
125  {
126  if (NULL != ctx->children[i])
127  {
128  space (level);
129  debugctx (ctx->children[i], level + 1);
130  }
131  }
132  fflush(stderr);
133 #endif
134 }
135 
136 
143 static void
144 regex_add (struct RegexCombineCtx *ctx,
145  const char *regex);
146 
147 
153 static struct RegexCombineCtx *
154 new_regex_ctx (unsigned int alphabet_size)
155 {
156  struct RegexCombineCtx *ctx;
157  size_t array_size;
158 
159  array_size = sizeof(struct RegexCombineCtx *) * alphabet_size;
160  ctx = GNUNET_new (struct RegexCombineCtx);
161  ctx->children = GNUNET_malloc (array_size);
162  ctx->size = alphabet_size;
163 
164  return ctx;
165 }
166 
167 
168 static void
170  const struct RegexCombineCtx *src)
171 {
172  size_t array_size;
173 
174  array_size = sizeof(struct RegexCombineCtx *) * src->size;
175  GNUNET_memcpy (dst->children,
176  src->children,
177  array_size);
178  for (unsigned int i = 0; i < src->size; i++)
179  {
180  src->children[i] = NULL;
181  }
182 }
183 
184 
192 static char *
194 {
195  struct RegexCombineCtx *p;
196  unsigned int i;
197  size_t len;
198  char *regex;
199  char *tmp;
200  char *s;
201  int opt;
202 
203  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "new combine %s\n", ctx->s);
204  regex = GNUNET_strdup ("");
205  opt = GNUNET_NO;
206  for (i = 0; i < ctx->size; i++)
207  {
208  p = ctx->children[i];
209  if (NULL == p)
210  continue;
212  "adding '%s' to innner %s\n",
213  p->s, ctx->s);
214  s = regex_combine (p);
215  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " total '%s'\n", s);
216  if (strlen(s) == 0)
217  {
218  opt = GNUNET_YES;
219  }
220  else
221  {
222  GNUNET_asprintf (&tmp, "%s%s|", regex, s);
223  GNUNET_free_non_null (regex);
224  regex = tmp;
225  }
227  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " so far '%s' for inner %s\n", regex, ctx->s);
228  }
229 
230  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "opt: %d, innner: '%s'\n", opt, regex);
231  len = strlen (regex);
232  if (0 == len)
233  {
234  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "empty, returning ''\n");
235  GNUNET_free (regex);
236  return NULL == ctx->s ? NULL : GNUNET_strdup (ctx->s);
237  }
238 
239  if ('|' == regex[len - 1])
240  regex[len - 1] = '\0';
241 
242  if (NULL != ctx->s)
243  {
244  if (opt)
245  GNUNET_asprintf (&s, "%s(%s)?", ctx->s, regex);
246  else
247  GNUNET_asprintf (&s, "%s(%s)", ctx->s, regex);
248  GNUNET_free (regex);
249  regex = s;
250  }
251 
252  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "partial: %s\n", regex);
253  return regex;
254 }
255 
256 
265 static unsigned int
266 get_prefix_length (const char *s1, const char *s2)
267 {
268  unsigned int l1;
269  unsigned int l2;
270  unsigned int limit;
271  unsigned int i;
272 
273  l1 = strlen (s1);
274  l2 = strlen (s2);
275  limit = l1 > l2 ? l2 : l1;
276 
277  for (i = 0; i < limit; i++)
278  {
279  if (s1[i] != s2[i])
280  return i;
281  }
282  return limit;
283 }
284 
285 
295 static struct RegexCombineCtx *
296 get_longest_prefix (struct RegexCombineCtx *ctx, const char *regex)
297 {
298  struct RegexCombineCtx *p;
299  struct RegexCombineCtx *best;
300  unsigned int i;
301  unsigned int l;
302  unsigned int best_l;
303 
304  best_l = 0;
305  best = NULL;
306 
307  for (i = 0; i < ctx->size; i++)
308  {
309  p = ctx->children[i];
310  if (NULL == p)
311  continue;
312 
313  l = get_prefix_length (p->s, regex);
314  if (l > best_l)
315  {
316  GNUNET_break (0 == best_l);
317  best = p;
318  best_l = l;
319  }
320  }
321  return best;
322 }
323 
324 static void
326  const char *regex,
327  struct RegexCombineCtx **children)
328 {
329  char tmp[2];
330  long unsigned int i;
331  size_t l;
332  struct RegexCombineCtx *newctx;
333  unsigned int count;
334 
335  if ('(' != regex[0])
336  {
337  GNUNET_assert (0);
338  }
339 
340  /* Does the regex cover *all* possible children? Then don't add any,
341  * as it will be covered by the post-regex "(a-z)*"
342  */
343  l = strlen (regex);
344  count = 0;
345  for (i = 1UL; i < l; i++)
346  {
347  if (regex[i] != '|' && regex[i] != ')')
348  {
349  count++;
350  }
351  }
352  if (count == ctx->size)
353  {
354  return;
355  }
356 
357  /* Add every component as a child node */
358  tmp[1] = '\0';
359  for (i = 1UL; i < l; i++)
360  {
361  if (regex[i] != '|' && regex[i] != ')')
362  {
363  tmp[0] = regex[i];
364  newctx = new_regex_ctx(ctx->size);
365  newctx->s = GNUNET_strdup (tmp);
366  if (children != NULL)
367  GNUNET_memcpy (newctx->children,
368  children,
369  sizeof (*children) * ctx->size);
370  ctx->children[c2i(tmp[0], ctx->size)] = newctx;
371  }
372  }
373 }
374 
385 static void
387  unsigned int len,
388  unsigned int prefix_l)
389 {
390  struct RegexCombineCtx *newctx;
391  unsigned int idx;
392  char *suffix;
393 
394  suffix = GNUNET_malloc (len - prefix_l + 1);
395  /*
396  * We can use GNUNET_strlcpy because ctx->s is null-terminated
397  */
398  GNUNET_strlcpy (suffix, &ctx->s[prefix_l], len - prefix_l + 1);
399 
400  /* Suffix saved, truncate current node so it only contains the prefix,
401  * copy any children nodes to put as grandchildren and initialize new empty
402  * children array.
403  */
404  ctx->s[prefix_l] = '\0';
405 
406  /* If the suffix is an OR expression, add multiple children */
407  if ('(' == suffix[0])
408  {
409  struct RegexCombineCtx **tmp;
410 
411  tmp = ctx->children;
412  ctx->children = GNUNET_malloc (sizeof(*tmp) * ctx->size);
413  regex_add_multiple (ctx, suffix, tmp);
414  GNUNET_free (suffix);
415  GNUNET_free (tmp);
416  return;
417  }
418 
419  /* The suffix is a normal string, add as one node */
420  newctx = new_regex_ctx (ctx->size);
421  newctx->s = suffix;
422  move_children (newctx, ctx);
423  idx = c2i(suffix[0], ctx->size);
424  ctx->children[idx] = newctx;
425 }
426 
427 
434 static void
435 regex_add (struct RegexCombineCtx *ctx, const char *regex)
436 {
437  struct RegexCombineCtx *p;
438  struct RegexCombineCtx *newctx;
439  long unsigned int l;
440  unsigned int prefix_l;
441  const char *rest_r;
442  const char *rest_s;
443  size_t len;
444  int idx;
445 
447  "regex_add '%s' into '%s'\n",
448  regex, ctx->s);
449  l = strlen (regex);
450  if (0UL == l)
451  return;
452 
453  /* If the regex is in the form of (a|b|c), add every character separately */
454  if ('(' == regex[0])
455  {
456  regex_add_multiple (ctx, regex, NULL);
457  return;
458  }
459 
460  p = get_longest_prefix (ctx, regex);
461  if (NULL != p)
462  {
463  /* There is some prefix match, reduce regex and try again */
464  prefix_l = get_prefix_length (p->s, regex);
465  rest_s = &p->s[prefix_l];
466  rest_r = &regex[prefix_l];
467  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "chosen '%s' [%u]\n", p->s, prefix_l);
468  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "prefix r '%.*s'\n", prefix_l, p->s);
469  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest r '%s'\n", rest_r);
470  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest s '%s'\n", rest_s);
471  len = strlen (p->s);
472  if (prefix_l < len)
473  {
474  regex_split (p, len, prefix_l);
475  }
476  regex_add (p, rest_r);
477  return;
478  }
479 
480  /* There is no prefix match, add new */
481  idx = c2i(regex[0], ctx->size);
482  if (NULL == ctx->children[idx] && NULL != ctx->s)
483  {
484  /* this was the end before, add empty string */
485  newctx = new_regex_ctx (ctx->size);
486  newctx->s = GNUNET_strdup ("");
487  ctx->children[idx] = newctx;
488  }
489  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " no match\n");
490  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new state %s\n", regex);
491  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " under %s\n", ctx->s);
492  newctx = new_regex_ctx(ctx->size);
493  newctx->s = GNUNET_strdup (regex);
494  ctx->children[idx] = newctx;
495 }
496 
497 
503 static void
505 {
506  unsigned int i;
507 
508  if (NULL == ctx)
509  return;
510 
511  for (i = 0; i < ctx->size; i++)
512  {
513  regex_ctx_destroy (ctx->children[i]);
514  }
515  GNUNET_free_non_null (ctx->s); /* 's' on root node is null */
516  GNUNET_free (ctx->children);
517  GNUNET_free (ctx);
518 }
519 
520 
535 char *
536 REGEX_TEST_combine (char * const regexes[], unsigned int alphabet_size)
537 {
538  unsigned int i;
539  char *combined;
540  const char *current;
541  struct RegexCombineCtx *ctx;
542 
543  ctx = new_regex_ctx (alphabet_size);
544  for (i = 0; regexes[i]; i++)
545  {
546  current = regexes[i];
547  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Regex %u: %s\n", i, current);
548  regex_add (ctx, current);
549  debugctx (ctx, 0);
550  }
551  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "\nCombining...\n");
552  debugctx (ctx, 0);
553 
554  combined = regex_combine (ctx);
555 
556  regex_ctx_destroy (ctx);
557 
558  return combined;
559 }
560 
561 
571 char **
573 {
574  struct GNUNET_DISK_FileHandle *f;
575  unsigned int nr;
576  unsigned int offset;
577  off_t size;
578  size_t len;
579  char *buffer;
580  char *regex;
581  char **regexes;
582 
583  f = GNUNET_DISK_file_open (filename,
586  if (NULL == f)
587  {
589  "Can't open file %s for reading\n", filename);
590  return NULL;
591  }
592  if (GNUNET_OK != GNUNET_DISK_file_handle_size (f, &size))
593  {
595  "Can't get size of file %s\n", filename);
597  return NULL;
598  }
600  "using file %s, size %llu\n",
601  filename, (unsigned long long) size);
602 
603  buffer = GNUNET_malloc (size + 1);
604  GNUNET_DISK_file_read (f, buffer, size);
606  regexes = GNUNET_malloc (sizeof (char *));
607  nr = 1;
608  offset = 0;
609  regex = NULL;
610  do
611  {
612  if (NULL == regex)
613  regex = GNUNET_malloc (size + 1);
614  len = (size_t) sscanf (&buffer[offset], "%s", regex);
615  if (0 == len)
616  break;
617  len = strlen (regex);
618  offset += len + 1;
619  if (len < 1)
620  continue;
621  regex[len] = '\0';
622  regex = GNUNET_realloc (regex, len + 1);
623  GNUNET_array_grow (regexes, nr, nr + 1);
624  GNUNET_assert (NULL == regexes[nr - 2]);
625  regexes[nr - 2] = regex;
626  regexes[nr - 1] = NULL;
627  regex = NULL;
628  } while (offset < size);
629  GNUNET_free_non_null (regex);
630  GNUNET_free (buffer);
631 
632  return regexes;
633 }
634 
635 
641 void
643 {
644  unsigned int i;
645 
646  for (i = 0; regexes[i]; i++)
647  GNUNET_free (regexes[i]);
648  GNUNET_free (regexes);
649 }
650 
651 /* end of regex_test_lib.c */
Open the file for reading.
int GNUNET_DISK_file_close(struct GNUNET_DISK_FileHandle *h)
Close an open file.
Definition: disk.c:1817
static void space(int n)
Printf spaces to indent the regex tree.
char * s
Token.
struct RegexCombineCtx ** children
Child nodes with same prefix and token.
ssize_t GNUNET_DISK_file_read(const struct GNUNET_DISK_FileHandle *h, void *result, size_t len)
Read the contents of a binary file into a buffer.
Definition: disk.c:881
Struct to hold the tree formed by prefix-combining the regexes.
#define GNUNET_assert(cond)
Use this for fatal errors that cannot be handled.
static struct RegexCombineCtx * get_longest_prefix(struct RegexCombineCtx *ctx, const char *regex)
Return the child context with the longest prefix match with the regex.
Nobody is allowed to do anything to the file.
#define GNUNET_NO
Definition: gnunet_common.h:81
#define GNUNET_OK
Named constants for return values.
Definition: gnunet_common.h:78
#define GNUNET_free_non_null(ptr)
Free the memory pointed to by ptr if ptr is not NULL.
#define GNUNET_new(type)
Allocate a struct or union of the given type.
static void regex_add_multiple(struct RegexCombineCtx *ctx, const char *regex, struct RegexCombineCtx **children)
#define GNUNET_strdup(a)
Wrapper around GNUNET_xstrdup_.
unsigned int size
Alphabet size (how many children there are)
#define GNUNET_break(cond)
Use this for internal assertion violations that are not fatal (can be handled) but should not occur...
static struct GNUNET_DNSSTUB_Context * ctx
Context for DNS resolution.
static void debugctx(struct RegexCombineCtx *ctx, int level)
Printf the combined regex ctx.
static void move_children(struct RegexCombineCtx *dst, const struct RegexCombineCtx *src)
static struct LoggingHandle * l
int GNUNET_asprintf(char **buf, const char *format,...)
Like asprintf, just portable.
static void regex_ctx_destroy(struct RegexCombineCtx *ctx)
Free all resources used by the context node and all its children.
char * REGEX_TEST_combine(char *const regexes[], unsigned int alphabet_size)
Combine an array of regexes into a single prefix-shared regex.
#define GNUNET_array_grow(arr, size, tsize)
Grow a well-typed (!) array.
#define GNUNET_memcpy(dst, src, n)
static struct GNUNET_OS_Process * p
Helper process we started.
Definition: gnunet-qr.c:59
#define GNUNET_realloc(ptr, size)
Wrapper around realloc.
static int c2i(char c, int size)
Char 2 int.
static char * filename
static char * regex_combine(struct RegexCombineCtx *ctx)
Extract a string from all prefix-combined regexes.
static void regex_split(struct RegexCombineCtx *ctx, unsigned int len, unsigned int prefix_l)
Add a single regex to a context, splitting the exisiting state.
void REGEX_TEST_free_from_file(char **regexes)
Free all memory reserved for a set of regexes created by read_from_file.
int GNUNET_DISK_file_handle_size(struct GNUNET_DISK_FileHandle *fh, off_t *size)
Get the size of an open file.
Definition: disk.c:208
char ** REGEX_TEST_read_from_file(const char *filename)
Read a set of regexes from a file, one per line and return them in an array suitable for REGEX_TEST_c...
#define GNUNET_log(kind,...)
static struct RegexCombineCtx * new_regex_ctx(unsigned int alphabet_size)
Create and initialize a new RegexCombineCtx.
#define GNUNET_YES
Definition: gnunet_common.h:80
struct GNUNET_DISK_FileHandle * GNUNET_DISK_file_open(const char *fn, enum GNUNET_DISK_OpenFlags flags, enum GNUNET_DISK_AccessPermissions perm)
Open a file.
Definition: disk.c:1673
static void regex_add(struct RegexCombineCtx *ctx, const char *regex)
Add a single regex to a context, combining with exisiting regex by-prefix.
size_t GNUNET_strlcpy(char *dst, const char *src, size_t n)
Like strlcpy but portable.
Handle used to access files (and pipes).
static unsigned int get_prefix_length(const char *s1, const char *s2)
Get the number of matching characters on the prefix of both strings.
#define GNUNET_malloc(size)
Wrapper around malloc.
#define GNUNET_free(ptr)
Wrapper around free.
uint16_t len
length of data (which is always a uint32_t, but presumably this can be used to specify that fewer byt...