GNUnet  0.11.x
regex_test_lib.c
Go to the documentation of this file.
1 /*
2  * This file is part of GNUnet
3  * Copyright (C) 2012-2017 GNUnet e.V.
4  *
5  * GNUnet is free software: you can redistribute it and/or modify it
6  * under the terms of the GNU Affero General Public License as published
7  * by the Free Software Foundation, either version 3 of the License,
8  * or (at your option) any later version.
9  *
10  * GNUnet is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Affero General Public License for more details.
14  *
15  * You should have received a copy of the GNU Affero General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  SPDX-License-Identifier: AGPL3.0-or-later
19  */
28 #include "platform.h"
29 #include "gnunet_util_lib.h"
30 
31 
36 {
41 
45  unsigned int size;
46 
50  char *s;
51 };
52 
53 
64 static int
65 c2i (char c, int size)
66 {
67  switch (size)
68  {
69  case 2:
70  case 8:
71  return c - '0';
72  break;
73 
74  case 16:
75  if ((c >= '0') && (c <= '9') )
76  return c - '0';
77  else if ((c >= 'A') && (c <= 'F') )
78  return c - 'A' + 10;
79  else if ((c >= 'a') && (c <= 'f') )
80  return c - 'a' + 10;
81  else
82  {
84  "Cannot convert char %c in base %u\n",
85  c, size);
86  GNUNET_assert (0);
87  }
88  break;
89 
90  default:
91  GNUNET_assert (0);
92  }
93 }
94 
95 
101 static void
102 space (int n)
103 {
104  for (int i = 0; i < n; i++)
105  fprintf (stderr, "| ");
106 }
107 
108 
115 static void
116 debugctx (struct RegexCombineCtx *ctx, int level)
117 {
118 #if DEBUG_REGEX
119  if (NULL != ctx->s)
120  {
121  space (level - 1);
122  fprintf (stderr, "%u:'%s'\n", c2i (ctx->s[0], ctx->size), ctx->s);
123  }
124  else
125  fprintf (stderr, "ROOT (base %u)\n", ctx->size);
126  for (unsigned int i = 0; i < ctx->size; i++)
127  {
128  if (NULL != ctx->children[i])
129  {
130  space (level);
131  debugctx (ctx->children[i], level + 1);
132  }
133  }
134  fflush (stderr);
135 #endif
136 }
137 
138 
145 static void
146 regex_add (struct RegexCombineCtx *ctx,
147  const char *regex);
148 
149 
155 static struct RegexCombineCtx *
156 new_regex_ctx (unsigned int alphabet_size)
157 {
158  struct RegexCombineCtx *ctx;
159  size_t array_size;
160 
161  array_size = sizeof(struct RegexCombineCtx *) * alphabet_size;
162  ctx = GNUNET_new (struct RegexCombineCtx);
163  ctx->children = GNUNET_malloc (array_size);
164  ctx->size = alphabet_size;
165 
166  return ctx;
167 }
168 
169 
170 static void
172  const struct RegexCombineCtx *src)
173 {
174  size_t array_size;
175 
176  array_size = sizeof(struct RegexCombineCtx *) * src->size;
177  GNUNET_memcpy (dst->children,
178  src->children,
179  array_size);
180  for (unsigned int i = 0; i < src->size; i++)
181  {
182  src->children[i] = NULL;
183  }
184 }
185 
186 
194 static char *
196 {
197  struct RegexCombineCtx *p;
198  unsigned int i;
199  size_t len;
200  char *regex;
201  char *tmp;
202  char *s;
203  int opt;
204 
205  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "new combine %s\n", ctx->s);
206  regex = GNUNET_strdup ("");
207  opt = GNUNET_NO;
208  for (i = 0; i < ctx->size; i++)
209  {
210  p = ctx->children[i];
211  if (NULL == p)
212  continue;
214  "adding '%s' to innner %s\n",
215  p->s, ctx->s);
216  s = regex_combine (p);
217  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " total '%s'\n", s);
218  if (strlen (s) == 0)
219  {
220  opt = GNUNET_YES;
221  }
222  else
223  {
224  GNUNET_asprintf (&tmp, "%s%s|", regex, s);
225  GNUNET_free_non_null (regex);
226  regex = tmp;
227  }
229  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " so far '%s' for inner %s\n", regex,
230  ctx->s);
231  }
232 
233  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "opt: %d, innner: '%s'\n", opt, regex);
234  len = strlen (regex);
235  if (0 == len)
236  {
237  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "empty, returning ''\n");
238  GNUNET_free (regex);
239  return NULL == ctx->s ? NULL : GNUNET_strdup (ctx->s);
240  }
241 
242  if ('|' == regex[len - 1])
243  regex[len - 1] = '\0';
244 
245  if (NULL != ctx->s)
246  {
247  if (opt)
248  GNUNET_asprintf (&s, "%s(%s)?", ctx->s, regex);
249  else
250  GNUNET_asprintf (&s, "%s(%s)", ctx->s, regex);
251  GNUNET_free (regex);
252  regex = s;
253  }
254 
255  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "partial: %s\n", regex);
256  return regex;
257 }
258 
259 
268 static unsigned int
269 get_prefix_length (const char *s1, const char *s2)
270 {
271  unsigned int l1;
272  unsigned int l2;
273  unsigned int limit;
274  unsigned int i;
275 
276  l1 = strlen (s1);
277  l2 = strlen (s2);
278  limit = l1 > l2 ? l2 : l1;
279 
280  for (i = 0; i < limit; i++)
281  {
282  if (s1[i] != s2[i])
283  return i;
284  }
285  return limit;
286 }
287 
288 
298 static struct RegexCombineCtx *
299 get_longest_prefix (struct RegexCombineCtx *ctx, const char *regex)
300 {
301  struct RegexCombineCtx *p;
302  struct RegexCombineCtx *best;
303  unsigned int i;
304  unsigned int l;
305  unsigned int best_l;
306 
307  best_l = 0;
308  best = NULL;
309 
310  for (i = 0; i < ctx->size; i++)
311  {
312  p = ctx->children[i];
313  if (NULL == p)
314  continue;
315 
316  l = get_prefix_length (p->s, regex);
317  if (l > best_l)
318  {
319  GNUNET_break (0 == best_l);
320  best = p;
321  best_l = l;
322  }
323  }
324  return best;
325 }
326 
327 
328 static void
330  const char *regex,
331  struct RegexCombineCtx **children)
332 {
333  char tmp[2];
334  long unsigned int i;
335  size_t l;
336  struct RegexCombineCtx *newctx;
337  unsigned int count;
338 
339  if ('(' != regex[0])
340  {
341  GNUNET_assert (0);
342  }
343 
344  /* Does the regex cover *all* possible children? Then don't add any,
345  * as it will be covered by the post-regex "(a-z)*"
346  */
347  l = strlen (regex);
348  count = 0;
349  for (i = 1UL; i < l; i++)
350  {
351  if ((regex[i] != '|') && (regex[i] != ')') )
352  {
353  count++;
354  }
355  }
356  if (count == ctx->size)
357  {
358  return;
359  }
360 
361  /* Add every component as a child node */
362  tmp[1] = '\0';
363  for (i = 1UL; i < l; i++)
364  {
365  if ((regex[i] != '|') && (regex[i] != ')') )
366  {
367  tmp[0] = regex[i];
368  newctx = new_regex_ctx (ctx->size);
369  newctx->s = GNUNET_strdup (tmp);
370  if (children != NULL)
371  GNUNET_memcpy (newctx->children,
372  children,
373  sizeof(*children) * ctx->size);
374  ctx->children[c2i (tmp[0], ctx->size)] = newctx;
375  }
376  }
377 }
378 
379 
390 static void
392  unsigned int len,
393  unsigned int prefix_l)
394 {
395  struct RegexCombineCtx *newctx;
396  unsigned int idx;
397  char *suffix;
398 
399  suffix = GNUNET_malloc (len - prefix_l + 1);
400  /*
401  * We can use GNUNET_strlcpy because ctx->s is null-terminated
402  */
403  GNUNET_strlcpy (suffix, &ctx->s[prefix_l], len - prefix_l + 1);
404 
405  /* Suffix saved, truncate current node so it only contains the prefix,
406  * copy any children nodes to put as grandchildren and initialize new empty
407  * children array.
408  */
409  ctx->s[prefix_l] = '\0';
410 
411  /* If the suffix is an OR expression, add multiple children */
412  if ('(' == suffix[0])
413  {
414  struct RegexCombineCtx **tmp;
415 
416  tmp = ctx->children;
417  ctx->children = GNUNET_malloc (sizeof(*tmp) * ctx->size);
418  regex_add_multiple (ctx, suffix, tmp);
419  GNUNET_free (suffix);
420  GNUNET_free (tmp);
421  return;
422  }
423 
424  /* The suffix is a normal string, add as one node */
425  newctx = new_regex_ctx (ctx->size);
426  newctx->s = suffix;
427  move_children (newctx, ctx);
428  idx = c2i (suffix[0], ctx->size);
429  ctx->children[idx] = newctx;
430 }
431 
432 
439 static void
440 regex_add (struct RegexCombineCtx *ctx, const char *regex)
441 {
442  struct RegexCombineCtx *p;
443  struct RegexCombineCtx *newctx;
444  long unsigned int l;
445  unsigned int prefix_l;
446  const char *rest_r;
447  const char *rest_s;
448  size_t len;
449  int idx;
450 
452  "regex_add '%s' into '%s'\n",
453  regex, ctx->s);
454  l = strlen (regex);
455  if (0UL == l)
456  return;
457 
458  /* If the regex is in the form of (a|b|c), add every character separately */
459  if ('(' == regex[0])
460  {
461  regex_add_multiple (ctx, regex, NULL);
462  return;
463  }
464 
465  p = get_longest_prefix (ctx, regex);
466  if (NULL != p)
467  {
468  /* There is some prefix match, reduce regex and try again */
469  prefix_l = get_prefix_length (p->s, regex);
470  rest_s = &p->s[prefix_l];
471  rest_r = &regex[prefix_l];
472  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "chosen '%s' [%u]\n", p->s, prefix_l);
473  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "prefix r '%.*s'\n", prefix_l, p->s);
474  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest r '%s'\n", rest_r);
475  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest s '%s'\n", rest_s);
476  len = strlen (p->s);
477  if (prefix_l < len)
478  {
479  regex_split (p, len, prefix_l);
480  }
481  regex_add (p, rest_r);
482  return;
483  }
484 
485  /* There is no prefix match, add new */
486  idx = c2i (regex[0], ctx->size);
487  if ((NULL == ctx->children[idx]) && (NULL != ctx->s))
488  {
489  /* this was the end before, add empty string */
490  newctx = new_regex_ctx (ctx->size);
491  newctx->s = GNUNET_strdup ("");
492  ctx->children[idx] = newctx;
493  }
494  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " no match\n");
495  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new state %s\n", regex);
496  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " under %s\n", ctx->s);
497  newctx = new_regex_ctx (ctx->size);
498  newctx->s = GNUNET_strdup (regex);
499  ctx->children[idx] = newctx;
500 }
501 
502 
508 static void
510 {
511  unsigned int i;
512 
513  if (NULL == ctx)
514  return;
515 
516  for (i = 0; i < ctx->size; i++)
517  {
518  regex_ctx_destroy (ctx->children[i]);
519  }
520  GNUNET_free_non_null (ctx->s); /* 's' on root node is null */
521  GNUNET_free (ctx->children);
522  GNUNET_free (ctx);
523 }
524 
525 
540 char *
541 REGEX_TEST_combine (char *const regexes[], unsigned int alphabet_size)
542 {
543  unsigned int i;
544  char *combined;
545  const char *current;
546  struct RegexCombineCtx *ctx;
547 
548  ctx = new_regex_ctx (alphabet_size);
549  for (i = 0; regexes[i]; i++)
550  {
551  current = regexes[i];
552  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Regex %u: %s\n", i, current);
553  regex_add (ctx, current);
554  debugctx (ctx, 0);
555  }
556  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "\nCombining...\n");
557  debugctx (ctx, 0);
558 
559  combined = regex_combine (ctx);
560 
561  regex_ctx_destroy (ctx);
562 
563  return combined;
564 }
565 
566 
576 char **
578 {
579  struct GNUNET_DISK_FileHandle *f;
580  unsigned int nr;
581  unsigned int offset;
582  off_t size;
583  size_t len;
584  char *buffer;
585  char *regex;
586  char **regexes;
587 
588  f = GNUNET_DISK_file_open (filename,
591  if (NULL == f)
592  {
594  "Can't open file %s for reading\n", filename);
595  return NULL;
596  }
597  if (GNUNET_OK != GNUNET_DISK_file_handle_size (f, &size))
598  {
600  "Can't get size of file %s\n", filename);
602  return NULL;
603  }
605  "using file %s, size %llu\n",
606  filename, (unsigned long long) size);
607 
608  buffer = GNUNET_malloc (size + 1);
609  GNUNET_DISK_file_read (f, buffer, size);
611  regexes = GNUNET_malloc (sizeof(char *));
612  nr = 1;
613  offset = 0;
614  regex = NULL;
615  do
616  {
617  if (NULL == regex)
618  regex = GNUNET_malloc (size + 1);
619  len = (size_t) sscanf (&buffer[offset], "%s", regex);
620  if (0 == len)
621  break;
622  len = strlen (regex);
623  offset += len + 1;
624  if (len < 1)
625  continue;
626  regex[len] = '\0';
627  regex = GNUNET_realloc (regex, len + 1);
628  GNUNET_array_grow (regexes, nr, nr + 1);
629  GNUNET_assert (NULL == regexes[nr - 2]);
630  regexes[nr - 2] = regex;
631  regexes[nr - 1] = NULL;
632  regex = NULL;
633  }
634  while (offset < size);
635  GNUNET_free_non_null (regex);
636  GNUNET_free (buffer);
637 
638  return regexes;
639 }
640 
641 
647 void
649 {
650  unsigned int i;
651 
652  for (i = 0; regexes[i]; i++)
653  GNUNET_free (regexes[i]);
654  GNUNET_free (regexes);
655 }
656 
657 
658 /* end of regex_test_lib.c */
Open the file for reading.
int GNUNET_DISK_file_close(struct GNUNET_DISK_FileHandle *h)
Close an open file.
Definition: disk.c:1345
static void space(int n)
Printf spaces to indent the regex tree.
char * s
Token.
struct RegexCombineCtx ** children
Child nodes with same prefix and token.
ssize_t GNUNET_DISK_file_read(const struct GNUNET_DISK_FileHandle *h, void *result, size_t len)
Read the contents of a binary file into a buffer.
Definition: disk.c:732
Struct to hold the tree formed by prefix-combining the regexes.
#define GNUNET_assert(cond)
Use this for fatal errors that cannot be handled.
static struct RegexCombineCtx * get_longest_prefix(struct RegexCombineCtx *ctx, const char *regex)
Return the child context with the longest prefix match with the regex.
Nobody is allowed to do anything to the file.
#define GNUNET_memcpy(dst, src, n)
Call memcpy() but check for n being 0 first.
#define GNUNET_NO
Definition: gnunet_common.h:78
#define GNUNET_OK
Named constants for return values.
Definition: gnunet_common.h:75
#define GNUNET_free_non_null(ptr)
Free the memory pointed to by ptr if ptr is not NULL.
#define GNUNET_new(type)
Allocate a struct or union of the given type.
static void regex_add_multiple(struct RegexCombineCtx *ctx, const char *regex, struct RegexCombineCtx **children)
#define GNUNET_strdup(a)
Wrapper around GNUNET_xstrdup_.
unsigned int size
Alphabet size (how many children there are)
#define GNUNET_break(cond)
Use this for internal assertion violations that are not fatal (can be handled) but should not occur...
static struct GNUNET_DNSSTUB_Context * ctx
Context for DNS resolution.
static void debugctx(struct RegexCombineCtx *ctx, int level)
Printf the combined regex ctx.
static void move_children(struct RegexCombineCtx *dst, const struct RegexCombineCtx *src)
static struct LoggingHandle * l
int GNUNET_asprintf(char **buf, const char *format,...)
Like asprintf, just portable.
static void regex_ctx_destroy(struct RegexCombineCtx *ctx)
Free all resources used by the context node and all its children.
char * REGEX_TEST_combine(char *const regexes[], unsigned int alphabet_size)
Combine an array of regexes into a single prefix-shared regex.
#define GNUNET_array_grow(arr, size, tsize)
Grow a well-typed (!) array.
static struct GNUNET_OS_Process * p
Helper process we started.
Definition: gnunet-qr.c:59
#define GNUNET_realloc(ptr, size)
Wrapper around realloc.
static int c2i(char c, int size)
Char 2 int.
static char * filename
static char * regex_combine(struct RegexCombineCtx *ctx)
Extract a string from all prefix-combined regexes.
static void regex_split(struct RegexCombineCtx *ctx, unsigned int len, unsigned int prefix_l)
Add a single regex to a context, splitting the exisiting state.
void REGEX_TEST_free_from_file(char **regexes)
Free all memory reserved for a set of regexes created by read_from_file.
int GNUNET_DISK_file_handle_size(struct GNUNET_DISK_FileHandle *fh, off_t *size)
Get the size of an open file.
Definition: disk.c:206
char ** REGEX_TEST_read_from_file(const char *filename)
Read a set of regexes from a file, one per line and return them in an array suitable for REGEX_TEST_c...
#define GNUNET_log(kind,...)
static struct RegexCombineCtx * new_regex_ctx(unsigned int alphabet_size)
Create and initialize a new RegexCombineCtx.
#define GNUNET_YES
Definition: gnunet_common.h:77
struct GNUNET_DISK_FileHandle * GNUNET_DISK_file_open(const char *fn, enum GNUNET_DISK_OpenFlags flags, enum GNUNET_DISK_AccessPermissions perm)
Open a file.
Definition: disk.c:1268
static void regex_add(struct RegexCombineCtx *ctx, const char *regex)
Add a single regex to a context, combining with exisiting regex by-prefix.
size_t GNUNET_strlcpy(char *dst, const char *src, size_t n)
Like strlcpy but portable.
Definition: strings.c:219
Handle used to access files (and pipes).
static unsigned int get_prefix_length(const char *s1, const char *s2)
Get the number of matching characters on the prefix of both strings.
#define GNUNET_malloc(size)
Wrapper around malloc.
#define GNUNET_free(ptr)
Wrapper around free.
uint16_t len
length of data (which is always a uint32_t, but presumably this can be used to specify that fewer byt...