GNUnet  0.10.x
regex_test_lib.c
Go to the documentation of this file.
1 /*
2  * This file is part of GNUnet
3  * Copyright (C) 2012-2017 GNUnet e.V.
4  *
5  * GNUnet is free software: you can redistribute it and/or modify it
6  * under the terms of the GNU Affero General Public License as published
7  * by the Free Software Foundation, either version 3 of the License,
8  * or (at your option) any later version.
9  *
10  * GNUnet is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Affero General Public License for more details.
14  *
15  * You should have received a copy of the GNU Affero General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  SPDX-License-Identifier: AGPL3.0-or-later
19  */
28 #include "platform.h"
29 #include "gnunet_util_lib.h"
30 
31 
40 
44  unsigned int size;
45 
49  char *s;
50 };
51 
52 
63 static int
64 c2i(char c, int size)
65 {
66  switch (size)
67  {
68  case 2:
69  case 8:
70  return c - '0';
71  break;
72 
73  case 16:
74  if (c >= '0' && c <= '9')
75  return c - '0';
76  else if (c >= 'A' && c <= 'F')
77  return c - 'A' + 10;
78  else if (c >= 'a' && c <= 'f')
79  return c - 'a' + 10;
80  else
81  {
83  "Cannot convert char %c in base %u\n",
84  c, size);
85  GNUNET_assert(0);
86  }
87  break;
88 
89  default:
90  GNUNET_assert(0);
91  }
92 }
93 
94 
100 static void
101 space(int n)
102 {
103  for (int i = 0; i < n; i++)
104  fprintf(stderr, "| ");
105 }
106 
107 
114 static void
115 debugctx(struct RegexCombineCtx *ctx, int level)
116 {
117 #if DEBUG_REGEX
118  if (NULL != ctx->s)
119  {
120  space(level - 1);
121  fprintf(stderr, "%u:'%s'\n", c2i(ctx->s[0], ctx->size), ctx->s);
122  }
123  else
124  fprintf(stderr, "ROOT (base %u)\n", ctx->size);
125  for (unsigned int i = 0; i < ctx->size; i++)
126  {
127  if (NULL != ctx->children[i])
128  {
129  space(level);
130  debugctx(ctx->children[i], level + 1);
131  }
132  }
133  fflush(stderr);
134 #endif
135 }
136 
137 
144 static void
146  const char *regex);
147 
148 
154 static struct RegexCombineCtx *
155 new_regex_ctx(unsigned int alphabet_size)
156 {
157  struct RegexCombineCtx *ctx;
158  size_t array_size;
159 
160  array_size = sizeof(struct RegexCombineCtx *) * alphabet_size;
161  ctx = GNUNET_new(struct RegexCombineCtx);
162  ctx->children = GNUNET_malloc(array_size);
163  ctx->size = alphabet_size;
164 
165  return ctx;
166 }
167 
168 
169 static void
171  const struct RegexCombineCtx *src)
172 {
173  size_t array_size;
174 
175  array_size = sizeof(struct RegexCombineCtx *) * src->size;
176  GNUNET_memcpy(dst->children,
177  src->children,
178  array_size);
179  for (unsigned int i = 0; i < src->size; i++)
180  {
181  src->children[i] = NULL;
182  }
183 }
184 
185 
193 static char *
195 {
196  struct RegexCombineCtx *p;
197  unsigned int i;
198  size_t len;
199  char *regex;
200  char *tmp;
201  char *s;
202  int opt;
203 
204  GNUNET_log(GNUNET_ERROR_TYPE_DEBUG, "new combine %s\n", ctx->s);
205  regex = GNUNET_strdup("");
206  opt = GNUNET_NO;
207  for (i = 0; i < ctx->size; i++)
208  {
209  p = ctx->children[i];
210  if (NULL == p)
211  continue;
213  "adding '%s' to innner %s\n",
214  p->s, ctx->s);
215  s = regex_combine(p);
216  GNUNET_log(GNUNET_ERROR_TYPE_DEBUG, " total '%s'\n", s);
217  if (strlen(s) == 0)
218  {
219  opt = GNUNET_YES;
220  }
221  else
222  {
223  GNUNET_asprintf(&tmp, "%s%s|", regex, s);
224  GNUNET_free_non_null(regex);
225  regex = tmp;
226  }
228  GNUNET_log(GNUNET_ERROR_TYPE_DEBUG, " so far '%s' for inner %s\n", regex, ctx->s);
229  }
230 
231  GNUNET_log(GNUNET_ERROR_TYPE_DEBUG, "opt: %d, innner: '%s'\n", opt, regex);
232  len = strlen(regex);
233  if (0 == len)
234  {
235  GNUNET_log(GNUNET_ERROR_TYPE_DEBUG, "empty, returning ''\n");
236  GNUNET_free(regex);
237  return NULL == ctx->s ? NULL : GNUNET_strdup(ctx->s);
238  }
239 
240  if ('|' == regex[len - 1])
241  regex[len - 1] = '\0';
242 
243  if (NULL != ctx->s)
244  {
245  if (opt)
246  GNUNET_asprintf(&s, "%s(%s)?", ctx->s, regex);
247  else
248  GNUNET_asprintf(&s, "%s(%s)", ctx->s, regex);
249  GNUNET_free(regex);
250  regex = s;
251  }
252 
253  GNUNET_log(GNUNET_ERROR_TYPE_DEBUG, "partial: %s\n", regex);
254  return regex;
255 }
256 
257 
266 static unsigned int
267 get_prefix_length(const char *s1, const char *s2)
268 {
269  unsigned int l1;
270  unsigned int l2;
271  unsigned int limit;
272  unsigned int i;
273 
274  l1 = strlen(s1);
275  l2 = strlen(s2);
276  limit = l1 > l2 ? l2 : l1;
277 
278  for (i = 0; i < limit; i++)
279  {
280  if (s1[i] != s2[i])
281  return i;
282  }
283  return limit;
284 }
285 
286 
296 static struct RegexCombineCtx *
297 get_longest_prefix(struct RegexCombineCtx *ctx, const char *regex)
298 {
299  struct RegexCombineCtx *p;
300  struct RegexCombineCtx *best;
301  unsigned int i;
302  unsigned int l;
303  unsigned int best_l;
304 
305  best_l = 0;
306  best = NULL;
307 
308  for (i = 0; i < ctx->size; i++)
309  {
310  p = ctx->children[i];
311  if (NULL == p)
312  continue;
313 
314  l = get_prefix_length(p->s, regex);
315  if (l > best_l)
316  {
317  GNUNET_break(0 == best_l);
318  best = p;
319  best_l = l;
320  }
321  }
322  return best;
323 }
324 
325 static void
327  const char *regex,
328  struct RegexCombineCtx **children)
329 {
330  char tmp[2];
331  long unsigned int i;
332  size_t l;
333  struct RegexCombineCtx *newctx;
334  unsigned int count;
335 
336  if ('(' != regex[0])
337  {
338  GNUNET_assert(0);
339  }
340 
341  /* Does the regex cover *all* possible children? Then don't add any,
342  * as it will be covered by the post-regex "(a-z)*"
343  */
344  l = strlen(regex);
345  count = 0;
346  for (i = 1UL; i < l; i++)
347  {
348  if (regex[i] != '|' && regex[i] != ')')
349  {
350  count++;
351  }
352  }
353  if (count == ctx->size)
354  {
355  return;
356  }
357 
358  /* Add every component as a child node */
359  tmp[1] = '\0';
360  for (i = 1UL; i < l; i++)
361  {
362  if (regex[i] != '|' && regex[i] != ')')
363  {
364  tmp[0] = regex[i];
365  newctx = new_regex_ctx(ctx->size);
366  newctx->s = GNUNET_strdup(tmp);
367  if (children != NULL)
368  GNUNET_memcpy(newctx->children,
369  children,
370  sizeof(*children) * ctx->size);
371  ctx->children[c2i(tmp[0], ctx->size)] = newctx;
372  }
373  }
374 }
375 
386 static void
388  unsigned int len,
389  unsigned int prefix_l)
390 {
391  struct RegexCombineCtx *newctx;
392  unsigned int idx;
393  char *suffix;
394 
395  suffix = GNUNET_malloc(len - prefix_l + 1);
396  /*
397  * We can use GNUNET_strlcpy because ctx->s is null-terminated
398  */
399  GNUNET_strlcpy(suffix, &ctx->s[prefix_l], len - prefix_l + 1);
400 
401  /* Suffix saved, truncate current node so it only contains the prefix,
402  * copy any children nodes to put as grandchildren and initialize new empty
403  * children array.
404  */
405  ctx->s[prefix_l] = '\0';
406 
407  /* If the suffix is an OR expression, add multiple children */
408  if ('(' == suffix[0])
409  {
410  struct RegexCombineCtx **tmp;
411 
412  tmp = ctx->children;
413  ctx->children = GNUNET_malloc(sizeof(*tmp) * ctx->size);
414  regex_add_multiple(ctx, suffix, tmp);
415  GNUNET_free(suffix);
416  GNUNET_free(tmp);
417  return;
418  }
419 
420  /* The suffix is a normal string, add as one node */
421  newctx = new_regex_ctx(ctx->size);
422  newctx->s = suffix;
423  move_children(newctx, ctx);
424  idx = c2i(suffix[0], ctx->size);
425  ctx->children[idx] = newctx;
426 }
427 
428 
435 static void
436 regex_add(struct RegexCombineCtx *ctx, const char *regex)
437 {
438  struct RegexCombineCtx *p;
439  struct RegexCombineCtx *newctx;
440  long unsigned int l;
441  unsigned int prefix_l;
442  const char *rest_r;
443  const char *rest_s;
444  size_t len;
445  int idx;
446 
448  "regex_add '%s' into '%s'\n",
449  regex, ctx->s);
450  l = strlen(regex);
451  if (0UL == l)
452  return;
453 
454  /* If the regex is in the form of (a|b|c), add every character separately */
455  if ('(' == regex[0])
456  {
457  regex_add_multiple(ctx, regex, NULL);
458  return;
459  }
460 
461  p = get_longest_prefix(ctx, regex);
462  if (NULL != p)
463  {
464  /* There is some prefix match, reduce regex and try again */
465  prefix_l = get_prefix_length(p->s, regex);
466  rest_s = &p->s[prefix_l];
467  rest_r = &regex[prefix_l];
468  GNUNET_log(GNUNET_ERROR_TYPE_DEBUG, "chosen '%s' [%u]\n", p->s, prefix_l);
469  GNUNET_log(GNUNET_ERROR_TYPE_DEBUG, "prefix r '%.*s'\n", prefix_l, p->s);
470  GNUNET_log(GNUNET_ERROR_TYPE_DEBUG, "rest r '%s'\n", rest_r);
471  GNUNET_log(GNUNET_ERROR_TYPE_DEBUG, "rest s '%s'\n", rest_s);
472  len = strlen(p->s);
473  if (prefix_l < len)
474  {
475  regex_split(p, len, prefix_l);
476  }
477  regex_add(p, rest_r);
478  return;
479  }
480 
481  /* There is no prefix match, add new */
482  idx = c2i(regex[0], ctx->size);
483  if (NULL == ctx->children[idx] && NULL != ctx->s)
484  {
485  /* this was the end before, add empty string */
486  newctx = new_regex_ctx(ctx->size);
487  newctx->s = GNUNET_strdup("");
488  ctx->children[idx] = newctx;
489  }
490  GNUNET_log(GNUNET_ERROR_TYPE_DEBUG, " no match\n");
491  GNUNET_log(GNUNET_ERROR_TYPE_DEBUG, " new state %s\n", regex);
492  GNUNET_log(GNUNET_ERROR_TYPE_DEBUG, " under %s\n", ctx->s);
493  newctx = new_regex_ctx(ctx->size);
494  newctx->s = GNUNET_strdup(regex);
495  ctx->children[idx] = newctx;
496 }
497 
498 
504 static void
506 {
507  unsigned int i;
508 
509  if (NULL == ctx)
510  return;
511 
512  for (i = 0; i < ctx->size; i++)
513  {
514  regex_ctx_destroy(ctx->children[i]);
515  }
516  GNUNET_free_non_null(ctx->s); /* 's' on root node is null */
517  GNUNET_free(ctx->children);
518  GNUNET_free(ctx);
519 }
520 
521 
536 char *
537 REGEX_TEST_combine(char * const regexes[], unsigned int alphabet_size)
538 {
539  unsigned int i;
540  char *combined;
541  const char *current;
542  struct RegexCombineCtx *ctx;
543 
544  ctx = new_regex_ctx(alphabet_size);
545  for (i = 0; regexes[i]; i++)
546  {
547  current = regexes[i];
548  GNUNET_log(GNUNET_ERROR_TYPE_DEBUG, "Regex %u: %s\n", i, current);
549  regex_add(ctx, current);
550  debugctx(ctx, 0);
551  }
552  GNUNET_log(GNUNET_ERROR_TYPE_DEBUG, "\nCombining...\n");
553  debugctx(ctx, 0);
554 
555  combined = regex_combine(ctx);
556 
557  regex_ctx_destroy(ctx);
558 
559  return combined;
560 }
561 
562 
572 char **
574 {
575  struct GNUNET_DISK_FileHandle *f;
576  unsigned int nr;
577  unsigned int offset;
578  off_t size;
579  size_t len;
580  char *buffer;
581  char *regex;
582  char **regexes;
583 
584  f = GNUNET_DISK_file_open(filename,
587  if (NULL == f)
588  {
590  "Can't open file %s for reading\n", filename);
591  return NULL;
592  }
593  if (GNUNET_OK != GNUNET_DISK_file_handle_size(f, &size))
594  {
596  "Can't get size of file %s\n", filename);
598  return NULL;
599  }
601  "using file %s, size %llu\n",
602  filename, (unsigned long long)size);
603 
604  buffer = GNUNET_malloc(size + 1);
605  GNUNET_DISK_file_read(f, buffer, size);
607  regexes = GNUNET_malloc(sizeof(char *));
608  nr = 1;
609  offset = 0;
610  regex = NULL;
611  do
612  {
613  if (NULL == regex)
614  regex = GNUNET_malloc(size + 1);
615  len = (size_t)sscanf(&buffer[offset], "%s", regex);
616  if (0 == len)
617  break;
618  len = strlen(regex);
619  offset += len + 1;
620  if (len < 1)
621  continue;
622  regex[len] = '\0';
623  regex = GNUNET_realloc(regex, len + 1);
624  GNUNET_array_grow(regexes, nr, nr + 1);
625  GNUNET_assert(NULL == regexes[nr - 2]);
626  regexes[nr - 2] = regex;
627  regexes[nr - 1] = NULL;
628  regex = NULL;
629  }
630  while (offset < size);
631  GNUNET_free_non_null(regex);
632  GNUNET_free(buffer);
633 
634  return regexes;
635 }
636 
637 
643 void
645 {
646  unsigned int i;
647 
648  for (i = 0; regexes[i]; i++)
649  GNUNET_free(regexes[i]);
650  GNUNET_free(regexes);
651 }
652 
653 /* end of regex_test_lib.c */
Open the file for reading.
int GNUNET_DISK_file_close(struct GNUNET_DISK_FileHandle *h)
Close an open file.
Definition: disk.c:1339
static void space(int n)
Printf spaces to indent the regex tree.
char * s
Token.
struct RegexCombineCtx ** children
Child nodes with same prefix and token.
ssize_t GNUNET_DISK_file_read(const struct GNUNET_DISK_FileHandle *h, void *result, size_t len)
Read the contents of a binary file into a buffer.
Definition: disk.c:729
Struct to hold the tree formed by prefix-combining the regexes.
#define GNUNET_assert(cond)
Use this for fatal errors that cannot be handled.
static struct RegexCombineCtx * get_longest_prefix(struct RegexCombineCtx *ctx, const char *regex)
Return the child context with the longest prefix match with the regex.
Nobody is allowed to do anything to the file.
#define GNUNET_memcpy(dst, src, n)
Call memcpy() but check for n being 0 first.
#define GNUNET_NO
Definition: gnunet_common.h:78
#define GNUNET_OK
Named constants for return values.
Definition: gnunet_common.h:75
#define GNUNET_free_non_null(ptr)
Free the memory pointed to by ptr if ptr is not NULL.
#define GNUNET_new(type)
Allocate a struct or union of the given type.
static void regex_add_multiple(struct RegexCombineCtx *ctx, const char *regex, struct RegexCombineCtx **children)
#define GNUNET_strdup(a)
Wrapper around GNUNET_xstrdup_.
unsigned int size
Alphabet size (how many children there are)
#define GNUNET_break(cond)
Use this for internal assertion violations that are not fatal (can be handled) but should not occur...
static struct GNUNET_DNSSTUB_Context * ctx
Context for DNS resolution.
static void debugctx(struct RegexCombineCtx *ctx, int level)
Printf the combined regex ctx.
static void move_children(struct RegexCombineCtx *dst, const struct RegexCombineCtx *src)
static struct LoggingHandle * l
int GNUNET_asprintf(char **buf, const char *format,...)
Like asprintf, just portable.
static void regex_ctx_destroy(struct RegexCombineCtx *ctx)
Free all resources used by the context node and all its children.
char * REGEX_TEST_combine(char *const regexes[], unsigned int alphabet_size)
Combine an array of regexes into a single prefix-shared regex.
#define GNUNET_array_grow(arr, size, tsize)
Grow a well-typed (!) array.
static struct GNUNET_OS_Process * p
Helper process we started.
Definition: gnunet-qr.c:59
#define GNUNET_realloc(ptr, size)
Wrapper around realloc.
static int c2i(char c, int size)
Char 2 int.
static char * filename
static char * regex_combine(struct RegexCombineCtx *ctx)
Extract a string from all prefix-combined regexes.
static void regex_split(struct RegexCombineCtx *ctx, unsigned int len, unsigned int prefix_l)
Add a single regex to a context, splitting the exisiting state.
void REGEX_TEST_free_from_file(char **regexes)
Free all memory reserved for a set of regexes created by read_from_file.
int GNUNET_DISK_file_handle_size(struct GNUNET_DISK_FileHandle *fh, off_t *size)
Get the size of an open file.
Definition: disk.c:203
char ** REGEX_TEST_read_from_file(const char *filename)
Read a set of regexes from a file, one per line and return them in an array suitable for REGEX_TEST_c...
#define GNUNET_log(kind,...)
static struct RegexCombineCtx * new_regex_ctx(unsigned int alphabet_size)
Create and initialize a new RegexCombineCtx.
#define GNUNET_YES
Definition: gnunet_common.h:77
struct GNUNET_DISK_FileHandle * GNUNET_DISK_file_open(const char *fn, enum GNUNET_DISK_OpenFlags flags, enum GNUNET_DISK_AccessPermissions perm)
Open a file.
Definition: disk.c:1262
static void regex_add(struct RegexCombineCtx *ctx, const char *regex)
Add a single regex to a context, combining with exisiting regex by-prefix.
size_t GNUNET_strlcpy(char *dst, const char *src, size_t n)
Like strlcpy but portable.
Definition: strings.c:219
Handle used to access files (and pipes).
static unsigned int get_prefix_length(const char *s1, const char *s2)
Get the number of matching characters on the prefix of both strings.
#define GNUNET_malloc(size)
Wrapper around malloc.
#define GNUNET_free(ptr)
Wrapper around free.
uint16_t len
length of data (which is always a uint32_t, but presumably this can be used to specify that fewer byt...