GNUnet  0.19.4
fs_sharetree.c
Go to the documentation of this file.
1 /*
2  This file is part of GNUnet
3  Copyright (C) 2005-2012 GNUnet e.V.
4 
5  GNUnet is free software: you can redistribute it and/or modify it
6  under the terms of the GNU Affero General Public License as published
7  by the Free Software Foundation, either version 3 of the License,
8  or (at your option) any later version.
9 
10  GNUnet is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  Affero General Public License for more details.
14 
15  You should have received a copy of the GNU Affero General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  SPDX-License-Identifier: AGPL3.0-or-later
19  */
20 
27 #include "platform.h"
28 
29 #include "gnunet_fs_service.h"
30 #include "gnunet_scheduler_lib.h"
31 #include <pthread.h>
32 
33 
/**
 * Entry for each unique keyword to track how often it occurred.
 */
struct KeywordCounter
{
  /**
   * This is a doubly-linked list.
   */
  struct KeywordCounter *prev;

  /**
   * This is a doubly-linked list.
   */
  struct KeywordCounter *next;

  /**
   * Keyword that was found.
   */
  const char *value;

  /**
   * How many files have this keyword?
   */
  unsigned int count;
};
60 
61 
66 {
70  struct MetaCounter *prev;
71 
75  struct MetaCounter *next;
76 
80  const char *plugin_name;
81 
85  const char *data_mime_type;
86 
90  const char *data;
91 
95  size_t data_size;
96 
100  enum EXTRACTOR_MetaType type;
101 
105  enum EXTRACTOR_MetaFormat format;
106 
111  unsigned int count;
112 };
113 
114 
/**
 * State shared by the callbacks used while trimming a share tree.
 */
struct TrimContext
{
  /**
   * Map from the hash over the keyword to a 'struct KeywordCounter *'
   * counter that says how often this keyword was encountered.
   */
  struct GNUNET_CONTAINER_MultiHashMap *keywordcounter;

  /**
   * Map from the hash over the metadata to a 'struct MetaCounter *'
   * counter that says how often this metadata was encountered.
   */
  struct GNUNET_CONTAINER_MultiHashMap *metacounter;

  /**
   * Position we are currently manipulating.
   */
  struct GNUNET_FS_ShareTreeItem *pos;

  /**
   * Number of times an item has to be found to be moved to the parent.
   */
  unsigned int move_threshold;
};
145 
146 
155 static int
156 add_to_keyword_counter (void *cls, const char *keyword, int is_mandatory)
157 {
158  struct GNUNET_CONTAINER_MultiHashMap *mcm = cls;
159  struct KeywordCounter *cnt;
160  struct GNUNET_HashCode hc;
161  size_t klen;
162 
163  klen = strlen (keyword) + 1;
164  GNUNET_CRYPTO_hash (keyword, klen - 1, &hc);
165  cnt = GNUNET_CONTAINER_multihashmap_get (mcm, &hc);
166  if (cnt == NULL)
167  {
168  cnt = GNUNET_malloc (sizeof(struct KeywordCounter) + klen);
169  cnt->value = (const char *) &cnt[1];
170  GNUNET_memcpy (&cnt[1], keyword, klen);
173  &hc, cnt,
175  }
176  cnt->count++;
177  return GNUNET_OK;
178 }
179 
180 
198 static int
199 add_to_meta_counter (void *cls, const char *plugin_name,
200  enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat
201  format,
202  const char *data_mime_type, const char *data, size_t
203  data_len)
204 {
205  struct GNUNET_CONTAINER_MultiHashMap *map = cls;
206  struct GNUNET_HashCode key;
207  struct MetaCounter *cnt;
208 
209  GNUNET_CRYPTO_hash (data, data_len, &key);
211  if (NULL == cnt)
212  {
213  cnt = GNUNET_new (struct MetaCounter);
214  cnt->data = data;
215  cnt->data_size = data_len;
216  cnt->plugin_name = plugin_name;
217  cnt->type = type;
218  cnt->format = format;
222  &key, cnt,
224  }
225  cnt->count++;
226  return 0;
227 }
228 
229 
238 static int
239 remove_high_frequency_keywords (void *cls, const char *keyword, int
240  is_mandatory)
241 {
242  struct TrimContext *tc = cls;
243  struct KeywordCounter *counter;
244  struct GNUNET_HashCode hc;
245  size_t klen;
246 
247  klen = strlen (keyword) + 1;
248  GNUNET_CRYPTO_hash (keyword, klen - 1, &hc);
249  counter = GNUNET_CONTAINER_multihashmap_get (tc->keywordcounter, &hc);
250  GNUNET_assert (NULL != counter);
251  if (counter->count < tc->move_threshold)
252  return GNUNET_OK;
253  GNUNET_FS_uri_ksk_remove_keyword (tc->pos->ksk_uri,
254  counter->value);
255  return GNUNET_OK;
256 }
257 
258 
268 static int
270  void *value)
271 {
272  struct TrimContext *tc = cls;
273  struct KeywordCounter *counter = value;
274 
275  if (counter->count >= tc->move_threshold)
276  {
277  if (NULL == tc->pos->ksk_uri)
278  tc->pos->ksk_uri = GNUNET_FS_uri_ksk_create_from_args (1,
279  &counter->value);
280  else
281  GNUNET_FS_uri_ksk_add_keyword (tc->pos->ksk_uri, counter->value,
282  GNUNET_NO);
283  }
285  GNUNET_CONTAINER_multihashmap_remove (tc->keywordcounter,
286  key,
287  counter));
288  GNUNET_free (counter);
289  return GNUNET_YES;
290 }
291 
292 
302 static int
304  void *value)
305 {
306  struct TrimContext *tc = cls;
307  struct MetaCounter *counter = value;
308 
309  if (counter->count >= tc->move_threshold)
310  {
311  if (NULL == tc->pos->meta)
312  tc->pos->meta = GNUNET_FS_meta_data_create ();
313  GNUNET_FS_meta_data_insert (tc->pos->meta,
314  counter->plugin_name,
315  counter->type,
316  counter->format,
317  counter->data_mime_type, counter->data,
318  counter->data_size);
319  }
322  key,
323  counter));
324  GNUNET_free (counter);
325  return GNUNET_YES;
326 }
327 
328 
336 static void
338  struct GNUNET_FS_ShareTreeItem *tree)
339 {
340  struct GNUNET_FS_ShareTreeItem *pos;
341  unsigned int num_children;
342 
343  /* first, trim all children */
344  num_children = 0;
345  for (pos = tree->children_head; NULL != pos; pos = pos->next)
346  {
347  share_tree_trim (tc, pos);
348  num_children++;
349  }
350 
351  /* consider adding filename to directory meta data */
352  if (tree->is_directory == GNUNET_YES)
353  {
354  const char *user = getenv ("USER");
355  if ((user == NULL) ||
356  (0 != strncasecmp (user, tree->short_filename, strlen (user))))
357  {
358  /* only use filename if it doesn't match $USER */
359  if (NULL == tree->meta)
360  tree->meta = GNUNET_FS_meta_data_create ();
361  GNUNET_FS_meta_data_insert (tree->meta, "<libgnunetfs>",
363  EXTRACTOR_METAFORMAT_UTF8,
364  "text/plain", tree->short_filename,
365  strlen (tree->short_filename) + 1);
366  }
367  }
368 
369  if (1 >= num_children)
370  return; /* nothing to trim */
371 
372  /* now, count keywords and meta data in children */
373  for (pos = tree->children_head; NULL != pos; pos = pos->next)
374  {
375  if (NULL != pos->meta)
377  tc->metacounter);
378  if (NULL != pos->ksk_uri)
380  tc->keywordcounter);
381  }
382 
383  /* calculate threshold for moving keywords / meta data */
384  tc->move_threshold = 1 + (num_children / 2);
385 
386  /* remove high-frequency keywords from children */
387  for (pos = tree->children_head; NULL != pos; pos = pos->next)
388  {
389  tc->pos = pos;
390  if (NULL != pos->ksk_uri)
391  {
392  struct GNUNET_FS_Uri *ksk_uri_copy = GNUNET_FS_uri_dup (pos->ksk_uri);
393  GNUNET_FS_uri_ksk_get_keywords (ksk_uri_copy,
395  GNUNET_FS_uri_destroy (ksk_uri_copy);
396  }
397  }
398 
399  /* add high-frequency meta data and keywords to parent */
400  tc->pos = tree;
401  GNUNET_CONTAINER_multihashmap_iterate (tc->keywordcounter,
403  tc);
406  tc);
407 }
408 
409 
416 void
418 {
419  struct TrimContext tc;
420 
421  if (toplevel == NULL)
422  return;
423  tc.keywordcounter = GNUNET_CONTAINER_multihashmap_create (1024, GNUNET_NO);
424  tc.metacounter = GNUNET_CONTAINER_multihashmap_create (1024, GNUNET_NO);
425  share_tree_trim (&tc, toplevel);
426  GNUNET_CONTAINER_multihashmap_destroy (tc.keywordcounter);
428 }
429 
430 
436 void
438 {
439  struct GNUNET_FS_ShareTreeItem *pos;
440 
441  while (NULL != (pos = toplevel->children_head))
443  if (NULL != toplevel->parent)
445  toplevel->parent->children_tail,
446  toplevel);
447  if (NULL != toplevel->meta)
448  GNUNET_FS_meta_data_destroy (toplevel->meta);
449  if (NULL != toplevel->ksk_uri)
450  GNUNET_FS_uri_destroy (toplevel->ksk_uri);
451  GNUNET_free (toplevel->filename);
452  GNUNET_free (toplevel->short_filename);
453  GNUNET_free (toplevel);
454 }
455 
456 
457 /* end fs_sharetree.c */
static int add_to_keyword_counter(void *cls, const char *keyword, int is_mandatory)
Add the given keyword to the keyword statistics tracker.
Definition: fs_sharetree.c:156
static int add_to_meta_counter(void *cls, const char *plugin_name, enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, const char *data_mime_type, const char *data, size_t data_len)
Function called on each meta data item.
Definition: fs_sharetree.c:199
static int migrate_and_drop_keywords(void *cls, const struct GNUNET_HashCode *key, void *value)
Move "frequent" keywords over to the target ksk uri, free the counters.
Definition: fs_sharetree.c:269
static void share_tree_trim(struct TrimContext *tc, struct GNUNET_FS_ShareTreeItem *tree)
Process a share item tree, moving frequent keywords up and copying frequent metadata up.
Definition: fs_sharetree.c:337
static int migrate_and_drop_metadata(void *cls, const struct GNUNET_HashCode *key, void *value)
Copy "frequent" metadata items over to the target metadata container, free the counters.
Definition: fs_sharetree.c:303
static int remove_high_frequency_keywords(void *cls, const char *keyword, int is_mandatory)
Remove keywords above the threshold.
Definition: fs_sharetree.c:239
char * getenv()
static struct GNUNET_CONTAINER_MultiPeerMap * map
Handle to the map used to store old latency values for peers.
struct GNUNET_HashCode key
The key used in the DHT.
uint32_t data
The data value.
static char * value
Value of the record to add/remove.
static char * plugin_name
Name of our plugin.
API for file sharing via GNUnet.
API to schedule computations using continuation passing style.
#define GNUNET_CONTAINER_DLL_remove(head, tail, element)
Remove an element from a DLL.
#define EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME
void GNUNET_FS_share_tree_free(struct GNUNET_FS_ShareTreeItem *toplevel)
Release memory of a share item tree.
Definition: fs_sharetree.c:437
void GNUNET_FS_uri_ksk_remove_keyword(struct GNUNET_FS_Uri *uri, const char *keyword)
Remove the given keyword from the set of keywords represented by the URI.
Definition: fs_uri.c:780
struct GNUNET_FS_Uri * GNUNET_FS_uri_ksk_create_from_args(unsigned int argc, const char **argv)
Create an FS URI from a user-supplied command line of keywords.
Definition: fs_uri.c:1144
int GNUNET_FS_uri_ksk_get_keywords(const struct GNUNET_FS_Uri *uri, GNUNET_FS_KeywordIterator iterator, void *iterator_cls)
Iterate over all keywords in this keyword URI.
Definition: fs_uri.c:720
void GNUNET_FS_uri_destroy(struct GNUNET_FS_Uri *uri)
Free URI.
Definition: fs_uri.c:677
struct GNUNET_FS_Uri * GNUNET_FS_uri_dup(const struct GNUNET_FS_Uri *uri)
Duplicate URI.
Definition: fs_uri.c:987
void GNUNET_FS_share_tree_trim(struct GNUNET_FS_ShareTreeItem *toplevel)
Process a share item tree, moving frequent keywords up and copying frequent metadata up.
Definition: fs_sharetree.c:417
void GNUNET_FS_uri_ksk_add_keyword(struct GNUNET_FS_Uri *uri, const char *keyword, int is_mandatory)
Add the given keyword to the set of keywords represented by the URI.
Definition: fs_uri.c:752
void GNUNET_CRYPTO_hash(const void *block, size_t size, struct GNUNET_HashCode *ret)
Compute hash of a given block.
Definition: crypto_hash.c:41
int GNUNET_CONTAINER_multihashmap_iterate(struct GNUNET_CONTAINER_MultiHashMap *map, GNUNET_CONTAINER_MultiHashMapIteratorCallback it, void *it_cls)
Iterate over all entries in the map.
enum GNUNET_GenericReturnValue GNUNET_CONTAINER_multihashmap_remove(struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key, const void *value)
Remove the given key-value pair from the map.
enum GNUNET_GenericReturnValue GNUNET_CONTAINER_multihashmap_put(struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key, void *value, enum GNUNET_CONTAINER_MultiHashMapOption opt)
Store a key-value pair in the map.
struct GNUNET_CONTAINER_MultiHashMap * GNUNET_CONTAINER_multihashmap_create(unsigned int len, int do_not_copy_keys)
Create a multi hash map.
void GNUNET_CONTAINER_multihashmap_destroy(struct GNUNET_CONTAINER_MultiHashMap *map)
Destroy a hash map.
void * GNUNET_CONTAINER_multihashmap_get(const struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key)
Given a key find a value in the map matching the key.
@ GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY
There must only be one value per key; storing a value should fail if a value under the same key already exists.
#define GNUNET_memcpy(dst, src, n)
Call memcpy() but check for n being 0 first.
@ GNUNET_OK
@ GNUNET_YES
@ GNUNET_NO
#define GNUNET_assert(cond)
Use this for fatal errors that cannot be handled.
#define GNUNET_new(type)
Allocate a struct or union of the given type.
#define GNUNET_malloc(size)
Wrapper around malloc.
#define GNUNET_free(ptr)
Wrapper around free.
int GNUNET_FS_meta_data_iterate(const struct GNUNET_FS_MetaData *md, EXTRACTOR_MetaDataProcessor iter, void *iter_cls)
Iterate over MD entries.
Definition: meta_data.c:422
struct GNUNET_FS_MetaData * GNUNET_FS_meta_data_create()
Create a fresh struct FS_MetaData token.
Definition: meta_data.c:131
int GNUNET_FS_meta_data_insert(struct GNUNET_FS_MetaData *md, const char *plugin_name, enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, const char *data_mime_type, const char *data, size_t data_size)
Extend metadata.
Definition: meta_data.c:258
void GNUNET_FS_meta_data_destroy(struct GNUNET_FS_MetaData *md)
Free meta data.
Definition: meta_data.c:170
static struct GNUNET_SCHEDULER_TaskContext tc
Task context of the current task.
Definition: scheduler.c:431
Internal representation of the hash map.
A node of a directory tree (produced by dirscanner)
int is_directory
GNUNET_YES if this is a directory
struct GNUNET_FS_ShareTreeItem * parent
This is a doubly-linked tree NULL for top-level entries.
struct GNUNET_FS_Uri * ksk_uri
Keywords for this file or directory (derived from metadata).
char * short_filename
Base name of the file/directory.
struct GNUNET_FS_ShareTreeItem * next
This is a doubly-linked list.
struct GNUNET_FS_ShareTreeItem * children_head
This is a doubly-linked tree NULL for files and empty directories.
struct GNUNET_FS_MetaData * meta
Metadata for this file or directory.
char * filename
Name of the file/directory.
struct GNUNET_FS_ShareTreeItem * children_tail
This is a doubly-linked tree NULL for files and empty directories.
A Universal Resource Identifier (URI), opaque.
Definition: fs_api.h:167
A 512-bit hashcode.
Entry for each unique keyword to track how often it occurred.
Definition: fs_sharetree.c:39
unsigned int count
How many files have this keyword?
Definition: fs_sharetree.c:58
const char * value
Keyword that was found.
Definition: fs_sharetree.c:53
struct KeywordCounter * next
This is a doubly-linked list.
Definition: fs_sharetree.c:48
struct KeywordCounter * prev
This is a doubly-linked list.
Definition: fs_sharetree.c:43
Aggregate information we keep for meta data in each directory.
Definition: fs_sharetree.c:66
enum EXTRACTOR_MetaFormat format
Format of the data.
Definition: fs_sharetree.c:105
enum EXTRACTOR_MetaType type
Type of the data.
Definition: fs_sharetree.c:100
const char * plugin_name
Name of the plugin that provided that piece of metadata.
Definition: fs_sharetree.c:80
unsigned int count
How many files have meta entries matching this value? (type and format do not have to match).
Definition: fs_sharetree.c:111
struct MetaCounter * next
This is a doubly-linked list.
Definition: fs_sharetree.c:75
size_t data_size
Number of bytes in 'data'.
Definition: fs_sharetree.c:95
const char * data_mime_type
MIME-type of the metadata itself.
Definition: fs_sharetree.c:85
const char * data
The actual meta data.
Definition: fs_sharetree.c:90
struct MetaCounter * prev
This is a doubly-linked list.
Definition: fs_sharetree.c:70
A structure that forms a singly-linked list that serves as a stack for metadata-processing function.
Definition: fs_sharetree.c:120
struct GNUNET_FS_ShareTreeItem * pos
Position we are currently manipulating.
Definition: fs_sharetree.c:138
unsigned int move_threshold
Number of times an item has to be found to be moved to the parent.
Definition: fs_sharetree.c:143
struct GNUNET_CONTAINER_MultiHashMap * keywordcounter
Map from the hash over the keyword to a 'struct KeywordCounter *' counter that says how often this keyword was encountered in the current directory.
Definition: fs_sharetree.c:126
struct GNUNET_CONTAINER_MultiHashMap * metacounter
Map from the hash over the metadata to a 'struct MetaCounter *' counter that says how often this metadata was encountered in the current directory.
Definition: fs_sharetree.c:133
enum GNUNET_TESTBED_UnderlayLinkModelType type
the type of this model