GNUnet  0.11.x
fs_sharetree.c
Go to the documentation of this file.
1 /*
2  This file is part of GNUnet
3  Copyright (C) 2005-2012 GNUnet e.V.
4 
5  GNUnet is free software: you can redistribute it and/or modify it
6  under the terms of the GNU Affero General Public License as published
7  by the Free Software Foundation, either version 3 of the License,
8  or (at your option) any later version.
9 
10  GNUnet is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  Affero General Public License for more details.
14 
15  You should have received a copy of the GNU Affero General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  SPDX-License-Identifier: AGPL3.0-or-later
19  */
20 
27 #include "platform.h"
28 #include "gnunet_fs_service.h"
29 #include "gnunet_scheduler_lib.h"
30 #include <pthread.h>
31 
32 
/* Entry for each unique keyword, tracking how often it occurred.
 * NOTE(review): this listing dropped the `struct KeywordCounter` header line
 * (listing line 37) and the prev/next list pointers (listing lines 42 and 47,
 * per the cross-reference section); restore them from the real file. */
38 {
43 
48 
    /* Keyword that was found. */
52  const char *value;
53 
    /* How many files have this keyword? */
57  unsigned int count;
58 };
59 
60 
/* Aggregate information kept for meta data in each directory.
 * NOTE(review): this listing dropped the `struct MetaCounter` header line
 * (listing line 64) and the `type` / `format` members (listing lines 99 and
 * 104, per the cross-reference section); restore them from the real file. */
65 {
    /* This is a doubly-linked list. */
69  struct MetaCounter *prev;
70 
    /* This is a doubly-linked list. */
74  struct MetaCounter *next;
75 
    /* Name of the plugin that provided that piece of metadata. */
79  const char *plugin_name;
80 
    /* MIME-type of the metadata itself. */
84  const char *data_mime_type;
85 
    /* The actual meta data. */
89  const char *data;
90 
    /* Number of bytes in 'data'. */
94  size_t data_size;
95 
100 
105 
    /* How many files have meta entries matching this value?
     * (type and format do not have to match) */
110  unsigned int count;
111 };
112 
113 
/* Working state shared by the trimming functions.
 * NOTE(review): this listing dropped the `struct TrimContext` header line
 * (listing line 118) and three members that the code below uses:
 * `keywordcounter`, `metacounter` (both hash maps) and `pos` (listing lines
 * 125, 132 and 137, per the cross-reference section); restore them. */
119 {
126 
133 
138 
    /* Number of times an item has to be found to be moved to the parent. */
142  unsigned int move_threshold;
143 };
144 
145 
/* Add the given keyword to the keyword statistics tracker.
 *
 * cls          - the hash map holding the struct KeywordCounter entries
 * keyword      - 0-terminated keyword to count
 * is_mandatory - not used by this function
 *
 * Always returns GNUNET_OK. */
154 static int
155 add_to_keyword_counter (void *cls, const char *keyword, int is_mandatory)
156 {
157  struct GNUNET_CONTAINER_MultiHashMap *mcm = cls;
158  struct KeywordCounter *cnt;
159  struct GNUNET_HashCode hc;
160  size_t klen;
161 
     /* klen includes the terminating NUL; the hash is taken over the text only. */
162  klen = strlen (keyword) + 1;
163  GNUNET_CRYPTO_hash (keyword, klen - 1, &hc);
164  cnt = GNUNET_CONTAINER_multihashmap_get (mcm, &hc);
165  if (cnt == NULL)
166  {
     /* First sighting: one allocation holds the counter followed by a private
      * copy of the keyword (including its NUL); `value` points at that copy. */
167  cnt = GNUNET_malloc (sizeof(struct KeywordCounter) + klen);
168  cnt->value = (const char *) &cnt[1];
169  GNUNET_memcpy (&cnt[1], keyword, klen);
     /* NOTE(review): listing lines 170-171 and 173 are missing here; the
      * fragment below is presumably the argument list of the
      * GNUNET_CONTAINER_multihashmap_put call that registers cnt under hc. */
172  &hc, cnt,
174  }
     /* NOTE(review): for a fresh entry this relies on GNUNET_malloc returning
      * zeroed memory so that count starts at 0 -- TODO confirm. */
175  cnt->count++;
176  return GNUNET_OK;
177 }
178 
179 
/* Function called on each meta data item; counts how often each distinct
 * data value is seen.
 *
 * cls            - the hash map holding the struct MetaCounter entries
 * plugin_name    - name of the plugin that produced the item
 * format         - format of the data
 * data_mime_type - MIME type of the data
 * data           - the meta data bytes
 * data_len       - number of bytes in data
 *
 * NOTE(review): listing line 199 is missing from the parameter list; per the
 * cross-reference section it declares `enum EXTRACTOR_MetaType type`, which
 * the body reads.
 *
 * Always returns 0. */
197 static int
198 add_to_meta_counter (void *cls, const char *plugin_name,
200  format,
201  const char *data_mime_type, const char *data, size_t
202  data_len)
203 {
204  struct GNUNET_CONTAINER_MultiHashMap *map = cls;
205  struct GNUNET_HashCode key;
206  struct MetaCounter *cnt;
207 
     /* Entries are keyed by the hash of the data bytes alone, so items that
      * differ only in type or format share a single counter. */
208  GNUNET_CRYPTO_hash (data, data_len, &key);
209  cnt = GNUNET_CONTAINER_multihashmap_get (map, &key);
210  if (NULL == cnt)
211  {
212  cnt = GNUNET_new (struct MetaCounter);
     /* The caller's pointers are stored as-is; nothing is copied.
      * NOTE(review): presumably the pointed-to data must outlive the
      * counter -- verify against the caller. */
213  cnt->data = data;
214  cnt->data_size = data_len;
215  cnt->plugin_name = plugin_name;
216  cnt->type = type;
217  cnt->format = format;
     /* NOTE(review): listing lines 218-220 and 222 are missing here; they
      * presumably set cnt->data_mime_type and open the
      * GNUNET_CONTAINER_multihashmap_put call whose arguments follow. */
221  &key, cnt,
223  }
224  cnt->count++;
225  return 0;
226 }
227 
228 
/* Remove keywords above the threshold.
 *
 * cls          - the struct TrimContext
 * keyword      - 0-terminated keyword being visited
 * is_mandatory - not used by this function
 *
 * Always returns GNUNET_OK. */
237 static int
238 remove_high_frequency_keywords (void *cls, const char *keyword, int
239  is_mandatory)
240 {
241  struct TrimContext *tc = cls;
242  struct KeywordCounter *counter;
243  struct GNUNET_HashCode hc;
244  size_t klen;
245 
     /* Same hashing as add_to_keyword_counter: the NUL is excluded. */
246  klen = strlen (keyword) + 1;
247  GNUNET_CRYPTO_hash (keyword, klen - 1, &hc);
     /* NOTE(review): listing line 248 is missing; it presumably looks
      * `counter` up in tc->keywordcounter using hc. */
249  GNUNET_assert (NULL != counter);
     /* Keywords seen fewer than move_threshold times are left alone. */
250  if (counter->count < tc->move_threshold)
251  return GNUNET_OK;
     /* NOTE(review): listing line 252 is missing; the fragment below is
      * presumably the tail of a GNUNET_FS_uri_ksk_remove_keyword call on the
      * item at tc->pos. */
253  counter->value);
254  return GNUNET_OK;
255 }
256 
257 
/* Move "frequent" keywords over to the target ksk uri, free the counters.
 *
 * NOTE(review): listing line 268 (function name and first parameters) is
 * missing; the cross-reference section gives the signature as
 * migrate_and_drop_keywords(void *cls, const struct GNUNET_HashCode *key,
 * void *value).
 *
 * cls   - the struct TrimContext
 * value - the struct KeywordCounter for this map entry
 *
 * Always returns GNUNET_YES. */
267 static int
269  void *value)
270 {
271  struct TrimContext *tc = cls;
272  struct KeywordCounter *counter = value;
273 
274  if (counter->count >= tc->move_threshold)
275  {
     /* NOTE(review): listing lines 277 and 280 are missing. Presumably the
      * first branch creates tc->pos->ksk_uri from this one keyword and the
      * else branch adds the keyword to the existing URI. */
276  if (NULL == tc->pos->ksk_uri)
278  &counter->value);
279  else
281  GNUNET_NO);
282  }
     /* NOTE(review): listing lines 283-284 are missing; the fragment below is
      * presumably the tail of an asserted GNUNET_CONTAINER_multihashmap_remove
      * call that takes this entry out of the map. */
285  key,
286  counter));
     /* The counter is freed whether or not the keyword was migrated. */
287  GNUNET_free (counter);
288  return GNUNET_YES;
289 }
290 
291 
/* Copy "frequent" metadata items over to the target metadata container,
 * free the counters.
 *
 * NOTE(review): listing line 302 (function name and first parameters) is
 * missing; the cross-reference section gives the signature as
 * migrate_and_drop_metadata(void *cls, const struct GNUNET_HashCode *key,
 * void *value).
 *
 * cls   - the struct TrimContext
 * value - the struct MetaCounter for this map entry
 *
 * Always returns GNUNET_YES. */
301 static int
303  void *value)
304 {
305  struct TrimContext *tc = cls;
306  struct MetaCounter *counter = value;
307 
308  if (counter->count >= tc->move_threshold)
309  {
     /* NOTE(review): listing lines 311-312 are missing. Presumably they
      * create tc->pos->meta when it is NULL and open the
      * GNUNET_CONTAINER_meta_data_insert call whose arguments follow. */
310  if (NULL == tc->pos->meta)
313  counter->plugin_name,
314  counter->type,
315  counter->format,
316  counter->data_mime_type, counter->data,
317  counter->data_size);
318  }
     /* NOTE(review): listing lines 319-320 are missing; the fragment below is
      * presumably the tail of an asserted GNUNET_CONTAINER_multihashmap_remove
      * call that takes this entry out of the map. */
321  key,
322  counter));
     /* The counter is freed whether or not the item was copied up. */
323  GNUNET_free (counter);
324  return GNUNET_YES;
325 }
326 
327 
/* Process a share item tree, moving frequent keywords up and copying
 * frequent metadata up. Children are handled first (recursively), then the
 * node itself.
 *
 * NOTE(review): listing line 336 (function name and first parameter) is
 * missing; the cross-reference section gives the signature as
 * share_tree_trim(struct TrimContext *tc, struct GNUNET_FS_ShareTreeItem *tree).
 *
 * tc   - shared trimming state (counter maps, current position, threshold)
 * tree - node to process */
335 static void
337  struct GNUNET_FS_ShareTreeItem *tree)
338 {
339  struct GNUNET_FS_ShareTreeItem *pos;
340  unsigned int num_children;
341 
342  /* first, trim all children */
343  num_children = 0;
344  for (pos = tree->children_head; NULL != pos; pos = pos->next)
345  {
346  share_tree_trim (tc, pos);
347  num_children++;
348  }
349 
350  /* consider adding filename to directory meta data */
351  if (tree->is_directory == GNUNET_YES)
352  {
353  const char *user = getenv ("USER");
     /* NOTE(review): this compares only the first strlen(user) characters,
      * case-insensitively. A directory whose name merely starts with $USER is
      * treated as a match, and an empty $USER matches every name. */
354  if ((user == NULL) ||
355  (0 != strncasecmp (user, tree->short_filename, strlen (user))))
356  {
357  /* only use filename if it doesn't match $USER */
     /* NOTE(review): listing lines 359 and 361-362 are missing. Presumably
      * they create tree->meta when it is NULL and supply the type and format
      * arguments of the insert call below. */
358  if (NULL == tree->meta)
360  GNUNET_CONTAINER_meta_data_insert (tree->meta, "<libgnunetfs>",
363  "text/plain", tree->short_filename,
364  strlen (tree->short_filename) + 1);
365  }
366  }
367 
     /* With zero or one child there is nothing to aggregate. */
368  if (1 >= num_children)
369  return; /* nothing to trim */
370 
371  /* now, count keywords and meta data in children */
372  for (pos = tree->children_head; NULL != pos; pos = pos->next)
373  {
     /* NOTE(review): listing lines 375 and 378 are missing. Presumably they
      * start the iteration calls that feed add_to_meta_counter and
      * add_to_keyword_counter with the maps named below. */
374  if (NULL != pos->meta)
376  tc->metacounter);
377  if (NULL != pos->ksk_uri)
379  tc->keywordcounter);
380  }
381 
382  /* calculate threshold for moving keywords / meta data */
     /* i.e. an item must appear in more than half of the children */
383  tc->move_threshold = 1 + (num_children / 2);
384 
385  /* remove high-frequency keywords from children */
386  for (pos = tree->children_head; NULL != pos; pos = pos->next)
387  {
388  tc->pos = pos;
389  if (NULL != pos->ksk_uri)
390  {
     /* The iteration runs over a duplicate of the URI. Presumably this is
      * because the callback edits pos->ksk_uri while iterating -- confirm
      * against listing line 393, which is missing here. */
391  struct GNUNET_FS_Uri *ksk_uri_copy = GNUNET_FS_uri_dup (pos->ksk_uri);
392  GNUNET_FS_uri_ksk_get_keywords (ksk_uri_copy,
394  GNUNET_FS_uri_destroy (ksk_uri_copy);
395  }
396  }
397 
398  /* add high-frequency meta data and keywords to parent */
399  tc->pos = tree;
     /* NOTE(review): listing lines 400-401 and 403-404 are missing.
      * Presumably they are the heads of two map-iteration calls that run
      * migrate_and_drop_keywords and migrate_and_drop_metadata with tc. */
402  tc);
405  tc);
406 }
407 
408 
/* Process a share item tree, moving frequent keywords up and copying
 * frequent metadata up. Does nothing when toplevel is NULL.
 *
 * NOTE(review): listing line 416 (function name and parameter) is missing;
 * the cross-reference section gives the signature as
 * GNUNET_FS_share_tree_trim(struct GNUNET_FS_ShareTreeItem *toplevel). */
415 void
417 {
418  struct TrimContext tc;
419 
420  if (toplevel == NULL)
421  return;
     /* NOTE(review): listing lines 422-423 are missing; presumably they
      * create tc.keywordcounter and tc.metacounter before use. */
424  share_tree_trim (&tc, toplevel);
     /* NOTE(review): listing lines 425-426 are missing; presumably they
      * destroy the two maps again. */
427 }
428 
429 
/* Release memory of a share item tree.
 *
 * NOTE(review): listing line 436 (function name and parameter) is missing;
 * the cross-reference section gives the signature as
 * GNUNET_FS_share_tree_free(struct GNUNET_FS_ShareTreeItem *toplevel).
 *
 * NOTE(review): toplevel is dereferenced without a NULL check, unlike
 * GNUNET_FS_share_tree_trim -- confirm that callers never pass NULL. */
435 void
437 {
438  struct GNUNET_FS_ShareTreeItem *pos;
439 
     /* NOTE(review): listing line 441 (the loop body) is missing; presumably
      * it frees each child recursively, with the child unlinking itself from
      * children_head so that the loop terminates. */
440  while (NULL != (pos = toplevel->children_head))
     /* NOTE(review): listing line 443 is missing; the fragment below is
      * presumably the tail of a GNUNET_CONTAINER_DLL_remove call that unlinks
      * this node from its parent's child list. */
442  if (NULL != toplevel->parent)
444  toplevel->parent->children_tail,
445  toplevel);
     /* NOTE(review): listing line 447 is missing; presumably it destroys
      * toplevel->meta. */
446  if (NULL != toplevel->meta)
448  if (NULL != toplevel->ksk_uri)
449  GNUNET_FS_uri_destroy (toplevel->ksk_uri);
450  GNUNET_free_non_null (toplevel->filename);
     /* NOTE(review): listing line 451 is missing; presumably it frees
      * toplevel->short_filename. */
452  GNUNET_free (toplevel);
453 }
454 
455 
456 /* end fs_sharetree.c */
#define GNUNET_CONTAINER_DLL_remove(head, tail, element)
Remove an element from a DLL.
void GNUNET_FS_share_tree_trim(struct GNUNET_FS_ShareTreeItem *toplevel)
Process a share item tree, moving frequent keywords up and copying frequent metadata up...
Definition: fs_sharetree.c:416
enum EXTRACTOR_MetaFormat format
Format of the data.
Definition: fs_sharetree.c:104
EXTRACTOR_MetaFormat
Format in which the extracted meta data is presented.
struct GNUNET_FS_Uri * GNUNET_FS_uri_ksk_create_from_args(unsigned int argc, const char **argv)
Create an FS URI from a user-supplied command line of keywords.
Definition: fs_uri.c:1155
void GNUNET_FS_uri_ksk_add_keyword(struct GNUNET_FS_Uri *uri, const char *keyword, int is_mandatory)
Add the given keyword to the set of keywords represented by the URI.
Definition: fs_uri.c:762
#define GNUNET_assert(cond)
Use this for fatal errors that cannot be handled.
const char * value
Keyword that was found.
Definition: fs_sharetree.c:52
static int add_to_meta_counter(void *cls, const char *plugin_name, enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, const char *data_mime_type, const char *data, size_t data_len)
Function called on each meta data item.
Definition: fs_sharetree.c:198
static struct GNUNET_SCHEDULER_TaskContext tc
Task context of the current task.
Definition: scheduler.c:418
struct GNUNET_FS_ShareTreeItem * parent
This is a doubly-linked tree; NULL for top-level entries.
struct GNUNET_FS_ShareTreeItem * children_tail
This is a doubly-linked tree; NULL for files and empty directories.
#define GNUNET_memcpy(dst, src, n)
Call memcpy() but check for n being 0 first.
struct GNUNET_FS_Uri * ksk_uri
Keywords for this file or directory (derived from metadata).
0-terminated, UTF-8 encoded string.
#define GNUNET_NO
Definition: gnunet_common.h:78
#define GNUNET_OK
Named constants for return values.
Definition: gnunet_common.h:75
#define GNUNET_free_non_null(ptr)
Free the memory pointed to by ptr if ptr is not NULL.
struct MetaCounter * prev
This is a doubly-linked list.
Definition: fs_sharetree.c:69
#define GNUNET_new(type)
Allocate a struct or union of the given type.
static int migrate_and_drop_keywords(void *cls, const struct GNUNET_HashCode *key, void *value)
Move "frequent" keywords over to the target ksk uri, free the counters.
Definition: fs_sharetree.c:268
struct GNUNET_CONTAINER_MultiHashMap * metacounter
Map from the hash over the metadata to a 'struct MetaCounter *' counter that says how often this met...
Definition: fs_sharetree.c:132
A structure that forms a singly-linked list that serves as a stack for metadata-processing function...
Definition: fs_sharetree.c:118
EXTRACTOR_MetaType
Enumeration defining various sources of keywords.
unsigned int count
How many files have meta entries matching this value? (type and format do not have to match)...
Definition: fs_sharetree.c:110
Entry for each unique keyword to track how often it occurred.
Definition: fs_sharetree.c:37
struct GNUNET_FS_Uri * GNUNET_FS_uri_dup(const struct GNUNET_FS_Uri *uri)
Duplicate URI.
Definition: fs_uri.c:998
Internal representation of the hash map.
static struct GNUNET_CONTAINER_MultiPeerMap * map
Handle to the map used to store old latency values for peers.
void * GNUNET_CONTAINER_multihashmap_get(const struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key)
Given a key find a value in the map matching the key.
struct GNUNET_CONTAINER_MultiHashMap * keywordcounter
Map from the hash over the keyword to a 'struct KeywordCounter *' counter that says how often this k...
Definition: fs_sharetree.c:125
struct GNUNET_FS_ShareTreeItem * next
This is a doubly-linked list.
int GNUNET_FS_uri_ksk_get_keywords(const struct GNUNET_FS_Uri *uri, GNUNET_FS_KeywordIterator iterator, void *iterator_cls)
Iterate over all keywords in this keyword URI.
Definition: fs_uri.c:730
static void share_tree_trim(struct TrimContext *tc, struct GNUNET_FS_ShareTreeItem *tree)
Process a share item tree, moving frequent keywords up and copying frequent metadata up...
Definition: fs_sharetree.c:336
A node of a directory tree (produced by dirscanner)
static int migrate_and_drop_metadata(void *cls, const struct GNUNET_HashCode *key, void *value)
Copy "frequent" metadata items over to the target metadata container, free the counters.
Definition: fs_sharetree.c:302
struct GNUNET_FS_ShareTreeItem * children_head
This is a doubly-linked tree; NULL for files and empty directories.
void GNUNET_CRYPTO_hash(const void *block, size_t size, struct GNUNET_HashCode *ret)
Compute hash of a given block.
Definition: crypto_hash.c:48
void GNUNET_CONTAINER_multihashmap_destroy(struct GNUNET_CONTAINER_MultiHashMap *map)
Destroy a hash map.
struct GNUNET_CONTAINER_MetaData * GNUNET_CONTAINER_meta_data_create(void)
Create a fresh meta data container.
unsigned int count
How many files have this keyword?
Definition: fs_sharetree.c:57
int GNUNET_CONTAINER_multihashmap_remove(struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key, const void *value)
Remove the given key-value pair from the map.
void GNUNET_CONTAINER_meta_data_destroy(struct GNUNET_CONTAINER_MetaData *md)
Free meta data.
void GNUNET_FS_uri_destroy(struct GNUNET_FS_Uri *uri)
Free URI.
Definition: fs_uri.c:678
A 512-bit hashcode.
int GNUNET_CONTAINER_meta_data_insert(struct GNUNET_CONTAINER_MetaData *md, const char *plugin_name, enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, const char *data_mime_type, const char *data, size_t data_size)
Extend metadata.
char * short_filename
Base name of the file/directory.
There must only be one value per key; storing a value should fail if a value under the same key alrea...
struct GNUNET_HashCode key
The key used in the DHT.
const char * plugin_name
Name of the plugin that provided that piece of metadata.
Definition: fs_sharetree.c:79
char * getenv()
enum EXTRACTOR_MetaType type
Type of the data.
Definition: fs_sharetree.c:99
int is_directory
GNUNET_YES if this is a directory
static int add_to_keyword_counter(void *cls, const char *keyword, int is_mandatory)
Add the given keyword to the keyword statistics tracker.
Definition: fs_sharetree.c:155
struct GNUNET_FS_ShareTreeItem * pos
Position we are currently manipulating.
Definition: fs_sharetree.c:137
char * filename
Name of the file/directory.
int GNUNET_CONTAINER_multihashmap_put(struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key, void *value, enum GNUNET_CONTAINER_MultiHashMapOption opt)
Store a key-value pair in the map.
struct GNUNET_CONTAINER_MetaData * meta
Metadata for this file or directory.
struct KeywordCounter * next
This is a doubly-linked list.
Definition: fs_sharetree.c:47
struct KeywordCounter * prev
This is a doubly-linked list.
Definition: fs_sharetree.c:42
void GNUNET_FS_share_tree_free(struct GNUNET_FS_ShareTreeItem *toplevel)
Release memory of a share item tree.
Definition: fs_sharetree.c:436
A Universal Resource Identifier (URI), opaque.
Definition: fs_api.h:165
int GNUNET_CONTAINER_meta_data_iterate(const struct GNUNET_CONTAINER_MetaData *md, EXTRACTOR_MetaDataProcessor iter, void *iter_cls)
Iterate over MD entries.
Aggregate information we keep for meta data in each directory.
Definition: fs_sharetree.c:64
struct GNUNET_CONTAINER_MultiHashMap * GNUNET_CONTAINER_multihashmap_create(unsigned int len, int do_not_copy_keys)
Create a multi hash map.
size_t data_size
Number of bytes in 'data'.
Definition: fs_sharetree.c:94
struct MetaCounter * next
This is a doubly-linked list.
Definition: fs_sharetree.c:74
enum GNUNET_TESTBED_UnderlayLinkModelType type
the type of this model
#define GNUNET_YES
Definition: gnunet_common.h:77
void GNUNET_FS_uri_ksk_remove_keyword(struct GNUNET_FS_Uri *uri, const char *keyword)
Remove the given keyword from the set of keywords represented by the URI.
Definition: fs_uri.c:790
const char * data_mime_type
MIME-type of the metadata itself.
Definition: fs_sharetree.c:84
int GNUNET_CONTAINER_multihashmap_iterate(struct GNUNET_CONTAINER_MultiHashMap *map, GNUNET_CONTAINER_MulitHashMapIteratorCallback it, void *it_cls)
Iterate over all entries in the map.
uint32_t data
The data value.
static char * plugin_name
Name of our plugin.
#define GNUNET_malloc(size)
Wrapper around malloc.
unsigned int move_threshold
Number of times an item has to be found to be moved to the parent.
Definition: fs_sharetree.c:142
#define GNUNET_free(ptr)
Wrapper around free.
const char * data
The actual meta data.
Definition: fs_sharetree.c:89
static int remove_high_frequency_keywords(void *cls, const char *keyword, int is_mandatory)
Remove keywords above the threshold.
Definition: fs_sharetree.c:238