GNUnet  0.10.x
fs_sharetree.c
Go to the documentation of this file.
1 /*
2  This file is part of GNUnet
3  Copyright (C) 2005-2012 GNUnet e.V.
4 
5  GNUnet is free software: you can redistribute it and/or modify it
6  under the terms of the GNU Affero General Public License as published
7  by the Free Software Foundation, either version 3 of the License,
8  or (at your option) any later version.
9 
10  GNUnet is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  Affero General Public License for more details.
14 
15  You should have received a copy of the GNU Affero General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  SPDX-License-Identifier: AGPL3.0-or-later
19  */
20 
27 #include "platform.h"
28 #include "gnunet_fs_service.h"
29 #include "gnunet_scheduler_lib.h"
30 #include <pthread.h>
31 
32 
42 
47 
/*
 * Tail of struct KeywordCounter: the per-keyword entry used to track how
 * often a keyword occurred.
 *
 * NOTE(review): the struct header and the prev/next list pointers
 * (listing lines 37-46) are not present in this copy of the file.
 */
/* Keyword that was found. */
51  const char *value;
52 
/* How many files have this keyword? */
56  unsigned int count;
57 };
58 
59 
/*
 * Aggregate information we keep for meta data in each directory.
 *
 * NOTE(review): listing lines 97 and 102 (the type and format members,
 * which add_to_meta_counter assigns) are not present in this copy.
 */
63 struct MetaCounter {
/* This is a doubly-linked list. */
67  struct MetaCounter *prev;
68 
/* This is a doubly-linked list. */
72  struct MetaCounter *next;
73 
/* Name of the plugin that provided that piece of metadata. */
77  const char *plugin_name;
78 
/* MIME-type of the metadata itself. */
82  const char *data_mime_type;
83 
/* The actual meta data. */
87  const char *data;
88 
/* Number of bytes in data. */
92  size_t data_size;
93 
98 
103 
/*
 * How many files have meta entries matching this value?
 * (type and format do not have to match)
 */
108  unsigned int count;
109 };
110 
111 
/*
 * State shared by the trimming callbacks while one directory level is
 * processed.
 *
 * NOTE(review): listing lines 122, 129 and 134 are not present in this
 * copy.  The functions in this file read tc->pos, and the cross-reference
 * section names keywordcounter and metacounter hash maps at those lines;
 * confirm against the real file.
 */
116 struct TrimContext {
123 
130 
135 
/* Number of times an item has to be found to be moved to the parent. */
139  unsigned int move_threshold;
140 };
141 
142 
/*
 * Add the given keyword to the keyword statistics tracker.
 *
 * cls           hash map (struct GNUNET_CONTAINER_MultiHashMap) holding one
 *               struct KeywordCounter per distinct keyword, keyed by the
 *               hash of the keyword text
 * keyword       0-terminated keyword to count
 * is_mandatory  not referenced in the visible body
 *
 * Always returns GNUNET_OK.
 *
 * NOTE(review): listing lines 167-168 and 170 are absent from this copy;
 * presumably they hold the multihashmap put call that registers the new
 * counter.  Confirm against the real file.
 */
151 static int
152 add_to_keyword_counter(void *cls, const char *keyword, int is_mandatory)
153 {
154  struct GNUNET_CONTAINER_MultiHashMap *mcm = cls;
155  struct KeywordCounter *cnt;
156  struct GNUNET_HashCode hc;
157  size_t klen;
158 
/* klen counts the terminating 0 so the copy below stays a C string */
159  klen = strlen(keyword) + 1;
/* the hash covers the keyword bytes only, without the terminator */
160  GNUNET_CRYPTO_hash(keyword, klen - 1, &hc);
161  cnt = GNUNET_CONTAINER_multihashmap_get(mcm, &hc);
162  if (cnt == NULL)
163  {
/*
 * Counter and keyword text share one allocation; the text is stored
 * directly behind the struct and value points at it.
 * NOTE(review): count is incremented below without being set here, so
 * this presumably relies on GNUNET_malloc returning zeroed memory.
 */
164  cnt = GNUNET_malloc(sizeof(struct KeywordCounter) + klen);
165  cnt->value = (const char *)&cnt[1];
166  GNUNET_memcpy(&cnt[1], keyword, klen);
169  &hc, cnt,
171  }
/* runs for both freshly created and already known keywords */
172  cnt->count++;
173  return GNUNET_OK;
174 }
175 
176 
/*
 * Function called on each meta data item; counts how often each distinct
 * data value is seen.
 *
 * cls             hash map (struct GNUNET_CONTAINER_MultiHashMap) holding
 *                 one struct MetaCounter per distinct data value
 * plugin_name     name of the plugin that produced the item
 * type            meta data type of the item
 * format          format of data
 * data_mime_type  MIME type of data
 * data            the meta data bytes
 * data_len        number of bytes in data
 *
 * Always returns 0.
 *
 * The map key is the hash over data alone, so items with equal data but a
 * different type or format share one counter.
 *
 * NOTE(review): listing lines 213-215 and 217 are absent from this copy;
 * presumably they store data_mime_type and perform the multihashmap put.
 * Confirm against the real file.
 */
194 static int
195 add_to_meta_counter(void *cls, const char *plugin_name,
196  enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format,
197  const char *data_mime_type, const char *data, size_t data_len)
198 {
199  struct GNUNET_CONTAINER_MultiHashMap *map = cls;
200  struct GNUNET_HashCode key;
201  struct MetaCounter *cnt;
202 
203  GNUNET_CRYPTO_hash(data, data_len, &key);
204  cnt = GNUNET_CONTAINER_multihashmap_get(map, &key);
205  if (NULL == cnt)
206  {
207  cnt = GNUNET_new(struct MetaCounter);
/*
 * The counter keeps the caller pointers as they are; nothing is copied.
 * NOTE(review): this assumes data and plugin_name outlive the counter.
 */
208  cnt->data = data;
209  cnt->data_size = data_len;
210  cnt->plugin_name = plugin_name;
211  cnt->type = type;
212  cnt->format = format;
216  &key, cnt,
218  }
219  cnt->count++;
220  return 0;
221 }
222 
223 
/*
 * Remove keywords above the threshold.
 *
 * cls           the struct TrimContext for the directory being trimmed
 * keyword       0-terminated keyword to examine
 * is_mandatory  not referenced in the visible body
 *
 * Always returns GNUNET_OK.
 *
 * NOTE(review): listing lines 242 and 246 are absent from this copy.
 * Line 242 presumably looks up counter by hc in the keyword counter map;
 * line 246 presumably starts the call that removes counter->value from
 * the ksk URI of tc->pos.  Confirm against the real file.
 */
232 static int
233 remove_high_frequency_keywords(void *cls, const char *keyword, int is_mandatory)
234 {
235  struct TrimContext *tc = cls;
236  struct KeywordCounter *counter;
237  struct GNUNET_HashCode hc;
238  size_t klen;
239 
240  klen = strlen(keyword) + 1;
/* hash the keyword without its terminator, same as when counting */
241  GNUNET_CRYPTO_hash(keyword, klen - 1, &hc);
/* a missing counter is treated as a fatal error */
243  GNUNET_assert(NULL != counter);
/* keywords seen fewer than move_threshold times are left alone */
244  if (counter->count < tc->move_threshold)
245  return GNUNET_OK;
247  counter->value);
248  return GNUNET_OK;
249 }
250 
251 
/*
 * Move frequent keywords over to the target ksk uri, free the counters.
 *
 * cls    the struct TrimContext; tc->pos is the item receiving keywords
 * key    map key of the entry
 * value  the struct KeywordCounter stored under key
 *
 * Always returns GNUNET_YES.
 *
 * NOTE(review): listing lines 270, 272 and 274-275 are absent from this
 * copy.  Presumably 270 and 272 create a new ksk URI or add the keyword
 * to the existing one, and 274-275 begin an asserted removal of the entry
 * from the map.  Confirm against the real file.
 */
261 static int
262 migrate_and_drop_keywords(void *cls, const struct GNUNET_HashCode * key, void *value)
263 {
264  struct TrimContext *tc = cls;
265  struct KeywordCounter *counter = value;
266 
/* only keywords that reached the threshold are migrated */
267  if (counter->count >= tc->move_threshold)
268  {
269  if (NULL == tc->pos->ksk_uri)
271  else
273  }
276  key,
277  counter));
/* the counter is released whether or not its keyword was migrated */
278  GNUNET_free(counter);
279  return GNUNET_YES;
280 }
281 
282 
/*
 * Copy frequent metadata items over to the target metadata container,
 * free the counters.
 *
 * cls    the struct TrimContext; tc->pos is the item receiving meta data
 * key    map key of the entry
 * value  the struct MetaCounter stored under key
 *
 * Always returns GNUNET_YES.
 *
 * NOTE(review): listing lines 301-302 and 309-310 are absent from this
 * copy.  Presumably 301-302 create tc->pos->meta when it is NULL and open
 * the meta data insert call, and 309-310 begin an asserted removal of the
 * entry from the map.  Confirm against the real file.
 */
292 static int
293 migrate_and_drop_metadata(void *cls, const struct GNUNET_HashCode * key, void *value)
294 {
295  struct TrimContext *tc = cls;
296  struct MetaCounter *counter = value;
297 
/* only items that reached the threshold are copied to the target */
298  if (counter->count >= tc->move_threshold)
299  {
300  if (NULL == tc->pos->meta)
303  counter->plugin_name,
304  counter->type,
305  counter->format,
306  counter->data_mime_type, counter->data,
307  counter->data_size);
308  }
311  key,
312  counter));
/* the counter is released whether or not its item was copied */
313  GNUNET_free(counter);
314  return GNUNET_YES;
315 }
316 
317 
/*
 * Process a share item tree, moving frequent keywords up and copying
 * frequent metadata up.
 *
 * tc    trim context used as scratch state for the callbacks
 * tree  node to process; its children are processed first
 *
 * NOTE(review): several listing lines are absent from this copy (326, 349,
 * 351-352, 365, 367, 380, 387-388, 390-391).  Line 326 is the line carrying
 * the function name and the tc parameter.  The remaining gaps presumably
 * hold the meta data creation, the counting iterations, the keyword removal
 * iteration and the two map iterations.  Confirm against the real file.
 */
325 static void
327  struct GNUNET_FS_ShareTreeItem *tree)
328 {
329  struct GNUNET_FS_ShareTreeItem *pos;
330  unsigned int num_children;
331 
332  /* first, trim all children */
333  num_children = 0;
334  for (pos = tree->children_head; NULL != pos; pos = pos->next)
335  {
/* recursion: each subtree is fully trimmed before this node is handled */
336  share_tree_trim(tc, pos);
337  num_children++;
338  }
339 
340  /* consider adding filename to directory meta data */
341  if (tree->is_directory == GNUNET_YES)
342  {
343  const char *user = getenv("USER");
/*
 * NOTE(review): only the first strlen(user) characters are compared, and
 * case is ignored, so any directory whose name merely starts with the
 * value of USER counts as a match.  An empty USER compares zero
 * characters and therefore matches every name.
 */
344  if ((user == NULL) ||
345  (0 != strncasecmp(user, tree->short_filename, strlen(user))))
346  {
347  /* only use filename if it doesn't match $USER */
348  if (NULL == tree->meta)
350  GNUNET_CONTAINER_meta_data_insert(tree->meta, "<libgnunetfs>",
353  "text/plain", tree->short_filename,
/* the length passed includes the terminating 0 of the name */
354  strlen(tree->short_filename) + 1);
355  }
356  }
357 
/* with zero or one child nothing can be shared between siblings */
358  if (1 >= num_children)
359  return; /* nothing to trim */
360 
361  /* now, count keywords and meta data in children */
362  for (pos = tree->children_head; NULL != pos; pos = pos->next)
363  {
364  if (NULL != pos->meta)
366  if (NULL != pos->ksk_uri)
368  }
369 
370  /* calculate threshold for moving keywords / meta data */
/* an item must appear in strictly more than half of the children */
371  tc->move_threshold = 1 + (num_children / 2);
372 
373  /* remove high-frequency keywords from children */
374  for (pos = tree->children_head; NULL != pos; pos = pos->next)
375  {
376  tc->pos = pos;
377  if (NULL != pos->ksk_uri)
378  {
/*
 * NOTE(review): a duplicate of the URI is made and destroyed around the
 * missing line 380; presumably the keywords of the copy are iterated so
 * that the callback can modify pos->ksk_uri safely.
 */
379  struct GNUNET_FS_Uri *ksk_uri_copy = GNUNET_FS_uri_dup(pos->ksk_uri);
381  GNUNET_FS_uri_destroy(ksk_uri_copy);
382  }
383  }
384 
385  /* add high-frequency meta data and keywords to parent */
/* from here on the callbacks target this node instead of a child */
386  tc->pos = tree;
389  tc);
392  tc);
393 }
394 
395 
/*
 * Process a share item tree, moving frequent keywords up and copying
 * frequent metadata up.  Does nothing when toplevel is NULL.
 *
 * NOTE(review): listing lines 403, 409-410 and 412-413 are absent from
 * this copy.  Line 403 carries the function name and the toplevel
 * parameter.  The others presumably create the two counter maps in tc
 * before the call and destroy them afterwards.  Confirm against the real
 * file.
 */
402 void
404 {
/* tc is not initialised in the visible lines */
405  struct TrimContext tc;
406 
407  if (toplevel == NULL)
408  return;
411  share_tree_trim(&tc, toplevel);
414 }
415 
416 
/*
 * Release memory of a share item tree.
 *
 * NOTE(review): toplevel is dereferenced in the first statement without a
 * NULL check, unlike GNUNET_FS_share_tree_trim in this file; callers must
 * not pass NULL.
 *
 * NOTE(review): listing lines 423, 428, 430, 434 and 438 are absent from
 * this copy.  Line 423 carries the function name and the toplevel
 * parameter.  Presumably 428 frees the child pos recursively, 430 opens
 * the DLL removal from the parent, 434 destroys the meta data and 438
 * frees short_filename.  Confirm against the real file.
 */
422 void
424 {
425  struct GNUNET_FS_ShareTreeItem *pos;
426 
/* keeps taking the current first child until none is left */
427  while (NULL != (pos = toplevel->children_head))
/* only nodes that have a parent are unlinked from a sibling list */
429  if (NULL != toplevel->parent)
431  toplevel->parent->children_tail,
432  toplevel);
433  if (NULL != toplevel->meta)
435  if (NULL != toplevel->ksk_uri)
436  GNUNET_FS_uri_destroy(toplevel->ksk_uri);
437  GNUNET_free_non_null(toplevel->filename);
439  GNUNET_free(toplevel);
440 }
441 
442 /* end fs_sharetree.c */
443 
#define GNUNET_CONTAINER_DLL_remove(head, tail, element)
Remove an element from a DLL.
void GNUNET_FS_share_tree_trim(struct GNUNET_FS_ShareTreeItem *toplevel)
Process a share item tree, moving frequent keywords up and copying frequent metadata up...
Definition: fs_sharetree.c:403
enum EXTRACTOR_MetaFormat format
Format of the data.
Definition: fs_sharetree.c:102
EXTRACTOR_MetaFormat
Format in which the extracted meta data is presented.
struct GNUNET_FS_Uri * GNUNET_FS_uri_ksk_create_from_args(unsigned int argc, const char **argv)
Create an FS URI from a user-supplied command line of keywords.
Definition: fs_uri.c:1152
void GNUNET_FS_uri_ksk_add_keyword(struct GNUNET_FS_Uri *uri, const char *keyword, int is_mandatory)
Add the given keyword to the set of keywords represented by the URI.
Definition: fs_uri.c:759
#define GNUNET_assert(cond)
Use this for fatal errors that cannot be handled.
const char * value
Keyword that was found.
Definition: fs_sharetree.c:51
static int add_to_meta_counter(void *cls, const char *plugin_name, enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, const char *data_mime_type, const char *data, size_t data_len)
Function called on each meta data item.
Definition: fs_sharetree.c:195
static struct GNUNET_SCHEDULER_TaskContext tc
Task context of the current task.
Definition: scheduler.c:410
struct GNUNET_FS_ShareTreeItem * parent
This is a doubly-linked tree; NULL for top-level entries.
struct GNUNET_FS_ShareTreeItem * children_tail
This is a doubly-linked tree; NULL for files and empty directories.
#define GNUNET_memcpy(dst, src, n)
Call memcpy() but check for n being 0 first.
struct GNUNET_FS_Uri * ksk_uri
Keywords for this file or directory (derived from metadata).
0-terminated, UTF-8 encoded string.
#define GNUNET_NO
Definition: gnunet_common.h:78
#define GNUNET_OK
Named constants for return values.
Definition: gnunet_common.h:75
#define GNUNET_free_non_null(ptr)
Free the memory pointed to by ptr if ptr is not NULL.
struct MetaCounter * prev
This is a doubly-linked list.
Definition: fs_sharetree.c:67
#define GNUNET_new(type)
Allocate a struct or union of the given type.
static int migrate_and_drop_keywords(void *cls, const struct GNUNET_HashCode *key, void *value)
Move "frequent" keywords over to the target ksk uri, free the counters.
Definition: fs_sharetree.c:262
struct GNUNET_CONTAINER_MultiHashMap * metacounter
Map from the hash over the metadata to a 'struct MetaCounter *' counter that says how often this met...
Definition: fs_sharetree.c:129
A structure that forms a singly-linked list that serves as a stack for metadata-processing function...
Definition: fs_sharetree.c:116
EXTRACTOR_MetaType
Enumeration defining various sources of keywords.
unsigned int count
How many files have meta entries matching this value? (type and format do not have to match)...
Definition: fs_sharetree.c:108
Entry for each unique keyword to track how often it occurred.
Definition: fs_sharetree.c:37
struct GNUNET_FS_Uri * GNUNET_FS_uri_dup(const struct GNUNET_FS_Uri *uri)
Duplicate URI.
Definition: fs_uri.c:995
Internal representation of the hash map.
static struct GNUNET_CONTAINER_MultiPeerMap * map
Handle to the map used to store old latency values for peers.
void * GNUNET_CONTAINER_multihashmap_get(const struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key)
Given a key find a value in the map matching the key.
struct GNUNET_CONTAINER_MultiHashMap * keywordcounter
Map from the hash over the keyword to a 'struct KeywordCounter *' counter that says how often this k...
Definition: fs_sharetree.c:122
struct GNUNET_FS_ShareTreeItem * next
This is a doubly-linked list.
int GNUNET_FS_uri_ksk_get_keywords(const struct GNUNET_FS_Uri *uri, GNUNET_FS_KeywordIterator iterator, void *iterator_cls)
Iterate over all keywords in this keyword URI.
Definition: fs_uri.c:727
static void share_tree_trim(struct TrimContext *tc, struct GNUNET_FS_ShareTreeItem *tree)
Process a share item tree, moving frequent keywords up and copying frequent metadata up...
Definition: fs_sharetree.c:326
A node of a directory tree (produced by dirscanner)
static int migrate_and_drop_metadata(void *cls, const struct GNUNET_HashCode *key, void *value)
Copy "frequent" metadata items over to the target metadata container, free the counters.
Definition: fs_sharetree.c:293
struct GNUNET_FS_ShareTreeItem * children_head
This is a doubly-linked tree; NULL for files and empty directories.
void GNUNET_CRYPTO_hash(const void *block, size_t size, struct GNUNET_HashCode *ret)
Compute hash of a given block.
Definition: crypto_hash.c:44
void GNUNET_CONTAINER_multihashmap_destroy(struct GNUNET_CONTAINER_MultiHashMap *map)
Destroy a hash map.
struct GNUNET_CONTAINER_MetaData * GNUNET_CONTAINER_meta_data_create(void)
Create a fresh meta data container.
unsigned int count
How many files have this keyword?
Definition: fs_sharetree.c:56
int GNUNET_CONTAINER_multihashmap_remove(struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key, const void *value)
Remove the given key-value pair from the map.
void GNUNET_CONTAINER_meta_data_destroy(struct GNUNET_CONTAINER_MetaData *md)
Free meta data.
static char * plugin_name
Solver plugin name as string.
void GNUNET_FS_uri_destroy(struct GNUNET_FS_Uri *uri)
Free URI.
Definition: fs_uri.c:675
A 512-bit hashcode.
int GNUNET_CONTAINER_meta_data_insert(struct GNUNET_CONTAINER_MetaData *md, const char *plugin_name, enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, const char *data_mime_type, const char *data, size_t data_size)
Extend metadata.
char * short_filename
Base name of the file/directory.
There must only be one value per key; storing a value should fail if a value under the same key alrea...
struct GNUNET_HashCode key
The key used in the DHT.
const char * plugin_name
Name of the plugin that provided that piece of metadata.
Definition: fs_sharetree.c:77
char * getenv()
enum EXTRACTOR_MetaType type
Type of the data.
Definition: fs_sharetree.c:97
int is_directory
GNUNET_YES if this is a directory
static int add_to_keyword_counter(void *cls, const char *keyword, int is_mandatory)
Add the given keyword to the keyword statistics tracker.
Definition: fs_sharetree.c:152
struct GNUNET_FS_ShareTreeItem * pos
Position we are currently manipulating.
Definition: fs_sharetree.c:134
char * filename
Name of the file/directory.
int GNUNET_CONTAINER_multihashmap_put(struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key, void *value, enum GNUNET_CONTAINER_MultiHashMapOption opt)
Store a key-value pair in the map.
struct GNUNET_CONTAINER_MetaData * meta
Metadata for this file or directory.
struct KeywordCounter * next
This is a doubly-linked list.
Definition: fs_sharetree.c:46
struct KeywordCounter * prev
This is a doubly-linked list.
Definition: fs_sharetree.c:41
void GNUNET_FS_share_tree_free(struct GNUNET_FS_ShareTreeItem *toplevel)
Release memory of a share item tree.
Definition: fs_sharetree.c:423
A Universal Resource Identifier (URI), opaque.
Definition: fs_api.h:162
int GNUNET_CONTAINER_meta_data_iterate(const struct GNUNET_CONTAINER_MetaData *md, EXTRACTOR_MetaDataProcessor iter, void *iter_cls)
Iterate over MD entries.
Aggregate information we keep for meta data in each directory.
Definition: fs_sharetree.c:63
struct GNUNET_CONTAINER_MultiHashMap * GNUNET_CONTAINER_multihashmap_create(unsigned int len, int do_not_copy_keys)
Create a multi hash map.
size_t data_size
Number of bytes in 'data'.
Definition: fs_sharetree.c:92
struct MetaCounter * next
This is a doubly-linked list.
Definition: fs_sharetree.c:72
enum GNUNET_TESTBED_UnderlayLinkModelType type
the type of this model
#define GNUNET_YES
Definition: gnunet_common.h:77
void GNUNET_FS_uri_ksk_remove_keyword(struct GNUNET_FS_Uri *uri, const char *keyword)
Remove the given keyword from the set of keywords represented by the URI.
Definition: fs_uri.c:787
const char * data_mime_type
MIME-type of the metadata itself.
Definition: fs_sharetree.c:82
int GNUNET_CONTAINER_multihashmap_iterate(struct GNUNET_CONTAINER_MultiHashMap *map, GNUNET_CONTAINER_MulitHashMapIteratorCallback it, void *it_cls)
Iterate over all entries in the map.
uint32_t data
The data value.
#define GNUNET_malloc(size)
Wrapper around malloc.
unsigned int move_threshold
Number of times an item has to be found to be moved to the parent.
Definition: fs_sharetree.c:139
#define GNUNET_free(ptr)
Wrapper around free.
const char * data
The actual meta data.
Definition: fs_sharetree.c:87
static int remove_high_frequency_keywords(void *cls, const char *keyword, int is_mandatory)
Remove keywords above the threshold.
Definition: fs_sharetree.c:233