GNUnet  0.10.x
fs_sharetree.c
Go to the documentation of this file.
1 /*
2  This file is part of GNUnet
3  Copyright (C) 2005-2012 GNUnet e.V.
4 
5  GNUnet is free software: you can redistribute it and/or modify it
6  under the terms of the GNU Affero General Public License as published
7  by the Free Software Foundation, either version 3 of the License,
8  or (at your option) any later version.
9 
10  GNUnet is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  Affero General Public License for more details.
14 
15  You should have received a copy of the GNU Affero General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  SPDX-License-Identifier: AGPL-3.0-or-later
19 */
20 
27 #include "platform.h"
28 #include "gnunet_fs_service.h"
29 #include "gnunet_scheduler_lib.h"
30 #include <pthread.h>
31 
32 
/**
 * Entry for each unique keyword to track how often it occurred.
 * Contains the keyword and the counter.  Instances are allocated by
 * add_to_keyword_counter() with the keyword text stored in the bytes
 * directly following the struct; @e value points at that copy.
 */
struct KeywordCounter
{

  /**
   * This is a doubly-linked list.
   */
  struct KeywordCounter *prev;

  /**
   * This is a doubly-linked list.
   */
  struct KeywordCounter *next;

  /**
   * Keyword that was found.
   */
  const char *value;

  /**
   * How many files have this keyword?
   */
  unsigned int count;

};
61 
62 
67 {
68 
72  struct MetaCounter *prev;
73 
77  struct MetaCounter *next;
78 
82  const char *plugin_name;
83 
87  const char *data_mime_type;
88 
92  const char *data;
93 
97  size_t data_size;
98 
103 
108 
113  unsigned int count;
114 
115 };
116 
117 
/**
 * State shared by the callbacks used while trimming a share tree:
 * the two per-directory counter maps, the tree item currently being
 * modified and the threshold above which an entry counts as
 * "frequent".
 */
struct TrimContext
{

  /**
   * Map from the hash over the keyword to a 'struct KeywordCounter *'
   * counter that says how often this keyword was encountered.
   */
  struct GNUNET_CONTAINER_MultiHashMap *keywordcounter;

  /**
   * Map from the hash over the metadata to a 'struct MetaCounter *'
   * counter that says how often this metadata was encountered.
   */
  struct GNUNET_CONTAINER_MultiHashMap *metacounter;

  /**
   * Position we are currently manipulating.
   */
  struct GNUNET_FS_ShareTreeItem *pos;

  /**
   * Number of times an item has to be found to be moved to the parent.
   */
  unsigned int move_threshold;

};
150 
151 
160 static int
161 add_to_keyword_counter (void *cls, const char *keyword, int is_mandatory)
162 {
163  struct GNUNET_CONTAINER_MultiHashMap *mcm = cls;
164  struct KeywordCounter *cnt;
165  struct GNUNET_HashCode hc;
166  size_t klen;
167 
168  klen = strlen (keyword) + 1;
169  GNUNET_CRYPTO_hash (keyword, klen - 1, &hc);
170  cnt = GNUNET_CONTAINER_multihashmap_get (mcm, &hc);
171  if (cnt == NULL)
172  {
173  cnt = GNUNET_malloc (sizeof (struct KeywordCounter) + klen);
174  cnt->value = (const char *) &cnt[1];
175  GNUNET_memcpy (&cnt[1], keyword, klen);
178  &hc, cnt,
180  }
181  cnt->count++;
182  return GNUNET_OK;
183 }
184 
185 
203 static int
204 add_to_meta_counter (void *cls, const char *plugin_name,
205  enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format,
206  const char *data_mime_type, const char *data, size_t data_len)
207 {
208  struct GNUNET_CONTAINER_MultiHashMap *map = cls;
209  struct GNUNET_HashCode key;
210  struct MetaCounter *cnt;
211 
212  GNUNET_CRYPTO_hash (data, data_len, &key);
213  cnt = GNUNET_CONTAINER_multihashmap_get (map, &key);
214  if (NULL == cnt)
215  {
216  cnt = GNUNET_new (struct MetaCounter);
217  cnt->data = data;
218  cnt->data_size = data_len;
219  cnt->plugin_name = plugin_name;
220  cnt->type = type;
221  cnt->format = format;
225  &key, cnt,
227  }
228  cnt->count++;
229  return 0;
230 }
231 
232 
241 static int
242 remove_high_frequency_keywords (void *cls, const char *keyword, int is_mandatory)
243 {
244  struct TrimContext *tc = cls;
245  struct KeywordCounter *counter;
246  struct GNUNET_HashCode hc;
247  size_t klen;
248 
249  klen = strlen (keyword) + 1;
250  GNUNET_CRYPTO_hash (keyword, klen - 1, &hc);
252  GNUNET_assert (NULL != counter);
253  if (counter->count < tc->move_threshold)
254  return GNUNET_OK;
256  counter->value);
257  return GNUNET_OK;
258 }
259 
260 
270 static int
271 migrate_and_drop_keywords (void *cls, const struct GNUNET_HashCode * key, void *value)
272 {
273  struct TrimContext *tc = cls;
274  struct KeywordCounter *counter = value;
275 
276  if (counter->count >= tc->move_threshold)
277  {
278  if (NULL == tc->pos->ksk_uri)
279  tc->pos->ksk_uri = GNUNET_FS_uri_ksk_create_from_args (1, &counter->value);
280  else
282  }
285  key,
286  counter));
287  GNUNET_free (counter);
288  return GNUNET_YES;
289 }
290 
291 
301 static int
302 migrate_and_drop_metadata (void *cls, const struct GNUNET_HashCode * key, void *value)
303 {
304  struct TrimContext *tc = cls;
305  struct MetaCounter *counter = value;
306 
307  if (counter->count >= tc->move_threshold)
308  {
309  if (NULL == tc->pos->meta)
312  counter->plugin_name,
313  counter->type,
314  counter->format,
315  counter->data_mime_type, counter->data,
316  counter->data_size);
317  }
320  key,
321  counter));
322  GNUNET_free (counter);
323  return GNUNET_YES;
324 }
325 
326 
334 static void
336  struct GNUNET_FS_ShareTreeItem *tree)
337 {
338  struct GNUNET_FS_ShareTreeItem *pos;
339  unsigned int num_children;
340 
341  /* first, trim all children */
342  num_children = 0;
343  for (pos = tree->children_head; NULL != pos; pos = pos->next)
344  {
345  share_tree_trim (tc, pos);
346  num_children++;
347  }
348 
349  /* consider adding filename to directory meta data */
350  if (tree->is_directory == GNUNET_YES)
351  {
352  const char *user = getenv ("USER");
353  if ( (user == NULL) ||
354  (0 != strncasecmp (user, tree->short_filename, strlen(user))))
355  {
356  /* only use filename if it doesn't match $USER */
357  if (NULL == tree->meta)
359  GNUNET_CONTAINER_meta_data_insert (tree->meta, "<libgnunetfs>",
362  "text/plain", tree->short_filename,
363  strlen (tree->short_filename) + 1);
364  }
365  }
366 
367  if (1 >= num_children)
368  return; /* nothing to trim */
369 
370  /* now, count keywords and meta data in children */
371  for (pos = tree->children_head; NULL != pos; pos = pos->next)
372  {
373  if (NULL != pos->meta)
375  if (NULL != pos->ksk_uri)
377  }
378 
379  /* calculate threshold for moving keywords / meta data */
380  tc->move_threshold = 1 + (num_children / 2);
381 
382  /* remove high-frequency keywords from children */
383  for (pos = tree->children_head; NULL != pos; pos = pos->next)
384  {
385  tc->pos = pos;
386  if (NULL != pos->ksk_uri)
387  {
388  struct GNUNET_FS_Uri *ksk_uri_copy = GNUNET_FS_uri_dup (pos->ksk_uri);
390  GNUNET_FS_uri_destroy (ksk_uri_copy);
391  }
392  }
393 
394  /* add high-frequency meta data and keywords to parent */
395  tc->pos = tree;
398  tc);
401  tc);
402 }
403 
404 
411 void
413 {
414  struct TrimContext tc;
415 
416  if (toplevel == NULL)
417  return;
420  share_tree_trim (&tc, toplevel);
423 }
424 
425 
431 void
433 {
434  struct GNUNET_FS_ShareTreeItem *pos;
435 
436  while (NULL != (pos = toplevel->children_head))
438  if (NULL != toplevel->parent)
440  toplevel->parent->children_tail,
441  toplevel);
442  if (NULL != toplevel->meta)
444  if (NULL != toplevel->ksk_uri)
445  GNUNET_FS_uri_destroy (toplevel->ksk_uri);
446  GNUNET_free_non_null (toplevel->filename);
448  GNUNET_free (toplevel);
449 }
450 
451 /* end fs_sharetree.c */
452 
#define GNUNET_CONTAINER_DLL_remove(head, tail, element)
Remove an element from a DLL.
void GNUNET_FS_share_tree_trim(struct GNUNET_FS_ShareTreeItem *toplevel)
Process a share item tree, moving frequent keywords up and copying frequent metadata up...
Definition: fs_sharetree.c:412
enum EXTRACTOR_MetaFormat format
Format of the data.
Definition: fs_sharetree.c:107
EXTRACTOR_MetaFormat
Format in which the extracted meta data is presented.
struct GNUNET_FS_Uri * GNUNET_FS_uri_ksk_create_from_args(unsigned int argc, const char **argv)
Create an FS URI from a user-supplied command line of keywords.
Definition: fs_uri.c:1146
void GNUNET_FS_uri_ksk_add_keyword(struct GNUNET_FS_Uri *uri, const char *keyword, int is_mandatory)
Add the given keyword to the set of keywords represented by the URI.
Definition: fs_uri.c:751
#define GNUNET_assert(cond)
Use this for fatal errors that cannot be handled.
const char * value
Keyword that was found.
Definition: fs_sharetree.c:53
static int add_to_meta_counter(void *cls, const char *plugin_name, enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, const char *data_mime_type, const char *data, size_t data_len)
Function called on each meta data item.
Definition: fs_sharetree.c:204
static struct GNUNET_SCHEDULER_TaskContext tc
Task context of the current task.
Definition: scheduler.c:417
struct GNUNET_FS_ShareTreeItem * parent
This is a doubly-linked tree NULL for top-level entries.
struct GNUNET_FS_ShareTreeItem * children_tail
This is a doubly-linked tree NULL for files and empty directories.
struct GNUNET_FS_Uri * ksk_uri
Keywords for this file or directory (derived from metadata).
0-terminated, UTF-8 encoded string.
#define GNUNET_NO
Definition: gnunet_common.h:81
#define GNUNET_OK
Named constants for return values.
Definition: gnunet_common.h:78
#define GNUNET_free_non_null(ptr)
Free the memory pointed to by ptr if ptr is not NULL.
struct MetaCounter * prev
This is a doubly-linked list.
Definition: fs_sharetree.c:72
#define GNUNET_new(type)
Allocate a struct or union of the given type.
static int migrate_and_drop_keywords(void *cls, const struct GNUNET_HashCode *key, void *value)
Move "frequent" keywords over to the target ksk uri, free the counters.
Definition: fs_sharetree.c:271
struct GNUNET_CONTAINER_MultiHashMap * metacounter
Map from the hash over the metadata to a 'struct MetaCounter *' counter that says how often this met...
Definition: fs_sharetree.c:137
A structure that forms a singly-linked list that serves as a stack for metadata-processing function...
Definition: fs_sharetree.c:122
EXTRACTOR_MetaType
Enumeration defining various sources of keywords.
unsigned int count
How many files have meta entries matching this value? (type and format do not have to match)...
Definition: fs_sharetree.c:113
Entry for each unique keyword to track how often it occurred.
Definition: fs_sharetree.c:37
struct GNUNET_FS_Uri * GNUNET_FS_uri_dup(const struct GNUNET_FS_Uri *uri)
Duplicate URI.
Definition: fs_uri.c:988
Internal representation of the hash map.
static struct GNUNET_CONTAINER_MultiPeerMap * map
Handle to the map used to store old latency values for peers.
void * GNUNET_CONTAINER_multihashmap_get(const struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key)
Given a key find a value in the map matching the key.
struct GNUNET_CONTAINER_MultiHashMap * keywordcounter
Map from the hash over the keyword to a 'struct KeywordCounter *' counter that says how often this k...
Definition: fs_sharetree.c:130
struct GNUNET_FS_ShareTreeItem * next
This is a doubly-linked list.
int GNUNET_FS_uri_ksk_get_keywords(const struct GNUNET_FS_Uri *uri, GNUNET_FS_KeywordIterator iterator, void *iterator_cls)
Iterate over all keywords in this keyword URI.
Definition: fs_uri.c:719
static void share_tree_trim(struct TrimContext *tc, struct GNUNET_FS_ShareTreeItem *tree)
Process a share item tree, moving frequent keywords up and copying frequent metadata up...
Definition: fs_sharetree.c:335
A node of a directory tree (produced by dirscanner)
static int migrate_and_drop_metadata(void *cls, const struct GNUNET_HashCode *key, void *value)
Copy "frequent" metadata items over to the target metadata container, free the counters.
Definition: fs_sharetree.c:302
#define GNUNET_memcpy(dst, src, n)
struct GNUNET_FS_ShareTreeItem * children_head
This is a doubly-linked tree NULL for files and empty directories.
void GNUNET_CRYPTO_hash(const void *block, size_t size, struct GNUNET_HashCode *ret)
Compute hash of a given block.
Definition: crypto_hash.c:44
void GNUNET_CONTAINER_multihashmap_destroy(struct GNUNET_CONTAINER_MultiHashMap *map)
Destroy a hash map.
struct GNUNET_CONTAINER_MetaData * GNUNET_CONTAINER_meta_data_create(void)
Create a fresh meta data container.
unsigned int count
How many files have this keyword?
Definition: fs_sharetree.c:58
int GNUNET_CONTAINER_multihashmap_remove(struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key, const void *value)
Remove the given key-value pair from the map.
void GNUNET_CONTAINER_meta_data_destroy(struct GNUNET_CONTAINER_MetaData *md)
Free meta data.
static char * plugin_name
Solver plugin name as string.
void GNUNET_FS_uri_destroy(struct GNUNET_FS_Uri *uri)
Free URI.
Definition: fs_uri.c:670
A 512-bit hashcode.
int GNUNET_CONTAINER_meta_data_insert(struct GNUNET_CONTAINER_MetaData *md, const char *plugin_name, enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, const char *data_mime_type, const char *data, size_t data_size)
Extend metadata.
char * short_filename
Base name of the file/directory.
There must only be one value per key; storing a value should fail if a value under the same key alrea...
struct GNUNET_HashCode key
The key used in the DHT.
const char * plugin_name
Name of the plugin that provided that piece of metadata.
Definition: fs_sharetree.c:82
char * getenv()
enum EXTRACTOR_MetaType type
Type of the data.
Definition: fs_sharetree.c:102
int is_directory
GNUNET_YES if this is a directory
static int add_to_keyword_counter(void *cls, const char *keyword, int is_mandatory)
Add the given keyword to the keyword statistics tracker.
Definition: fs_sharetree.c:161
struct GNUNET_FS_ShareTreeItem * pos
Position we are currently manipulating.
Definition: fs_sharetree.c:142
char * filename
Name of the file/directory.
int GNUNET_CONTAINER_multihashmap_put(struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key, void *value, enum GNUNET_CONTAINER_MultiHashMapOption opt)
Store a key-value pair in the map.
struct GNUNET_CONTAINER_MetaData * meta
Metadata for this file or directory.
struct KeywordCounter * next
This is a doubly-linked list.
Definition: fs_sharetree.c:48
struct KeywordCounter * prev
This is a doubly-linked list.
Definition: fs_sharetree.c:43
void GNUNET_FS_share_tree_free(struct GNUNET_FS_ShareTreeItem *toplevel)
Release memory of a share item tree.
Definition: fs_sharetree.c:432
A Universal Resource Identifier (URI), opaque.
Definition: fs_api.h:168
int GNUNET_CONTAINER_meta_data_iterate(const struct GNUNET_CONTAINER_MetaData *md, EXTRACTOR_MetaDataProcessor iter, void *iter_cls)
Iterate over MD entries.
Aggregate information we keep for meta data in each directory.
Definition: fs_sharetree.c:66
struct GNUNET_CONTAINER_MultiHashMap * GNUNET_CONTAINER_multihashmap_create(unsigned int len, int do_not_copy_keys)
Create a multi hash map.
size_t data_size
Number of bytes in 'data'.
Definition: fs_sharetree.c:97
struct MetaCounter * next
This is a doubly-linked list.
Definition: fs_sharetree.c:77
enum GNUNET_TESTBED_UnderlayLinkModelType type
the type of this model
#define GNUNET_YES
Definition: gnunet_common.h:80
void GNUNET_FS_uri_ksk_remove_keyword(struct GNUNET_FS_Uri *uri, const char *keyword)
Remove the given keyword from the set of keywords represented by the URI.
Definition: fs_uri.c:779
const char * data_mime_type
MIME-type of the metadata itself.
Definition: fs_sharetree.c:87
int GNUNET_CONTAINER_multihashmap_iterate(struct GNUNET_CONTAINER_MultiHashMap *map, GNUNET_CONTAINER_MulitHashMapIteratorCallback it, void *it_cls)
Iterate over all entries in the map.
uint32_t data
The data value.
#define GNUNET_malloc(size)
Wrapper around malloc.
unsigned int move_threshold
Number of times an item has to be found to be moved to the parent.
Definition: fs_sharetree.c:147
#define GNUNET_free(ptr)
Wrapper around free.
const char * data
The actual meta data.
Definition: fs_sharetree.c:92
static int remove_high_frequency_keywords(void *cls, const char *keyword, int is_mandatory)
Remove keywords above the threshold.
Definition: fs_sharetree.c:242