GNUnet 0.22.0
fs_sharetree.c
Go to the documentation of this file.
1/*
2 This file is part of GNUnet
3 Copyright (C) 2005-2012 GNUnet e.V.
4
5 GNUnet is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Affero General Public License as published
7 by the Free Software Foundation, either version 3 of the License,
8 or (at your option) any later version.
9
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Affero General Public License for more details.
14
15 You should have received a copy of the GNU Affero General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
17
18 SPDX-License-Identifier: AGPL3.0-or-later
19 */
20
27#include "platform.h"
28
29#include "gnunet_fs_service.h"
31#include <pthread.h>
32
33
39{
44
49
53 const char *value;
54
58 unsigned int count;
59};
60
61
66{
71
76
80 const char *plugin_name;
81
85 const char *data_mime_type;
86
90 const char *data;
91
95 size_t data_size;
96
100 enum EXTRACTOR_MetaType type;
101
105 enum EXTRACTOR_MetaFormat format;
106
111 unsigned int count;
112};
113
114
120{
127
134
139
143 unsigned int move_threshold;
144};
145
146
155static int
156add_to_keyword_counter (void *cls, const char *keyword, int is_mandatory)
157{
158 struct GNUNET_CONTAINER_MultiHashMap *mcm = cls;
159 struct KeywordCounter *cnt;
160 struct GNUNET_HashCode hc;
161 size_t klen;
162
163 klen = strlen (keyword) + 1;
164 GNUNET_CRYPTO_hash (keyword, klen - 1, &hc);
165 cnt = GNUNET_CONTAINER_multihashmap_get (mcm, &hc);
166 if (cnt == NULL)
167 {
168 cnt = GNUNET_malloc (sizeof(struct KeywordCounter) + klen);
169 cnt->value = (const char *) &cnt[1];
170 GNUNET_memcpy (&cnt[1], keyword, klen);
173 &hc, cnt,
175 }
176 cnt->count++;
177 return GNUNET_OK;
178}
179
180
198static int
199add_to_meta_counter (void *cls, const char *plugin_name,
200 enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat
201 format,
202 const char *data_mime_type, const char *data, size_t
203 data_len)
204{
206 struct GNUNET_HashCode key;
207 struct MetaCounter *cnt;
208
209 GNUNET_CRYPTO_hash (data, data_len, &key);
211 if (NULL == cnt)
212 {
213 cnt = GNUNET_new (struct MetaCounter);
214 cnt->data = data;
215 cnt->data_size = data_len;
217 cnt->type = type;
218 cnt->format = format;
222 &key, cnt,
224 }
225 cnt->count++;
226 return 0;
227}
228
229
238static int
239remove_high_frequency_keywords (void *cls, const char *keyword, int
240 is_mandatory)
241{
242 struct TrimContext *tc = cls;
243 struct KeywordCounter *counter;
244 struct GNUNET_HashCode hc;
245 size_t klen;
246
247 klen = strlen (keyword) + 1;
248 GNUNET_CRYPTO_hash (keyword, klen - 1, &hc);
249 counter = GNUNET_CONTAINER_multihashmap_get (tc->keywordcounter, &hc);
250 GNUNET_assert (NULL != counter);
251 if (counter->count < tc->move_threshold)
252 return GNUNET_OK;
254 counter->value);
255 return GNUNET_OK;
256}
257
258
268static int
270 void *value)
271{
272 struct TrimContext *tc = cls;
273 struct KeywordCounter *counter = value;
274
275 if (counter->count >= tc->move_threshold)
276 {
277 if (NULL == tc->pos->ksk_uri)
278 tc->pos->ksk_uri = GNUNET_FS_uri_ksk_create_from_args (1,
279 &counter->value);
280 else
281 GNUNET_FS_uri_ksk_add_keyword (tc->pos->ksk_uri, counter->value,
282 GNUNET_NO);
283 }
286 key,
287 counter));
288 GNUNET_free (counter);
289 return GNUNET_YES;
290}
291
292
302static int
304 void *value)
305{
306 struct TrimContext *tc = cls;
307 struct MetaCounter *counter = value;
308
309 if (counter->count >= tc->move_threshold)
310 {
311 if (NULL == tc->pos->meta)
312 tc->pos->meta = GNUNET_FS_meta_data_create ();
313 GNUNET_FS_meta_data_insert (tc->pos->meta,
314 counter->plugin_name,
315 counter->type,
316 counter->format,
317 counter->data_mime_type, counter->data,
318 counter->data_size);
319 }
322 key,
323 counter));
324 GNUNET_free (counter);
325 return GNUNET_YES;
326}
327
328
336static void
338 struct GNUNET_FS_ShareTreeItem *tree)
339{
340 struct GNUNET_FS_ShareTreeItem *pos;
341 unsigned int num_children;
342
343 /* first, trim all children */
344 num_children = 0;
345 for (pos = tree->children_head; NULL != pos; pos = pos->next)
346 {
347 share_tree_trim (tc, pos);
348 num_children++;
349 }
350
351 /* consider adding filename to directory meta data */
352 if (tree->is_directory == GNUNET_YES)
353 {
354 const char *user = getenv ("USER");
355 if ((user == NULL) ||
356 (0 != strncasecmp (user, tree->short_filename, strlen (user))))
357 {
358 /* only use filename if it doesn't match $USER */
359 if (NULL == tree->meta)
361 GNUNET_FS_meta_data_insert (tree->meta, "<libgnunetfs>",
363 EXTRACTOR_METAFORMAT_UTF8,
364 "text/plain", tree->short_filename,
365 strlen (tree->short_filename) + 1);
366 }
367 }
368
369 if (1 >= num_children)
370 return; /* nothing to trim */
371
372 /* now, count keywords and meta data in children */
373 for (pos = tree->children_head; NULL != pos; pos = pos->next)
374 {
375 if (NULL != pos->meta)
377 tc->metacounter);
378 if (NULL != pos->ksk_uri)
380 tc->keywordcounter);
381 }
382
383 /* calculate threshold for moving keywords / meta data */
384 tc->move_threshold = 1 + (num_children / 2);
385
386 /* remove high-frequency keywords from children */
387 for (pos = tree->children_head; NULL != pos; pos = pos->next)
388 {
389 tc->pos = pos;
390 if (NULL != pos->ksk_uri)
391 {
392 struct GNUNET_FS_Uri *ksk_uri_copy = GNUNET_FS_uri_dup (pos->ksk_uri);
393 GNUNET_FS_uri_ksk_get_keywords (ksk_uri_copy,
395 GNUNET_FS_uri_destroy (ksk_uri_copy);
396 }
397 }
398
399 /* add high-frequency meta data and keywords to parent */
400 tc->pos = tree;
403 tc);
406 tc);
407}
408
409
416void
418{
419 struct TrimContext tc;
420
421 if (toplevel == NULL)
422 return;
423 tc.keywordcounter = GNUNET_CONTAINER_multihashmap_create (1024, GNUNET_NO);
425 share_tree_trim (&tc, toplevel);
428}
429
430
436void
438{
439 struct GNUNET_FS_ShareTreeItem *pos;
440
441 while (NULL != (pos = toplevel->children_head))
443 if (NULL != toplevel->parent)
445 toplevel->parent->children_tail,
446 toplevel);
447 if (NULL != toplevel->meta)
449 if (NULL != toplevel->ksk_uri)
450 GNUNET_FS_uri_destroy (toplevel->ksk_uri);
451 GNUNET_free (toplevel->filename);
452 GNUNET_free (toplevel->short_filename);
453 GNUNET_free (toplevel);
454}
455
456
457/* end fs_sharetree.c */
static int add_to_keyword_counter(void *cls, const char *keyword, int is_mandatory)
Add the given keyword to the keyword statistics tracker.
Definition: fs_sharetree.c:156
static int add_to_meta_counter(void *cls, const char *plugin_name, enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, const char *data_mime_type, const char *data, size_t data_len)
Function called on each meta data item.
Definition: fs_sharetree.c:199
static int migrate_and_drop_keywords(void *cls, const struct GNUNET_HashCode *key, void *value)
Move "frequent" keywords over to the target ksk uri, free the counters.
Definition: fs_sharetree.c:269
static void share_tree_trim(struct TrimContext *tc, struct GNUNET_FS_ShareTreeItem *tree)
Process a share item tree, moving frequent keywords up and copying frequent metadata up.
Definition: fs_sharetree.c:337
static int migrate_and_drop_metadata(void *cls, const struct GNUNET_HashCode *key, void *value)
Copy "frequent" metadata items over to the target metadata container, free the counters.
Definition: fs_sharetree.c:303
static int remove_high_frequency_keywords(void *cls, const char *keyword, int is_mandatory)
Remove keywords above the threshold.
Definition: fs_sharetree.c:239
char * getenv()
static char * plugin_name
Name of our plugin.
static char * data
The data to insert into the dht.
struct GNUNET_HashCode key
The key used in the DHT.
static char * value
Value of the record to add/remove.
static uint32_t type
Type string converted to DNS type value.
API for file sharing via GNUnet.
API to schedule computations using continuation passing style.
#define GNUNET_CONTAINER_DLL_remove(head, tail, element)
Remove an element from a DLL.
struct GNUNET_FS_Uri * GNUNET_FS_uri_dup(const struct GNUNET_FS_Uri *uri)
Duplicate URI.
Definition: fs_uri.c:987
#define EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME
void GNUNET_FS_share_tree_free(struct GNUNET_FS_ShareTreeItem *toplevel)
Release memory of a share item tree.
Definition: fs_sharetree.c:437
void GNUNET_FS_uri_ksk_remove_keyword(struct GNUNET_FS_Uri *uri, const char *keyword)
Remove the given keyword from the set of keywords represented by the URI.
Definition: fs_uri.c:780
struct GNUNET_FS_Uri * GNUNET_FS_uri_ksk_create_from_args(unsigned int argc, const char **argv)
Create an FS URI from a user-supplied command line of keywords.
Definition: fs_uri.c:1144
int GNUNET_FS_uri_ksk_get_keywords(const struct GNUNET_FS_Uri *uri, GNUNET_FS_KeywordIterator iterator, void *iterator_cls)
Iterate over all keywords in this keyword URI.
Definition: fs_uri.c:720
void GNUNET_FS_uri_destroy(struct GNUNET_FS_Uri *uri)
Free URI.
Definition: fs_uri.c:677
void GNUNET_FS_share_tree_trim(struct GNUNET_FS_ShareTreeItem *toplevel)
Process a share item tree, moving frequent keywords up and copying frequent metadata up.
Definition: fs_sharetree.c:417
void GNUNET_FS_uri_ksk_add_keyword(struct GNUNET_FS_Uri *uri, const char *keyword, int is_mandatory)
Add the given keyword to the set of keywords represented by the URI.
Definition: fs_uri.c:752
void GNUNET_CRYPTO_hash(const void *block, size_t size, struct GNUNET_HashCode *ret)
Compute hash of a given block.
Definition: crypto_hash.c:41
int GNUNET_CONTAINER_multihashmap_iterate(struct GNUNET_CONTAINER_MultiHashMap *map, GNUNET_CONTAINER_MultiHashMapIteratorCallback it, void *it_cls)
Iterate over all entries in the map.
void * GNUNET_CONTAINER_multihashmap_get(const struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key)
Given a key find a value in the map matching the key.
enum GNUNET_GenericReturnValue GNUNET_CONTAINER_multihashmap_remove(struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key, const void *value)
Remove the given key-value pair from the map.
enum GNUNET_GenericReturnValue GNUNET_CONTAINER_multihashmap_put(struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key, void *value, enum GNUNET_CONTAINER_MultiHashMapOption opt)
Store a key-value pair in the map.
void GNUNET_CONTAINER_multihashmap_destroy(struct GNUNET_CONTAINER_MultiHashMap *map)
Destroy a hash map.
struct GNUNET_CONTAINER_MultiHashMap * GNUNET_CONTAINER_multihashmap_create(unsigned int len, int do_not_copy_keys)
Create a multi hash map.
@ GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY
There must only be one value per key; storing a value should fail if a value under the same key alrea...
#define GNUNET_memcpy(dst, src, n)
Call memcpy() but check for n being 0 first.
@ GNUNET_OK
@ GNUNET_YES
@ GNUNET_NO
#define GNUNET_assert(cond)
Use this for fatal errors that cannot be handled.
#define GNUNET_new(type)
Allocate a struct or union of the given type.
#define GNUNET_malloc(size)
Wrapper around malloc.
#define GNUNET_free(ptr)
Wrapper around free.
int GNUNET_FS_meta_data_iterate(const struct GNUNET_FS_MetaData *md, EXTRACTOR_MetaDataProcessor iter, void *iter_cls)
Iterate over MD entries.
Definition: meta_data.c:418
struct GNUNET_FS_MetaData * GNUNET_FS_meta_data_create(void)
Create a fresh meta data container.
Definition: meta_data.c:127
int GNUNET_FS_meta_data_insert(struct GNUNET_FS_MetaData *md, const char *plugin_name, enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, const char *data_mime_type, const char *data, size_t data_size)
Extend metadata.
Definition: meta_data.c:254
void GNUNET_FS_meta_data_destroy(struct GNUNET_FS_MetaData *md)
Free meta data.
Definition: meta_data.c:166
static struct GNUNET_CONTAINER_MultiPeerMap * map
Peermap of PeerIdentities to "struct PeerEntry" (for fast lookup).
Definition: peer.c:63
static struct GNUNET_SCHEDULER_TaskContext tc
Task context of the current task.
Definition: scheduler.c:430
Internal representation of the hash map.
A node of a directory tree (produced by dirscanner)
int is_directory
GNUNET_YES if this is a directory
struct GNUNET_FS_ShareTreeItem * parent
This is a doubly-linked tree NULL for top-level entries.
struct GNUNET_FS_Uri * ksk_uri
Keywords for this file or directory (derived from metadata).
char * short_filename
Base name of the file/directory.
struct GNUNET_FS_ShareTreeItem * next
This is a doubly-linked list.
struct GNUNET_FS_ShareTreeItem * children_head
This is a doubly-linked tree NULL for files and empty directories.
struct GNUNET_FS_MetaData * meta
Metadata for this file or directory.
char * filename
Name of the file/directory.
struct GNUNET_FS_ShareTreeItem * children_tail
This is a doubly-linked tree NULL for files and empty directories.
A Universal Resource Identifier (URI), opaque.
Definition: fs_api.h:167
A 512-bit hashcode.
Entry for each unique keyword to track how often it occurred.
Definition: fs_sharetree.c:39
unsigned int count
How many files have this keyword?
Definition: fs_sharetree.c:58
const char * value
Keyword that was found.
Definition: fs_sharetree.c:53
struct KeywordCounter * next
This is a doubly-linked list.
Definition: fs_sharetree.c:48
struct KeywordCounter * prev
This is a doubly-linked list.
Definition: fs_sharetree.c:43
Aggregate information we keep for meta data in each directory.
Definition: fs_sharetree.c:66
enum EXTRACTOR_MetaFormat format
Format of the data.
Definition: fs_sharetree.c:105
enum EXTRACTOR_MetaType type
Type of the data.
Definition: fs_sharetree.c:100
const char * plugin_name
Name of the plugin that provided that piece of metadata.
Definition: fs_sharetree.c:80
unsigned int count
How many files have meta entries matching this value? (type and format do not have to match).
Definition: fs_sharetree.c:111
struct MetaCounter * next
This is a doubly-linked list.
Definition: fs_sharetree.c:75
size_t data_size
Number of bytes in 'data'.
Definition: fs_sharetree.c:95
const char * data_mime_type
MIME-type of the metadata itself.
Definition: fs_sharetree.c:85
const char * data
The actual meta data.
Definition: fs_sharetree.c:90
struct MetaCounter * prev
This is a doubly-linked list.
Definition: fs_sharetree.c:70
A structure that forms a singly-linked list that serves as a stack for metadata-processing function.
Definition: fs_sharetree.c:120
struct GNUNET_FS_ShareTreeItem * pos
Position we are currently manipulating.
Definition: fs_sharetree.c:138
unsigned int move_threshold
Number of times an item has to be found to be moved to the parent.
Definition: fs_sharetree.c:143
struct GNUNET_CONTAINER_MultiHashMap * keywordcounter
Map from the hash over the keyword to an 'struct KeywordCounter *' counter that says how often this k...
Definition: fs_sharetree.c:126
struct GNUNET_CONTAINER_MultiHashMap * metacounter
Map from the hash over the metadata to an 'struct MetaCounter *' counter that says how often this met...
Definition: fs_sharetree.c:133