GNUnet 0.22.0
fs_sharetree.c File Reference

code to manipulate the 'struct GNUNET_FS_ShareTreeItem' tree More...

#include "platform.h"
#include "gnunet_fs_service.h"
#include "gnunet_scheduler_lib.h"
#include <pthread.h>
Include dependency graph for fs_sharetree.c:

Go to the source code of this file.

Data Structures

struct  KeywordCounter
 Entry for each unique keyword to track how often it occurred. More...
 
struct  MetaCounter
 Aggregate information we keep for meta data in each directory. More...
 
struct  TrimContext
 A structure that forms a singly-linked list that serves as a stack for the metadata-processing function. More...
 

Functions

static int add_to_keyword_counter (void *cls, const char *keyword, int is_mandatory)
 Add the given keyword to the keyword statistics tracker. More...
 
static int add_to_meta_counter (void *cls, const char *plugin_name, enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, const char *data_mime_type, const char *data, size_t data_len)
 Function called on each meta data item. More...
 
static int remove_high_frequency_keywords (void *cls, const char *keyword, int is_mandatory)
 Remove keywords above the threshold. More...
 
static int migrate_and_drop_keywords (void *cls, const struct GNUNET_HashCode *key, void *value)
 Move "frequent" keywords over to the target ksk uri, free the counters. More...
 
static int migrate_and_drop_metadata (void *cls, const struct GNUNET_HashCode *key, void *value)
 Copy "frequent" metadata items over to the target metadata container, free the counters. More...
 
static void share_tree_trim (struct TrimContext *tc, struct GNUNET_FS_ShareTreeItem *tree)
 Process a share item tree, moving frequent keywords up and copying frequent metadata up. More...
 
void GNUNET_FS_share_tree_trim (struct GNUNET_FS_ShareTreeItem *toplevel)
 Process a share item tree, moving frequent keywords up and copying frequent metadata up. More...
 
void GNUNET_FS_share_tree_free (struct GNUNET_FS_ShareTreeItem *toplevel)
 Release memory of a share item tree. More...
 

Detailed Description

code to manipulate the 'struct GNUNET_FS_ShareTreeItem' tree

Author
LRN
Christian Grothoff

Definition in file fs_sharetree.c.

Function Documentation

◆ add_to_keyword_counter()

static int add_to_keyword_counter ( void *  cls,
const char *  keyword,
int  is_mandatory 
)
static

Add the given keyword to the keyword statistics tracker.

Parameters
cls: the multihashmap we store the keyword counters in
keyword: the keyword to count
is_mandatory: ignored
Returns
always GNUNET_OK

Definition at line 156 of file fs_sharetree.c.

157{
158 struct GNUNET_CONTAINER_MultiHashMap *mcm = cls;
159 struct KeywordCounter *cnt;
160 struct GNUNET_HashCode hc;
161 size_t klen;
162
163 klen = strlen (keyword) + 1;
164 GNUNET_CRYPTO_hash (keyword, klen - 1, &hc);
165 cnt = GNUNET_CONTAINER_multihashmap_get (mcm, &hc);
166 if (cnt == NULL)
167 {
168 cnt = GNUNET_malloc (sizeof(struct KeywordCounter) + klen);
169 cnt->value = (const char *) &cnt[1];
170 GNUNET_memcpy (&cnt[1], keyword, klen);
173 &hc, cnt,
175 }
176 cnt->count++;
177 return GNUNET_OK;
178}
void GNUNET_CRYPTO_hash(const void *block, size_t size, struct GNUNET_HashCode *ret)
Compute hash of a given block.
Definition: crypto_hash.c:41
void * GNUNET_CONTAINER_multihashmap_get(const struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key)
Given a key find a value in the map matching the key.
enum GNUNET_GenericReturnValue GNUNET_CONTAINER_multihashmap_put(struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key, void *value, enum GNUNET_CONTAINER_MultiHashMapOption opt)
Store a key-value pair in the map.
@ GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY
There must only be one value per key; storing a value should fail if a value under the same key already exists.
#define GNUNET_memcpy(dst, src, n)
Call memcpy() but check for n being 0 first.
@ GNUNET_OK
#define GNUNET_assert(cond)
Use this for fatal errors that cannot be handled.
#define GNUNET_malloc(size)
Wrapper around malloc.
Internal representation of the hash map.
A 512-bit hashcode.
Entry for each unique keyword to track how often it occurred.
Definition: fs_sharetree.c:39
unsigned int count
How many files have this keyword?
Definition: fs_sharetree.c:58
const char * value
Keyword that was found.
Definition: fs_sharetree.c:53

References KeywordCounter::count, GNUNET_assert, GNUNET_CONTAINER_multihashmap_get(), GNUNET_CONTAINER_multihashmap_put(), GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY, GNUNET_CRYPTO_hash(), GNUNET_malloc, GNUNET_memcpy, GNUNET_OK, and KeywordCounter::value.

Referenced by share_tree_trim().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ add_to_meta_counter()

static int add_to_meta_counter ( void *  cls,
const char *  plugin_name,
enum EXTRACTOR_MetaType  type,
enum EXTRACTOR_MetaFormat  format,
const char *  data_mime_type,
const char *  data,
size_t  data_len 
)
static

Function called on each meta data item.

Increments the respective counter.

Parameters
cls: the container multihashmap to update
plugin_name: name of the plugin that produced this value; special values can be used (e.g. '<zlib>' for zlib being used in the main libextractor library and yielding meta data).
type: libextractor-type describing the meta data
format: basic format information about data
data_mime_type: mime-type of data (not of the original file); can be NULL (if mime-type is not known)
data: actual meta-data found
data_len: number of bytes in data
Returns
0 to continue extracting / iterating

Definition at line 199 of file fs_sharetree.c.

204{
206 struct GNUNET_HashCode key;
207 struct MetaCounter *cnt;
208
209 GNUNET_CRYPTO_hash (data, data_len, &key);
211 if (NULL == cnt)
212 {
213 cnt = GNUNET_new (struct MetaCounter);
214 cnt->data = data;
215 cnt->data_size = data_len;
217 cnt->type = type;
218 cnt->format = format;
222 &key, cnt,
224 }
225 cnt->count++;
226 return 0;
227}
static char * plugin_name
Name of our plugin.
static char * data
The data to insert into the dht.
struct GNUNET_HashCode key
The key used in the DHT.
static uint32_t type
Type string converted to DNS type value.
#define GNUNET_new(type)
Allocate a struct or union of the given type.
static struct GNUNET_CONTAINER_MultiPeerMap * map
Peermap of PeerIdentities to "struct PeerEntry" (for fast lookup).
Definition: peer.c:63
Aggregate information we keep for meta data in each directory.
Definition: fs_sharetree.c:66
enum EXTRACTOR_MetaFormat format
Format of the data.
Definition: fs_sharetree.c:105
enum EXTRACTOR_MetaType type
Type of the data.
Definition: fs_sharetree.c:100
const char * plugin_name
Name of the plugin that provided that piece of metadata.
Definition: fs_sharetree.c:80
unsigned int count
How many files have meta entries matching this value? (type and format do not have to match).
Definition: fs_sharetree.c:111
size_t data_size
Number of bytes in 'data'.
Definition: fs_sharetree.c:95
const char * data_mime_type
MIME-type of the metadata itself.
Definition: fs_sharetree.c:85
const char * data
The actual meta data.
Definition: fs_sharetree.c:90

References MetaCounter::count, data, MetaCounter::data, MetaCounter::data_mime_type, MetaCounter::data_size, MetaCounter::format, GNUNET_assert, GNUNET_CONTAINER_multihashmap_get(), GNUNET_CONTAINER_multihashmap_put(), GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY, GNUNET_CRYPTO_hash(), GNUNET_new, GNUNET_OK, key, map, plugin_name, MetaCounter::plugin_name, type, and MetaCounter::type.

Referenced by share_tree_trim().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ remove_high_frequency_keywords()

static int remove_high_frequency_keywords ( void *  cls,
const char *  keyword,
int  is_mandatory 
)
static

Remove keywords above the threshold.

Parameters
cls: the 'struct TrimContext' with the pos to remove the keywords from
keyword: the keyword to check
is_mandatory: ignored
Returns
always GNUNET_OK

Definition at line 239 of file fs_sharetree.c.

241{
242 struct TrimContext *tc = cls;
243 struct KeywordCounter *counter;
244 struct GNUNET_HashCode hc;
245 size_t klen;
246
247 klen = strlen (keyword) + 1;
248 GNUNET_CRYPTO_hash (keyword, klen - 1, &hc);
249 counter = GNUNET_CONTAINER_multihashmap_get (tc->keywordcounter, &hc);
250 GNUNET_assert (NULL != counter);
251 if (counter->count < tc->move_threshold)
252 return GNUNET_OK;
254 counter->value);
255 return GNUNET_OK;
256}
void GNUNET_FS_uri_ksk_remove_keyword(struct GNUNET_FS_Uri *uri, const char *keyword)
Remove the given keyword from the set of keywords represented by the URI.
Definition: fs_uri.c:780
static struct GNUNET_SCHEDULER_TaskContext tc
Task context of the current task.
Definition: scheduler.c:430
A structure that forms a singly-linked list that serves as a stack for the metadata-processing function.
Definition: fs_sharetree.c:120

References KeywordCounter::count, GNUNET_assert, GNUNET_CONTAINER_multihashmap_get(), GNUNET_CRYPTO_hash(), GNUNET_FS_uri_ksk_remove_keyword(), GNUNET_OK, tc, and KeywordCounter::value.

Referenced by share_tree_trim().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ migrate_and_drop_keywords()

static int migrate_and_drop_keywords ( void *  cls,
const struct GNUNET_HashCode *  key,
void *  value 
)
static

Move "frequent" keywords over to the target ksk uri, free the counters.

Parameters
cls: the 'struct TrimContext'
key: key of the entry
value: the 'struct KeywordCounter'
Returns
GNUNET_YES (always)

Definition at line 269 of file fs_sharetree.c.

271{
272 struct TrimContext *tc = cls;
273 struct KeywordCounter *counter = value;
274
275 if (counter->count >= tc->move_threshold)
276 {
277 if (NULL == tc->pos->ksk_uri)
278 tc->pos->ksk_uri = GNUNET_FS_uri_ksk_create_from_args (1,
279 &counter->value);
280 else
281 GNUNET_FS_uri_ksk_add_keyword (tc->pos->ksk_uri, counter->value,
282 GNUNET_NO);
283 }
286 key,
287 counter));
288 GNUNET_free (counter);
289 return GNUNET_YES;
290}
static char * value
Value of the record to add/remove.
struct GNUNET_FS_Uri * GNUNET_FS_uri_ksk_create_from_args(unsigned int argc, const char **argv)
Create an FS URI from a user-supplied command line of keywords.
Definition: fs_uri.c:1144
void GNUNET_FS_uri_ksk_add_keyword(struct GNUNET_FS_Uri *uri, const char *keyword, int is_mandatory)
Add the given keyword to the set of keywords represented by the URI.
Definition: fs_uri.c:752
enum GNUNET_GenericReturnValue GNUNET_CONTAINER_multihashmap_remove(struct GNUNET_CONTAINER_MultiHashMap *map, const struct GNUNET_HashCode *key, const void *value)
Remove the given key-value pair from the map.
@ GNUNET_YES
@ GNUNET_NO
#define GNUNET_free(ptr)
Wrapper around free.

References KeywordCounter::count, GNUNET_assert, GNUNET_CONTAINER_multihashmap_remove(), GNUNET_free, GNUNET_FS_uri_ksk_add_keyword(), GNUNET_FS_uri_ksk_create_from_args(), GNUNET_NO, GNUNET_YES, key, tc, value, and KeywordCounter::value.

Referenced by share_tree_trim().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ migrate_and_drop_metadata()

static int migrate_and_drop_metadata ( void *  cls,
const struct GNUNET_HashCode *  key,
void *  value 
)
static

Copy "frequent" metadata items over to the target metadata container, free the counters.

Parameters
cls: the 'struct TrimContext'
key: key of the entry
value: the 'struct MetaCounter'
Returns
GNUNET_YES (always)

Definition at line 303 of file fs_sharetree.c.

305{
306 struct TrimContext *tc = cls;
307 struct MetaCounter *counter = value;
308
309 if (counter->count >= tc->move_threshold)
310 {
311 if (NULL == tc->pos->meta)
312 tc->pos->meta = GNUNET_FS_meta_data_create ();
313 GNUNET_FS_meta_data_insert (tc->pos->meta,
314 counter->plugin_name,
315 counter->type,
316 counter->format,
317 counter->data_mime_type, counter->data,
318 counter->data_size);
319 }
322 key,
323 counter));
324 GNUNET_free (counter);
325 return GNUNET_YES;
326}
struct GNUNET_FS_MetaData * GNUNET_FS_meta_data_create(void)
Create a fresh meta data container.
Definition: meta_data.c:127
int GNUNET_FS_meta_data_insert(struct GNUNET_FS_MetaData *md, const char *plugin_name, enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, const char *data_mime_type, const char *data, size_t data_size)
Extend metadata.
Definition: meta_data.c:254

References MetaCounter::count, MetaCounter::data, MetaCounter::data_mime_type, MetaCounter::data_size, MetaCounter::format, GNUNET_assert, GNUNET_CONTAINER_multihashmap_remove(), GNUNET_free, GNUNET_FS_meta_data_create(), GNUNET_FS_meta_data_insert(), GNUNET_YES, key, MetaCounter::plugin_name, tc, MetaCounter::type, and value.

Referenced by share_tree_trim().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ share_tree_trim()

static void share_tree_trim ( struct TrimContext *  tc,
struct GNUNET_FS_ShareTreeItem *  tree 
)
static

Process a share item tree, moving frequent keywords up and copying frequent metadata up.

Parameters
tc: trim context with hash maps to use
tree: tree to trim

Definition at line 337 of file fs_sharetree.c.

339{
340 struct GNUNET_FS_ShareTreeItem *pos;
341 unsigned int num_children;
342
343 /* first, trim all children */
344 num_children = 0;
345 for (pos = tree->children_head; NULL != pos; pos = pos->next)
346 {
347 share_tree_trim (tc, pos);
348 num_children++;
349 }
350
351 /* consider adding filename to directory meta data */
352 if (tree->is_directory == GNUNET_YES)
353 {
354 const char *user = getenv ("USER");
355 if ((user == NULL) ||
356 (0 != strncasecmp (user, tree->short_filename, strlen (user))))
357 {
358 /* only use filename if it doesn't match $USER */
359 if (NULL == tree->meta)
361 GNUNET_FS_meta_data_insert (tree->meta, "<libgnunetfs>",
363 EXTRACTOR_METAFORMAT_UTF8,
364 "text/plain", tree->short_filename,
365 strlen (tree->short_filename) + 1);
366 }
367 }
368
369 if (1 >= num_children)
370 return; /* nothing to trim */
371
372 /* now, count keywords and meta data in children */
373 for (pos = tree->children_head; NULL != pos; pos = pos->next)
374 {
375 if (NULL != pos->meta)
377 tc->metacounter);
378 if (NULL != pos->ksk_uri)
380 tc->keywordcounter);
381 }
382
383 /* calculate threshold for moving keywords / meta data */
384 tc->move_threshold = 1 + (num_children / 2);
385
386 /* remove high-frequency keywords from children */
387 for (pos = tree->children_head; NULL != pos; pos = pos->next)
388 {
389 tc->pos = pos;
390 if (NULL != pos->ksk_uri)
391 {
392 struct GNUNET_FS_Uri *ksk_uri_copy = GNUNET_FS_uri_dup (pos->ksk_uri);
393 GNUNET_FS_uri_ksk_get_keywords (ksk_uri_copy,
395 GNUNET_FS_uri_destroy (ksk_uri_copy);
396 }
397 }
398
399 /* add high-frequency meta data and keywords to parent */
400 tc->pos = tree;
403 tc);
406 tc);
407}
static int add_to_keyword_counter(void *cls, const char *keyword, int is_mandatory)
Add the given keyword to the keyword statistics tracker.
Definition: fs_sharetree.c:156
static int add_to_meta_counter(void *cls, const char *plugin_name, enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, const char *data_mime_type, const char *data, size_t data_len)
Function called on each meta data item.
Definition: fs_sharetree.c:199
static int migrate_and_drop_keywords(void *cls, const struct GNUNET_HashCode *key, void *value)
Move "frequent" keywords over to the target ksk uri, free the counters.
Definition: fs_sharetree.c:269
static void share_tree_trim(struct TrimContext *tc, struct GNUNET_FS_ShareTreeItem *tree)
Process a share item tree, moving frequent keywords up and copying frequent metadata up.
Definition: fs_sharetree.c:337
static int migrate_and_drop_metadata(void *cls, const struct GNUNET_HashCode *key, void *value)
Copy "frequent" metadata items over to the target metadata container, free the counters.
Definition: fs_sharetree.c:303
static int remove_high_frequency_keywords(void *cls, const char *keyword, int is_mandatory)
Remove keywords above the threshold.
Definition: fs_sharetree.c:239
char * getenv()
struct GNUNET_FS_Uri * GNUNET_FS_uri_dup(const struct GNUNET_FS_Uri *uri)
Duplicate URI.
Definition: fs_uri.c:987
#define EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME
int GNUNET_FS_uri_ksk_get_keywords(const struct GNUNET_FS_Uri *uri, GNUNET_FS_KeywordIterator iterator, void *iterator_cls)
Iterate over all keywords in this keyword URI.
Definition: fs_uri.c:720
void GNUNET_FS_uri_destroy(struct GNUNET_FS_Uri *uri)
Free URI.
Definition: fs_uri.c:677
int GNUNET_CONTAINER_multihashmap_iterate(struct GNUNET_CONTAINER_MultiHashMap *map, GNUNET_CONTAINER_MultiHashMapIteratorCallback it, void *it_cls)
Iterate over all entries in the map.
int GNUNET_FS_meta_data_iterate(const struct GNUNET_FS_MetaData *md, EXTRACTOR_MetaDataProcessor iter, void *iter_cls)
Iterate over MD entries.
Definition: meta_data.c:418
A node of a directory tree (produced by dirscanner)
int is_directory
GNUNET_YES if this is a directory
struct GNUNET_FS_Uri * ksk_uri
Keywords for this file or directory (derived from metadata).
char * short_filename
Base name of the file/directory.
struct GNUNET_FS_ShareTreeItem * next
This is a doubly-linked list.
struct GNUNET_FS_ShareTreeItem * children_head
This is a doubly-linked tree; NULL for files and empty directories.
struct GNUNET_FS_MetaData * meta
Metadata for this file or directory.
A Universal Resource Identifier (URI), opaque.
Definition: fs_api.h:167

References add_to_keyword_counter(), add_to_meta_counter(), GNUNET_FS_ShareTreeItem::children_head, EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME, getenv(), GNUNET_CONTAINER_multihashmap_iterate(), GNUNET_FS_meta_data_create(), GNUNET_FS_meta_data_insert(), GNUNET_FS_meta_data_iterate(), GNUNET_FS_uri_destroy(), GNUNET_FS_uri_dup(), GNUNET_FS_uri_ksk_get_keywords(), GNUNET_YES, GNUNET_FS_ShareTreeItem::is_directory, GNUNET_FS_ShareTreeItem::ksk_uri, GNUNET_FS_ShareTreeItem::meta, migrate_and_drop_keywords(), migrate_and_drop_metadata(), GNUNET_FS_ShareTreeItem::next, remove_high_frequency_keywords(), share_tree_trim(), GNUNET_FS_ShareTreeItem::short_filename, and tc.

Referenced by GNUNET_FS_share_tree_trim(), and share_tree_trim().

Here is the call graph for this function:
Here is the caller graph for this function: