GNUnet 0.21.0
gnunet-chk.py
Go to the documentation of this file.
1#!/home/buildbot/bb-worker/sandbox/bin/python
2# This file is part of GNUnet.
3# (C) 2013, 2018 Christian Grothoff (and other contributing authors)
4#
5# GNUnet is free software: you can redistribute it and/or modify it
6# under the terms of the GNU Affero General Public License as published
7# by the Free Software Foundation, either version 3 of the License, or
8# (at your option) any later version.
9#
10# GNUnet is distributed in the hope that it will be useful, but
11# WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13# Affero General Public License for more details.
14#
15# You should have received a copy of the GNU Affero General Public License
16# along with this program. If not, see <http://www.gnu.org/licenses/>.
17#
18# SPDX-License-Identifier: AGPL-3.0-or-later
19#
20# File: gnunet-chk.py
21# Brief: Computes GNUNET style Content Hash Key for a given file
22# Author: Sree Harsha Totakura
23
24from hashlib import sha512
25import logging
26import os
27import getopt
28import sys
29from Crypto.Cipher import AES
30from functools import reduce
31
# Defaults
DBLOCK_SIZE = 32 * 1024  # Data block size in bytes

# Pick a multiple of 2 here to achieve 8-byte alignment!  We also
# probably want DBlocks to have (roughly) the same size as IBlocks.
# With SHA-512, the optimal value is 32768 byte / 128 byte = 256 (128
# byte = 2 * 512 bits).  DO NOT CHANGE!
CHK_PER_INODE = 256

CHK_HASH_SIZE = 64  # SHA-512 hash = 512 bits = 64 bytes

CHK_QUERY_SIZE = CHK_HASH_SIZE  # Again a SHA-512 hash

GNUNET_FS_URI_PREFIX = "gnunet://fs/"  # FS CHK URI prefix

GNUNET_FS_URI_CHK_INFIX = "chk/"  # FS CHK URI infix
48
49
def encode_data_to_string(data):
    """Returns an ASCII encoding of the given data block like
    GNUNET_STRINGS_data_to_string() function.

    The input is consumed as one long bit string, most significant bit
    first, and emitted five bits at a time using the 32-character alphabet
    0-9, A-V.  If the number of input bits is not a multiple of five, the
    last group is zero-padded on the right.

    data: A bytearray (or bytes) representing the block of data which has to
          be encoded; must not be empty
    Returns the encoded text as a str.
    Raises TypeError if data is not bytes/bytearray and ValueError if it is
    empty.
    """
    echart = "0123456789ABCDEFGHIJKLMNOPQRSTUV"
    if not isinstance(data, (bytes, bytearray)):
        raise TypeError("data must be bytes or bytearray, not "
                        + type(data).__name__)
    size = len(data)
    if 0 == size:
        raise ValueError("data must not be empty")
    vbit = 0  # number of not-yet-emitted bits held in `bits`
    rpos = 0  # index of the next input byte
    bits = 0  # bit accumulator; only the low `vbit` bits are meaningful
    out = []
    while (rpos < size) or (vbit > 0):
        if (rpos < size) and (vbit < 5):
            bits = (bits << 8) | data[rpos]  # eat 8 more bits
            rpos += 1
            vbit += 8
        if vbit < 5:
            # Only reachable once the input is exhausted: left-align the
            # remaining bits inside a final 5-bit group.
            bits <<= (5 - vbit)  # zero-padding
            assert vbit == ((size * 8) % 5)
            vbit = 5
        out.append(echart[(bits >> (vbit - 5)) & 31])
        vbit -= 5
        # Drop the bits that were just emitted so the accumulator does not
        # grow with the input length.
        bits &= (1 << vbit) - 1
    assert 0 == vbit
    return "".join(out)
80
81
def sha512_hash(data):
    """Returns the sha512 hash of the given data.

    data: the bytes (or other bytes-like object) to hash
    Returns the 64-byte binary digest.
    """
    return sha512(data).digest()
90
91
class AESKey(object):
    """Class for AES Keys. Contains the main key and the initialization
    vector. """

    key = None  # The actual AES key
    iv = None  # The initialization vector
    cipher = None  # The cipher object
    KEY_SIZE = 32  # AES 256-bit key = 32 bytes
    IV_SIZE = AES.block_size  # Initialization vector size (= AES block size)

    def __init__(self, passphrase):
        """Creates a new AES key.

        The first KEY_SIZE bytes of the passphrase become the key and the
        following IV_SIZE bytes become the initialization vector.  Whatever
        the passphrase does not cover is filled with zero bytes; anything
        beyond KEY_SIZE + IV_SIZE bytes is ignored.

        passphrase: bytes-like object containing the passphrase to get the
                    AES key and initialization vector
        """
        passphrase = bytes(passphrase)
        iv_end = self.KEY_SIZE + self.IV_SIZE
        # Key and IV are stored as immutable bytes: str() of a bytearray
        # would produce its repr text rather than the raw key material.
        self.key = passphrase[:self.KEY_SIZE].ljust(self.KEY_SIZE, b"\0")
        self.iv = passphrase[self.KEY_SIZE:iv_end].ljust(self.IV_SIZE, b"\0")
        assert len(self.key) == self.KEY_SIZE
        assert len(self.iv) == self.IV_SIZE
124
125
def setup_aes_cipher_(aes_key):
    """Initializes the AES object with settings similar to those in GNUnet.

    The cipher is created in CFB mode with a 128-bit segment size, using the
    key and initialization vector stored in aes_key.

    aes_key: the AESKey object
    Returns the newly initialized AES object
    """
    return AES.new(aes_key.key, AES.MODE_CFB, aes_key.iv, segment_size=128)
133
134
def aes_pad_(data):
    """Adds padding to the data such that the size of the data is a multiple of
    16 bytes

    data: the data (bytes or bytearray) to pad
    Returns a tuple:(pad_len, data). pad_len denotes the number of bytes added
    as padding; data is the new data with pad_len zero bytes at the end.  The
    argument itself is never modified.
    """
    pad_len = len(data) % 16
    if 0 != pad_len:
        pad_len = 16 - pad_len
        # Build a new object rather than using +=, which would extend a
        # caller-owned bytearray in place.
        data = data + bytes(pad_len)
    return (pad_len, data)
149
150
def aes_encrypt(aes_key, data):
    """Encrypts the given data using AES.

    The data is zero-padded to a multiple of 16 bytes before it is handed to
    the cipher, and the same number of bytes is cut off the end of the
    ciphertext again, so the result has the same length as the input.

    aes_key: the AESKey object to use for AES encryption
    data: the data (bytes) to encrypt
    Returns the encrypted data.
    """
    (pad_len, padded) = aes_pad_(data)
    cipher = setup_aes_cipher_(aes_key)
    enc_data = cipher.encrypt(padded)
    if 0 != pad_len:
        enc_data = enc_data[:-pad_len]
    return enc_data
163
164
def aes_decrypt(aes_key, data):
    """Decrypts the given data using AES

    The data is zero-padded to a multiple of 16 bytes before it is handed to
    the cipher, and the same number of bytes is cut off the end of the
    plaintext again, so the result has the same length as the input.

    aes_key: the AESKey object to use for AES decryption
    data: the data (bytes) to decrypt
    Returns the decrypted data.
    """
    (pad_len, padded) = aes_pad_(data)
    cipher = setup_aes_cipher_(aes_key)
    ptext = cipher.decrypt(padded)
    if 0 != pad_len:
        ptext = ptext[:-pad_len]
    return ptext
177
178
class Chk(object):
    """Class for the content hash key."""
    key = None  # hash of the plaintext block (CHK_HASH_SIZE bytes)
    query = None  # hash of the encrypted block (CHK_QUERY_SIZE bytes)
    fsize = None  # file size; stays None until setSize() is called

    def __init__(self, key, query):
        """Creates a content hash key from a key/query pair.

        key: the key part, must be CHK_HASH_SIZE bytes long
        query: the query part, must be CHK_QUERY_SIZE bytes long
        """
        assert len(key) == CHK_HASH_SIZE
        assert len(query) == CHK_QUERY_SIZE
        self.key = key
        self.query = query

    def setSize(self, size):
        """Records the size that uri() appends to the URI.

        size: the file size
        """
        self.fsize = size

    def uri(self):
        """Returns the URI string for this content hash key.

        The URI consists of the FS prefix and CHK infix followed by the
        encoded key, the encoded query and the size, separated by dots.
        """
        # str() is used for the size on purpose: trimming the last character
        # of the repr (needed for the "L" suffix of Python 2 longs) would cut
        # off the final digit of a Python 3 int.
        return GNUNET_FS_URI_PREFIX + GNUNET_FS_URI_CHK_INFIX + \
            encode_data_to_string(bytearray(self.key)) + "." + \
            encode_data_to_string(bytearray(self.query)) + "." + \
            str(self.fsize)
202
203
def compute_depth_(size):
    """Computes the depth of the hash tree.

    size: the size of the file whose tree's depth has to be computed
    Returns the depth of the tree. Always > 0.
    """
    depth = 1
    fl = DBLOCK_SIZE
    # Each additional level multiplies the payload a tree can cover by
    # CHK_PER_INODE.  Python integers are unbounded, so no overflow check on
    # the multiplication is needed.
    while fl < size:
        depth += 1
        fl *= CHK_PER_INODE
    return depth
218
219
def compute_tree_size_(depth):
    """Calculate how many bytes of payload a block tree of the given depth MAY
    correspond to at most (this function ignores the fact that some blocks will
    only be present partially due to the total file size cutting some blocks
    off at the end).

    depth: depth of the block.  depth==0 is a DBLOCK.
    Returns the number of bytes of payload a subtree of this depth may
    correspond to.
    """
    rsize = DBLOCK_SIZE
    for _ in range(depth):
        rsize *= CHK_PER_INODE
    return rsize
234
235
def compute_chk_offset_(depth, end_offset):
    """Compute the offset of the CHK for the current block in the IBlock
    above

    depth: depth of the IBlock in the tree (aka overall number of tree levels
           minus depth); 0 == DBLOCK
    end_offset: current offset in the overall file, at the *beginning* of the
                block for DBLOCK (depth == 0), otherwise at the *end* of the
                block (exclusive)
    Returns the offset in the list of CHKs in the above IBlock; always in the
    range 0 .. CHK_PER_INODE - 1
    """
    bds = compute_tree_size_(depth)
    if depth > 0:
        # The offset is exclusive here; step back one byte so that it falls
        # inside the block it terminates.
        end_offset -= 1
    return (end_offset // bds) % CHK_PER_INODE
252
253
def compute_iblock_size_(depth, offset):
    """Compute the size of the current IBLOCK.  The encoder is triggering the
    calculation of the size of an IBLOCK at the *end* (hence end_offset) of its
    construction.  The IBLOCK maybe a full or a partial IBLOCK, and this
    function is to calculate how long it should be.

    depth: depth of the IBlock in the tree, 0 would be a DBLOCK, must be > 0
           (this function is for IBLOCKs only!)
    offset: current offset in the payload (!) of the overall file, must be > 0
            (since this function is called at the end of a block).
    Returns the number of elements to be in the corresponding IBlock (an int
    between 1 and CHK_PER_INODE)
    """
    assert depth > 0
    assert offset > 0
    bds = compute_tree_size_(depth)
    mod = offset % bds
    if mod == 0:
        # The offset sits exactly on a block boundary: the IBlock is full.
        return CHK_PER_INODE
    # Payload covered by a single child of this IBlock.  Integer division is
    # required: the result is used as a slice bound by the caller.
    child_size = bds // CHK_PER_INODE
    ret, partial = divmod(mod, child_size)
    if partial != 0:
        ret += 1  # a partially filled child still needs its own entry
    return ret
278
279
def compute_rootchk(readin, size):
    """Returns the content hash key after generating the hash tree for the given
    input stream.

    Data blocks of at most DBLOCK_SIZE bytes are read from the stream; every
    block (data block or IBlock) is hashed, encrypted with a key derived from
    that hash, and the hash of the ciphertext becomes its query.  The
    (key, query) pairs of finished blocks are concatenated to form the IBlock
    one level up, until the single root block has been encoded.

    readin: the binary stream where to read data from
    size: the size of data to be read
    Returns the Chk of the root block with its size set, or None if reading
    from the stream raised IOError.
    """
    depth = compute_depth_(size)
    current_depth = 0
    # One row of CHK_PER_INODE slots per tree level.
    chks = [None] * (depth * CHK_PER_INODE)  # list buffer
    read_offset = 0
    logging.debug("Beginning to calculate tree hash with depth: %r", depth)
    while True:
        if depth == current_depth:
            off = CHK_PER_INODE * (depth - 1)
            uri_chk = chks[off]
            assert uri_chk is not None
            # The query is binary data; log it as hex text.
            logging.debug("Encoding done, reading CHK `%s' from %r\n",
                          bytes(uri_chk.query).hex(), off)
            assert size == read_offset
            uri_chk.setSize(size)
            return uri_chk
        if 0 == current_depth:
            pt_size = min(DBLOCK_SIZE, size - read_offset)
            try:
                pt_block = readin.read(pt_size)
            except IOError:
                logging.warning("Error reading input file stream")
                return None
        else:
            pt_elements = compute_iblock_size_(current_depth, read_offset)
            first = (current_depth - 1) * CHK_PER_INODE
            # The IBlock payload is the concatenation of key + query of the
            # blocks one level below.
            pt_block = b"".join(chk.key + chk.query
                                for chk in chks[first:first + pt_elements])
            pt_size = pt_elements * (CHK_HASH_SIZE + CHK_QUERY_SIZE)
        # Also catches a short read from the input stream.
        assert len(pt_block) == pt_size
        assert pt_size <= DBLOCK_SIZE
        off = compute_chk_offset_(current_depth, read_offset)
        logging.debug("Encoding data at offset %r and depth %r with block "
                      "size %r and target CHK offset %r",
                      read_offset, current_depth, pt_size,
                      current_depth * CHK_PER_INODE)
        pt_hash = sha512_hash(pt_block)
        pt_aes_key = AESKey(pt_hash)
        pt_enc = aes_encrypt(pt_aes_key, pt_block)
        pt_enc_hash = sha512_hash(pt_enc)
        chks[(current_depth * CHK_PER_INODE) + off] = Chk(pt_hash, pt_enc_hash)
        if 0 == current_depth:
            read_offset += pt_size
            # Move up once the file is exhausted or a depth-1 IBlock is full.
            if (read_offset == size) or \
               (0 == (read_offset % (CHK_PER_INODE * DBLOCK_SIZE))):
                current_depth += 1
        else:
            # NOTE(review): compute_chk_offset_() returns a value modulo
            # CHK_PER_INODE, so `CHK_PER_INODE == off` looks like it can never
            # be true; kept as is - confirm against the GNUnet tree encoder
            # before changing it.
            if (CHK_PER_INODE == off) or (read_offset == size):
                current_depth += 1
            else:
                current_depth = 0
341
342
def chkuri_from_path(path):
    """Returns the CHK URI of the file at the given path.

    path: the path of the file whose CHK has to be calculated
    Raises IOError if the file content could not be read.
    """
    size = os.path.getsize(path)
    # The with-statement closes the file even if hashing raises.
    with open(path, "rb") as readin:
        chk = compute_rootchk(readin, size)
    if chk is None:
        # compute_rootchk() signals a read error by returning None.
        raise IOError("Error reading input file `%s'" % path)
    return chk.uri()
353
354
def usage():
    """Prints help about using this script."""
    print(
        """
Usage: gnunet-chk.py [options] file
Prints the Content Hash Key of given file in GNUNET-style URI.

Options:
    -h, --help : prints this message
"""
    )
366
367
# Command line entry point: parse the options, then print the CHK URI of the
# single file given as argument.
if '__main__' == __name__:
    try:
        opts, args = getopt.getopt(sys.argv[1:], "h", ["help"])
    except getopt.GetoptError as err:
        print(err)
        print("Exception occurred")
        usage()
        sys.exit(2)
    for option, _ in opts:
        if option in ("-h", "--help"):
            usage()
            sys.exit(0)
    if len(args) != 1:
        print("Incorrect number of arguments passed")
        usage()
        sys.exit(1)
    print(chkuri_from_path(args[0]))
def __init__(self, passphrase)
Definition: gnunet-chk.py:102
def setSize(self, size)
Definition: gnunet-chk.py:191
def uri(self)
Definition: gnunet-chk.py:194
def __init__(self, key, query)
Definition: gnunet-chk.py:185
static uint32_t type
Type string converted to DNS type value.
char * GNUNET_STRINGS_data_to_string(const void *data, size_t size, char *out, size_t out_size)
Convert binary data to ASCII encoding using CrockfordBase32.
Definition: strings.c:709
#define min(x, y)
def compute_tree_size_(depth)
Definition: gnunet-chk.py:220
def aes_encrypt(aes_key, data)
Definition: gnunet-chk.py:151
def compute_iblock_size_(depth, offset)
Definition: gnunet-chk.py:254
def setup_aes_cipher_(aes_key)
Definition: gnunet-chk.py:126
def chkuri_from_path(path)
Definition: gnunet-chk.py:343
def usage()
Definition: gnunet-chk.py:355
def aes_pad_(data)
Definition: gnunet-chk.py:135
def sha512_hash(data)
Definition: gnunet-chk.py:82
def encode_data_to_string(data)
Definition: gnunet-chk.py:50
def compute_chk_offset_(depth, end_offset)
Definition: gnunet-chk.py:236
def compute_rootchk(readin, size)
Definition: gnunet-chk.py:280
def aes_decrypt(aes_key, data)
Definition: gnunet-chk.py:165
def compute_depth_(size)
Definition: gnunet-chk.py:204