GNUnet  0.11.x
gnunet-chk.py
Go to the documentation of this file.
1 #!/home/buildbot/bb-worker/sandbox/bin/python3
2 # This file is part of GNUnet.
3 # (C) 2013, 2018 Christian Grothoff (and other contributing authors)
4 #
5 # GNUnet is free software: you can redistribute it and/or modify it
6 # under the terms of the GNU Affero General Public License as published
7 # by the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
9 #
10 # GNUnet is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # Affero General Public License for more details.
14 #
15 # You should have received a copy of the GNU Affero General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 #
18 # SPDX-License-Identifier: AGPL3.0-or-later
19 #
20 # File: gnunet-chk.py
21 # Brief: Computes GNUNET style Content Hash Key for a given file
22 # Author: Sree Harsha Totakura
23 
24 from hashlib import sha512
25 import logging
26 import os
27 import getopt
28 import sys
29 from Crypto.Cipher import AES
30 from functools import reduce
31 
32 # Defaults
33 DBLOCK_SIZE = (32 * 1024) # Data block size
34 
35 # Pick a multiple of 2 here to achieve 8-byte alignment! We also
36 # probably want DBlocks to have (roughly) the same size as IBlocks.
37 # With SHA-512, the optimal value is 32768 byte / 128 byte = 256 (128
38 # byte = 2 * 512 bits). DO NOT CHANGE!
39 CHK_PER_INODE = 256
40 
41 CHK_HASH_SIZE = 64 # SHA-512 hash = 512 bits = 64 bytes
42 
43 CHK_QUERY_SIZE = CHK_HASH_SIZE # Again a SHA-512 hash
44 
45 GNUNET_FS_URI_PREFIX = "gnunet://fs/" # FS CHK URI prefix
46 
47 GNUNET_FS_URI_CHK_INFIX = "chk/" # FS CHK URI infix
48 
49 
51  """Returns an ASCII encoding of the given data block like
52  GNUNET_STRINGS_data_to_string() function.
53 
54  data: A bytearray representing the block of data which has to be encoded
55  """
56  echart = "0123456789ABCDEFGHIJKLMNOPQRSTUV"
57  assert (None != data)
58  assert (bytearray == type(data))
59  size = len(data)
60  assert (0 != size)
61  vbit = 0
62  wpos = 0
63  rpos = 0
64  bits = 0
65  out = ""
66  while (rpos < size) or (vbit > 0):
67  if (rpos < size) and (vbit < 5):
68  bits = (bits << 8) | data[rpos] # eat 8 more bits
69  rpos += 1
70  vbit += 8
71  if (vbit < 5):
72  bits <<= (5 - vbit) # zero-padding
73  assert (vbit == ((size * 8) % 5))
74  vbit = 5
75  out += echart[(bits >> (vbit - 5)) & 31]
76  wpos += 1
77  vbit -= 5
78  assert (0 == vbit)
79  return out
80 
81 
82 def sha512_hash(data):
83  """ Returns the sha512 hash of the given data.
84 
85  data: string to hash
86  """
87  hash_obj = sha512()
88  hash_obj.update(data)
89  return hash_obj.digest()
90 
91 
92 class AESKey(object):
93  """Class for AES Keys. Contains the main key and the initialization
94  vector. """
95 
96  key = None # The actual AES key
97  iv = None # The initialization vector
98  cipher = None # The cipher object
99  KEY_SIZE = 32 # AES 256-bit key = 32 bytes
100  IV_SIZE = AES.block_size # Initialization vector size (= AES block size)
101 
102  def __init__(self, passphrase):
103  """Creates a new AES key.
104 
105  passphrase: string containing the passphrase to get the AES key and
106  initialization vector
107  """
108  passphrase = bytearray(passphrase)
109  self.key = bytearray(self.KEY_SIZE)
110  self.iv = bytearray(self.IV_SIZE)
111  if (len(passphrase) > self.KEY_SIZE):
112  self.key = passphrase[:self.KEY_SIZE]
113  passphrase = passphrase[self.KEY_SIZE:]
114  if (len(passphrase) > self.IV_SIZE):
115  self.iv = passphrase[:self.IV_SIZE]
116  else:
117  self.iv[0:len(passphrase)] = passphrase
118  else:
119  self.key[0:len(passphrase)] = passphrase
120  self.key = str(self.key)
121  self.iv = str(self.iv)
122  assert (len(self.key) == self.KEY_SIZE)
123  assert (len(self.iv) == self.IV_SIZE)
124 
125 
126 def setup_aes_cipher_(aes_key):
127  """Initializes the AES object with settings similar to those in GNUnet.
128 
129  aes_key: the AESKey object
130  Returns the newly initialized AES object
131  """
132  return AES.new(aes_key.key, AES.MODE_CFB, aes_key.iv, segment_size=128)
133 
134 
135 def aes_pad_(data):
136  """Adds padding to the data such that the size of the data is a multiple of
137  16 bytes
138 
139  data: the data string
140  Returns a tuple:(pad_len, data). pad_len denotes the number of bytes added
141  as padding; data is the new data string with padded bytes at the end
142  """
143  pad_len = len(data) % 16
144  if (0 != pad_len):
145  pad_len = 16 - pad_len
146  pad_bytes = bytearray(15)
147  data += str(pad_bytes[:pad_len])
148  return (pad_len, data)
149 
150 
151 def aes_encrypt(aes_key, data):
152  """Encrypts the given data using AES.
153 
154  aes_key: the AESKey object to use for AES encryption
155  data: the data string to encrypt
156  """
157  (pad_len, data) = aes_pad_(data)
158  cipher = setup_aes_cipher_(aes_key)
159  enc_data = cipher.encrypt(data)
160  if (0 != pad_len):
161  enc_data = enc_data[:-pad_len]
162  return enc_data
163 
164 
165 def aes_decrypt(aes_key, data):
166  """Decrypts the given data using AES
167 
168  aes_key: the AESKey object to use for AES decryption
169  data: the data string to decrypt
170  """
171  (pad_len, data) = aes_pad_(data)
172  cipher = setup_aes_cipher_(aes_key)
173  ptext = cipher.decrypt(data)
174  if (0 != pad_len):
175  ptext = ptext[:-pad_len]
176  return ptext
177 
178 
179 class Chk(object):
180  """Class for the content hash key."""
181  key = None
182  query = None
183  fsize = None
184 
185  def __init__(self, key, query):
186  assert (len(key) == CHK_HASH_SIZE)
187  assert (len(query) == CHK_QUERY_SIZE)
188  self.key = key
189  self.query = query
190 
191  def setSize(self, size):
192  self.fsize = size
193 
194  def uri(self):
195  sizestr = repr(self.fsize)
196  if isinstance(self.fsize, int):
197  sizestr = sizestr[:-1]
198  return GNUNET_FS_URI_PREFIX + GNUNET_FS_URI_CHK_INFIX + \
199  encode_data_to_string(bytearray(self.key)) + "." + \
200  encode_data_to_string(bytearray(self.query)) + "." + \
201  sizestr
202 
203 
204 def compute_depth_(size):
205  """Computes the depth of the hash tree.
206 
207  size: the size of the file whose tree's depth has to be computed
208  Returns the depth of the tree. Always > 0.
209  """
210  depth = 1
211  fl = DBLOCK_SIZE
212  while (fl < size):
213  depth += 1
214  if ((fl * CHK_PER_INODE) < fl):
215  return depth
216  fl = fl * CHK_PER_INODE
217  return depth
218 
219 
221  """Calculate how many bytes of payload a block tree of the given depth MAY
222  correspond to at most (this function ignores the fact that some blocks will
223  only be present partially due to the total file size cutting some blocks
224  off at the end).
225 
226  depth: depth of the block. depth==0 is a DBLOCK.
227  Returns the number of bytes of payload a subtree of this depth may
228  correspond to.
229  """
230  rsize = DBLOCK_SIZE
231  for cnt in range(0, depth):
232  rsize *= CHK_PER_INODE
233  return rsize
234 
235 
236 def compute_chk_offset_(depth, end_offset):
237  """Compute the offset of the CHK for the current block in the IBlock
238  above
239 
240  depth: depth of the IBlock in the tree (aka overall number of tree levels
241  minus depth); 0 == DBLOCK
242  end_offset: current offset in the overall file, at the *beginning* of the
243  block for DBLOCK (depth == 0), otherwise at the *end* of the
244  block (exclusive)
245  Returns the offset in the list of CHKs in the above IBlock
246  """
247  bds = compute_tree_size_(depth)
248  if (depth > 0):
249  end_offset -= 1
250  ret = end_offset // bds
251  return ret % CHK_PER_INODE
252 
253 
254 def compute_iblock_size_(depth, offset):
255  """Compute the size of the current IBLOCK. The encoder is triggering the
256  calculation of the size of an IBLOCK at the *end* (hence end_offset) of its
257  construction. The IBLOCK maybe a full or a partial IBLOCK, and this
258  function is to calculate how long it should be.
259 
260  depth: depth of the IBlock in the tree, 0 would be a DBLOCK, must be > 0
261  (this function is for IBLOCKs only!)
262  offset: current offset in the payload (!) of the overall file, must be > 0
263  (since this function is called at the end of a block).
264  Returns the number of elements to be in the corresponding IBlock
265  """
266  assert (depth > 0)
267  assert (offset > 0)
268  bds = compute_tree_size_(depth)
269  mod = offset % bds
270  if mod is 0:
271  ret = CHK_PER_INODE
272  else:
273  bds /= CHK_PER_INODE
274  ret = mod // bds
275  if (mod % bds) is not 0:
276  ret += 1
277  return ret
278 
279 
280 def compute_rootchk(readin, size):
281  """Returns the content hash key after generating the hash tree for the given
282  input stream.
283 
284  readin: the stream where to read data from
285  size: the size of data to be read
286  """
287  depth = compute_depth_(size)
288  current_depth = 0
289  chks = [None] * (depth * CHK_PER_INODE) # list buffer
290  read_offset = 0
291  logging.debug("Beginning to calculate tree hash with depth: " + repr(depth))
292  while True:
293  if (depth == current_depth):
294  off = CHK_PER_INODE * (depth - 1)
295  assert (chks[off] is not None)
296  logging.debug("Encoding done, reading CHK `" + chks[off].query + \
297  "' from " + repr(off) + "\n")
298  uri_chk = chks[off]
299  assert (size == read_offset)
300  uri_chk.setSize(size)
301  return uri_chk
302  if (0 == current_depth):
303  pt_size = min(DBLOCK_SIZE, size - read_offset)
304  try:
305  pt_block = readin.read(pt_size)
306  except IOError:
307  logging.warning("Error reading input file stream")
308  return None
309  else:
310  pt_elements = compute_iblock_size_(current_depth, read_offset)
311  pt_block = ""
312  pt_block = \
313  reduce((lambda ba, chk:
314  ba + (chk.key + chk.query)),
315  chks[(current_depth - 1) * CHK_PER_INODE:][:pt_elements],
316  pt_block)
317  pt_size = pt_elements * (CHK_HASH_SIZE + CHK_QUERY_SIZE)
318  assert (len(pt_block) == pt_size)
319  assert (pt_size <= DBLOCK_SIZE)
320  off = compute_chk_offset_(current_depth, read_offset)
321  logging.debug("Encoding data at offset " + repr(read_offset) + \
322  " and depth " + repr(current_depth) + " with block " \
323  "size " + repr(pt_size) + " and target CHK offset " + \
324  repr(current_depth * CHK_PER_INODE))
325  pt_hash = sha512_hash(pt_block)
326  pt_aes_key = AESKey(pt_hash)
327  pt_enc = aes_encrypt(pt_aes_key, pt_block)
328  pt_enc_hash = sha512_hash(pt_enc)
329  chk = Chk(pt_hash, pt_enc_hash)
330  chks[(current_depth * CHK_PER_INODE) + off] = chk
331  if (0 == current_depth):
332  read_offset += pt_size
333  if (read_offset == size) or \
334  (0 == (read_offset % (CHK_PER_INODE * DBLOCK_SIZE))):
335  current_depth += 1
336  else:
337  if (CHK_PER_INODE == off) or (read_offset == size):
338  current_depth += 1
339  else:
340  current_depth = 0
341 
342 
344  """Returns the CHK URI of the file at the given path.
345 
346  path: the path of the file whose CHK has to be calculated
347  """
348  size = os.path.getsize(path)
349  readin = open(path, "rb")
350  chk = compute_rootchk(readin, size)
351  readin.close()
352  return chk.uri()
353 
354 
355 def usage():
356  """Prints help about using this script."""
357  print(
358  """
359 Usage: gnunet-chk.py [options] file
360 Prints the Content Hash Key of given file in GNUNET-style URI.
361 
362 Options:
363  -h, --help : prints this message
364 """
365  )
366 
367 
368 if '__main__' == __name__:
369  try:
370  opts, args = getopt.getopt(sys.argv[1:], "h", ["help"])
371  except getopt.GetoptError as err:
372  print(err)
373  print("Exception occurred")
374  usage()
375  sys.exit(2)
376  for option, value in opts:
377  if option in ("-h", "--help"):
378  usage()
379  sys.exit(0)
380  if len(args) != 1:
381  print("Incorrect number of arguments passed")
382  usage()
383  sys.exit(1)
384  print(chkuri_from_path(args[0]))
def compute_iblock_size_(depth, offset)
Definition: gnunet-chk.py:254
def uri(self)
Definition: gnunet-chk.py:194
def setSize(self, size)
Definition: gnunet-chk.py:191
def compute_depth_(size)
Definition: gnunet-chk.py:204
def sha512_hash(data)
Definition: gnunet-chk.py:82
def setup_aes_cipher_(aes_key)
Definition: gnunet-chk.py:126
def compute_chk_offset_(depth, end_offset)
Definition: gnunet-chk.py:236
def usage()
Definition: gnunet-chk.py:355
def aes_pad_(data)
Definition: gnunet-chk.py:135
def __init__(self, passphrase)
Definition: gnunet-chk.py:102
def aes_encrypt(aes_key, data)
Definition: gnunet-chk.py:151
def aes_decrypt(aes_key, data)
Definition: gnunet-chk.py:165
def encode_data_to_string(data)
Definition: gnunet-chk.py:50
def compute_tree_size_(depth)
Definition: gnunet-chk.py:220
#define min(x, y)
def chkuri_from_path(path)
Definition: gnunet-chk.py:343
def __init__(self, key, query)
Definition: gnunet-chk.py:185
enum GNUNET_TESTBED_UnderlayLinkModelType type
the type of this model
uint16_t len
length of data (which is always a uint32_t, but presumably this can be used to specify that fewer byt...
def compute_rootchk(readin, size)
Definition: gnunet-chk.py:280