GNUnet  0.11.x
gnunet-chk.py
Go to the documentation of this file.
1 #!/usr/bin/python3
2 # This file is part of GNUnet.
3 # (C) 2013, 2018 Christian Grothoff (and other contributing authors)
4 #
5 # GNUnet is free software: you can redistribute it and/or modify it
6 # under the terms of the GNU Affero General Public License as published
7 # by the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
9 #
10 # GNUnet is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # Affero General Public License for more details.
14 #
15 # You should have received a copy of the GNU Affero General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 #
18 # SPDX-License-Identifier: AGPL3.0-or-later
19 #
20 # File: gnunet-chk.py
21 # Brief: Computes GNUNET style Content Hash Key for a given file
22 # Author: Sree Harsha Totakura
23 
24 from hashlib import sha512
25 import logging
26 import os
27 import getopt
28 import sys
29 from Crypto.Cipher import AES
30 from functools import reduce
31 
32 
33 # Defaults
34 DBLOCK_SIZE = (32 * 1024) # Data block size
35 
36 # Pick a multiple of 2 here to achive 8-byte alignment! We also
37 # probably want DBlocks to have (roughly) the same size as IBlocks.
38 # With SHA-512, the optimal value is 32768 byte / 128 byte = 256 (128
39 # byte = 2 * 512 bits). DO NOT CHANGE!
40 CHK_PER_INODE = 256
41 
42 CHK_HASH_SIZE = 64 # SHA-512 hash = 512 bits = 64 bytes
43 
44 CHK_QUERY_SIZE = CHK_HASH_SIZE # Again a SHA-512 hash
45 
46 GNUNET_FS_URI_PREFIX = "gnunet://fs/" # FS CHK URI prefix
47 
48 GNUNET_FS_URI_CHK_INFIX = "chk/" # FS CHK URI infix
49 
50 
52  """Returns an ASCII encoding of the given data block like
53  GNUNET_STRINGS_data_to_string() function.
54 
55  data: A bytearray representing the block of data which has to be encoded
56  """
57  echart = "0123456789ABCDEFGHIJKLMNOPQRSTUV"
58  assert (None != data)
59  assert (bytearray == type(data))
60  size = len(data)
61  assert (0 != size)
62  vbit = 0
63  wpos = 0
64  rpos = 0
65  bits = 0
66  out = ""
67  while (rpos < size) or (vbit > 0):
68  if (rpos < size) and (vbit < 5):
69  bits = (bits << 8) | data[rpos] # eat 8 more bits
70  rpos += 1
71  vbit += 8
72  if (vbit < 5):
73  bits <<= (5 - vbit) # zero-padding
74  assert (vbit == ((size * 8) % 5))
75  vbit = 5
76  out += echart[(bits >> (vbit - 5)) & 31]
77  wpos += 1
78  vbit -= 5
79  assert (0 == vbit)
80  return out
81 
82 
83 def sha512_hash(data):
84  """ Returns the sha512 hash of the given data.
85 
86  data: string to hash
87  """
88  hash_obj = sha512()
89  hash_obj.update(data)
90  return hash_obj.digest()
91 
92 
93 class AESKey(object):
94  """Class for AES Keys. Contains the main key and the initialization
95  vector. """
96 
97  key = None # The actual AES key
98  iv = None # The initialization vector
99  cipher = None # The cipher object
100  KEY_SIZE = 32 # AES 256-bit key = 32 bytes
101  IV_SIZE = AES.block_size # Initialization vector size (= AES block size)
102 
103  def __init__(self, passphrase):
104  """Creates a new AES key.
105 
106  passphrase: string containing the passphrase to get the AES key and
107  initialization vector
108  """
109  passphrase = bytearray(passphrase)
110  self.key = bytearray(self.KEY_SIZE)
111  self.iv = bytearray(self.IV_SIZE)
112  if (len(passphrase) > self.KEY_SIZE):
113  self.key = passphrase[:self.KEY_SIZE]
114  passphrase = passphrase[self.KEY_SIZE:]
115  if (len(passphrase) > self.IV_SIZE):
116  self.iv = passphrase[:self.IV_SIZE]
117  else:
118  self.iv[0:len(passphrase)] = passphrase
119  else:
120  self.key[0:len(passphrase)] = passphrase
121  self.key = str(self.key)
122  self.iv = str(self.iv)
123  assert (len(self.key) == self.KEY_SIZE)
124  assert (len(self.iv) == self.IV_SIZE)
125 
126 
127 def setup_aes_cipher_(aes_key):
128  """Initializes the AES object with settings similar to those in GNUnet.
129 
130  aes_key: the AESKey object
131  Returns the newly initialized AES object
132  """
133  return AES.new(aes_key.key, AES.MODE_CFB, aes_key.iv, segment_size=128)
134 
135 
136 def aes_pad_(data):
137  """Adds padding to the data such that the size of the data is a multiple of
138  16 bytes
139 
140  data: the data string
141  Returns a tuple:(pad_len, data). pad_len denotes the number of bytes added
142  as padding; data is the new data string with padded bytes at the end
143  """
144  pad_len = len(data) % 16
145  if (0 != pad_len):
146  pad_len = 16 - pad_len
147  pad_bytes = bytearray(15)
148  data += str(pad_bytes[:pad_len])
149  return (pad_len, data)
150 
151 
152 def aes_encrypt(aes_key, data):
153  """Encrypts the given data using AES.
154 
155  aes_key: the AESKey object to use for AES encryption
156  data: the data string to encrypt
157  """
158  (pad_len, data) = aes_pad_(data)
159  cipher = setup_aes_cipher_(aes_key)
160  enc_data = cipher.encrypt(data)
161  if (0 != pad_len):
162  enc_data = enc_data[:-pad_len]
163  return enc_data
164 
165 
166 def aes_decrypt(aes_key, data):
167  """Decrypts the given data using AES
168 
169  aes_key: the AESKey object to use for AES decryption
170  data: the data string to decrypt
171  """
172  (pad_len, data) = aes_pad_(data)
173  cipher = setup_aes_cipher_(aes_key)
174  ptext = cipher.decrypt(data)
175  if (0 != pad_len):
176  ptext = ptext[:-pad_len]
177  return ptext
178 
179 
180 class Chk(object):
181  """Class for the content hash key."""
182  key = None
183  query = None
184  fsize = None
185 
186  def __init__(self, key, query):
187  assert (len(key) == CHK_HASH_SIZE)
188  assert (len(query) == CHK_QUERY_SIZE)
189  self.key = key
190  self.query = query
191 
192  def setSize(self, size):
193  self.fsize = size
194 
195  def uri(self):
196  sizestr = repr(self.fsize)
197  if isinstance(self.fsize, int):
198  sizestr = sizestr[:-1]
199  return GNUNET_FS_URI_PREFIX + GNUNET_FS_URI_CHK_INFIX + \
200  encode_data_to_string(bytearray(self.key)) + "." + \
201  encode_data_to_string(bytearray(self.query)) + "." + \
202  sizestr
203 
204 
205 def compute_depth_(size):
206  """Computes the depth of the hash tree.
207 
208  size: the size of the file whose tree's depth has to be computed
209  Returns the depth of the tree. Always > 0.
210  """
211  depth = 1
212  fl = DBLOCK_SIZE
213  while (fl < size):
214  depth += 1
215  if ((fl * CHK_PER_INODE) < fl):
216  return depth
217  fl = fl * CHK_PER_INODE
218  return depth
219 
220 
222  """Calculate how many bytes of payload a block tree of the given depth MAY
223  correspond to at most (this function ignores the fact that some blocks will
224  only be present partially due to the total file size cutting some blocks
225  off at the end).
226 
227  depth: depth of the block. depth==0 is a DBLOCK.
228  Returns the number of bytes of payload a subtree of this depth may
229  correspond to.
230  """
231  rsize = DBLOCK_SIZE
232  for cnt in range(0, depth):
233  rsize *= CHK_PER_INODE
234  return rsize
235 
236 
237 def compute_chk_offset_(depth, end_offset):
238  """Compute the offset of the CHK for the current block in the IBlock
239  above
240 
241  depth: depth of the IBlock in the tree (aka overall number of tree levels
242  minus depth); 0 == DBLOCK
243  end_offset: current offset in the overall file, at the *beginning* of the
244  block for DBLOCK (depth == 0), otherwise at the *end* of the
245  block (exclusive)
246  Returns the offset in the list of CHKs in the above IBlock
247  """
248  bds = compute_tree_size_(depth)
249  if (depth > 0):
250  end_offset -= 1
251  ret = end_offset // bds
252  return ret % CHK_PER_INODE
253 
254 
255 def compute_iblock_size_(depth, offset):
256  """Compute the size of the current IBLOCK. The encoder is triggering the
257  calculation of the size of an IBLOCK at the *end* (hence end_offset) of its
258  construction. The IBLOCK maybe a full or a partial IBLOCK, and this
259  function is to calculate how long it should be.
260 
261  depth: depth of the IBlock in the tree, 0 would be a DBLOCK, must be > 0
262  (this function is for IBLOCKs only!)
263  offset: current offset in the payload (!) of the overall file, must be > 0
264  (since this function is called at the end of a block).
265  Returns the number of elements to be in the corresponding IBlock
266  """
267  assert (depth > 0)
268  assert (offset > 0)
269  bds = compute_tree_size_(depth)
270  mod = offset % bds
271  if mod is 0:
272  ret = CHK_PER_INODE
273  else:
274  bds /= CHK_PER_INODE
275  ret = mod // bds
276  if (mod % bds) is not 0:
277  ret += 1
278  return ret
279 
280 
281 def compute_rootchk(readin, size):
282  """Returns the content hash key after generating the hash tree for the given
283  input stream.
284 
285  readin: the stream where to read data from
286  size: the size of data to be read
287  """
288  depth = compute_depth_(size)
289  current_depth = 0
290  chks = [None] * (depth * CHK_PER_INODE) # list buffer
291  read_offset = 0
292  logging.debug("Begining to calculate tree hash with depth: " + repr(depth))
293  while True:
294  if (depth == current_depth):
295  off = CHK_PER_INODE * (depth - 1)
296  assert (chks[off] is not None)
297  logging.debug("Encoding done, reading CHK `" + chks[off].query + \
298  "' from " + repr(off) + "\n")
299  uri_chk = chks[off]
300  assert (size == read_offset)
301  uri_chk.setSize(size)
302  return uri_chk
303  if (0 == current_depth):
304  pt_size = min(DBLOCK_SIZE, size - read_offset)
305  try:
306  pt_block = readin.read(pt_size)
307  except IOError:
308  logging.warning("Error reading input file stream")
309  return None
310  else:
311  pt_elements = compute_iblock_size_(current_depth, read_offset)
312  pt_block = ""
313  pt_block = \
314  reduce((lambda ba, chk:
315  ba + (chk.key + chk.query)),
316  chks[(current_depth - 1) * CHK_PER_INODE:][:pt_elements],
317  pt_block)
318  pt_size = pt_elements * (CHK_HASH_SIZE + CHK_QUERY_SIZE)
319  assert (len(pt_block) == pt_size)
320  assert (pt_size <= DBLOCK_SIZE)
321  off = compute_chk_offset_(current_depth, read_offset)
322  logging.debug("Encoding data at offset " + repr(read_offset) + \
323  " and depth " + repr(current_depth) + " with block " \
324  "size " + repr(pt_size) + " and target CHK offset " + \
325  repr(current_depth * CHK_PER_INODE))
326  pt_hash = sha512_hash(pt_block)
327  pt_aes_key = AESKey(pt_hash)
328  pt_enc = aes_encrypt(pt_aes_key, pt_block)
329  pt_enc_hash = sha512_hash(pt_enc)
330  chk = Chk(pt_hash, pt_enc_hash)
331  chks[(current_depth * CHK_PER_INODE) + off] = chk
332  if (0 == current_depth):
333  read_offset += pt_size
334  if (read_offset == size) or \
335  (0 == (read_offset % (CHK_PER_INODE * DBLOCK_SIZE))):
336  current_depth += 1
337  else:
338  if (CHK_PER_INODE == off) or (read_offset == size):
339  current_depth += 1
340  else:
341  current_depth = 0
342 
343 
345  """Returns the CHK URI of the file at the given path.
346 
347  path: the path of the file whose CHK has to be calculated
348  """
349  size = os.path.getsize(path)
350  readin = open(path, "rb")
351  chk = compute_rootchk(readin, size)
352  readin.close()
353  return chk.uri()
354 
355 
356 def usage():
357  """Prints help about using this script."""
358  print("""
359 Usage: gnunet-chk.py [options] file
360 Prints the Content Hash Key of given file in GNUNET-style URI.
361 
362 Options:
363  -h, --help : prints this message
364 """)
365 
366 
367 if '__main__' == __name__:
368  try:
369  opts, args = getopt.getopt(sys.argv[1:], "h", ["help"])
370  except getopt.GetoptError as err:
371  print(err)
372  print("Exception occured")
373  usage()
374  sys.exit(2)
375  for option, value in opts:
376  if option in("-h", "--help"):
377  usage()
378  sys.exit(0)
379  if len(args) != 1:
380  print("Incorrect number of arguments passed")
381  usage()
382  sys.exit(1)
383  print(chkuri_from_path(args[0]))
def compute_iblock_size_(depth, offset)
Definition: gnunet-chk.py:255
def uri(self)
Definition: gnunet-chk.py:195
def setSize(self, size)
Definition: gnunet-chk.py:192
def compute_depth_(size)
Definition: gnunet-chk.py:205
def sha512_hash(data)
Definition: gnunet-chk.py:83
def setup_aes_cipher_(aes_key)
Definition: gnunet-chk.py:127
def compute_chk_offset_(depth, end_offset)
Definition: gnunet-chk.py:237
def usage()
Definition: gnunet-chk.py:356
def aes_pad_(data)
Definition: gnunet-chk.py:136
def __init__(self, passphrase)
Definition: gnunet-chk.py:103
def aes_encrypt(aes_key, data)
Definition: gnunet-chk.py:152
def aes_decrypt(aes_key, data)
Definition: gnunet-chk.py:166
def encode_data_to_string(data)
Definition: gnunet-chk.py:51
def compute_tree_size_(depth)
Definition: gnunet-chk.py:221
def chkuri_from_path(path)
Definition: gnunet-chk.py:344
def __init__(self, key, query)
Definition: gnunet-chk.py:186
enum GNUNET_TESTBED_UnderlayLinkModelType type
the type of this model
uint16_t len
length of data (which is always a uint32_t, but presumably this can be used to specify that fewer byt...
def compute_rootchk(readin, size)
Definition: gnunet-chk.py:281