/*
 * GNUnet 0.19.3 -- consttime_memcmp.c
 */
1 /*
2 The MIT License (MIT)
3 
4 Copyright (c) 2015 Christophe Meessen
5 
6 Permission is hereby granted, free of charge, to any person obtaining a copy
7 of this software and associated documentation files (the "Software"), to deal
8 in the Software without restriction, including without limitation the rights
9 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 copies of the Software, and to permit persons to whom the Software is
11 furnished to do so, subject to the following conditions:
12 
13 The above copyright notice and this permission notice shall be included in all
14 copies or substantial portions of the Software.
15 
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 SOFTWARE.
23 */
24 
25 /* Minimally modified for libgnunetutil: added license header
26  (from https://github.com/chmike/cst_time_memcmp, LICENSE file), and
27  renamed the exported symbol: */
28 #define consttime_memcmp GNUNET_memcmp_ct_
29 /* Rest of the file is 'original' */
30 
31 
32 #include "platform.h"
33 #include <stddef.h>
34 #include <inttypes.h>
35 
36 /*
37  * "constant time" memcmp. Time taken depends on the buffer length, of
38  * course, but not on the content of the buffers.
39  *
40  * Just like the ordinary memcmp function, the return value is
41  * tri-state: <0, 0, or >0. However, applications that need a
42  * constant-time memory comparison function usually need only a
43  * two-state result, signalling only whether the inputs were identical
44  * or different, but not signalling which of the inputs was larger.
45  * This code could be made significantly faster and simpler if the
46  * requirement for a tri-state result were removed.
47  *
48  * In order to protect against adversaries who can observe timing,
49  * cache hits or misses, page faults, etc., and who can use such
50  * observations to learn something about the relationship between the
51  * contents of the two buffers, we have to perform exactly the same
52  * instructions and memory accesses regardless of the contents of the
53  * buffers. We can't stop as soon as we find a difference, we can't
54  * take different conditional branches depending on the data, and we
55  * can't use different pointers or array indexes depending on the data.
56  *
57  * Further reading:
58  *
59  * .Rs
60  * .%A Paul C. Kocher
61  * .%T Timing Attacks on Implementations of Diffie-Hellman, RSA, DSS, and Other Systems
62  * .%D 1996
63  * .%J CRYPTO 1996
64  * .%P 104-113
65  * .%U http://www.cryptography.com/timingattack/paper.html
66  * .%U http://www.webcitation.org/query?url=http%3A%2F%2Fwww.cryptography.com%2Ftimingattack%2Fpaper.html&date=2012-10-17
67  * .Re
68  *
69  * .Rs
70  * .%A D. Boneh
71  * .%A D. Brumley
72  * .%T Remote timing attacks are practical
73  * .%D August 2003
74  * .%J Proceedings of the 12th Usenix Security Symposium, 2003
75  * .%U https://crypto.stanford.edu/~dabo/abstracts/ssl-timing.html
76  * .%U http://www.webcitation.org/query?url=https%3A%2F%2Fcrypto.stanford.edu%2F%7Edabo%2Fabstracts%2Fssl-timing.html&date=2012-10-17
77  * .%U http://www.webcitation.org/query?url=http%3A%2F%2Fcrypto.stanford.edu%2F%7Edabo%2Fpubs%2Fpapers%2Fssl-timing.pdf&date=2012-10-17
78  * .Es
79  *
80  * .Rs
81  * .%A Coda Hale
82  * .%T A Lesson In Timing Attacks (or, Don't use MessageDigest.isEquals)
83  * .%D 13 Aug 2009
84  * .%U http://codahale.com/a-lesson-in-timing-attacks/
85  * .%U http://www.webcitation.org/query?url=http%3A%2F%2Fcodahale.com%2Fa-lesson-in-timing-attacks%2F&date=2012-10-17
86  * .Re
87  *
88  */
89 
90 /*
91  * A note on portability:
92  *
93  * We assume that char is exactly 8 bits, the same as uint8_t, and that
94  * integer types with exactly 16 bits and exactly 32 bits exist. (If
95  * there is ever a need to change this, then the actual requirement is
96  * that we need a type that is at least two bits wider than char, and
97  * another type that is at least two bits wider than that, or we need to
98  * fake it somehow.)
99  *
100  * We do not assume any particular size for the plain "int" type, except
101  * that it is at least 16 bits, as is guaranteed by the C language
102  * standard.
103  *
104  * We do not assume that signed integer overflow is harmless. We
105  * ensure that signed integer overflow does not occur, so that
106  * implementation-defined overflow behaviour is not invoked.
107  *
108  * We rely on the C standard's guarantees regarding the wraparound
109  * behaviour of unsigned integer arithmetic, and on the analogous
110  * guarantees regarding conversions from signed types to narrower
111  * unsigned types.
112  *
113  * We do not assume that the platform uses two's complement arithmetic.
114  */
115 
116 /*
117  * How hard do we have to try to prevent unwanted compiler optimisations?
118  *
119  * Try compiling with "#define USE_VOLATILE_TEMPORARY 0", and examine
120  * the compiler output. If the only conditional tests in the entire
121  * function are to test whether len is zero, then all is well, but try
122  * again with different optimisation flags to be sure. If the compiler
123  * emitted code with conditional tests that do anything other than
124  * testing whether len is zero, then that's a problem, so try again with
125  * "#define USE_VOLATILE_TEMPORARY 1". If it's still bad, then you are
126  * out of luck.
127  */
128 #define USE_VOLATILE_TEMPORARY 0
129 
/**
 * Compare @a len bytes of @a b1 and @a b2 in time that depends only on
 * @a len, never on the buffer contents.
 *
 * NOTE(review): the contorted expression forms below are intentional —
 * they avoid data-dependent branches, data-dependent memory accesses,
 * and signed-integer overflow.  Do not "simplify" them.
 *
 * @param b1 first buffer
 * @param b2 second buffer
 * @param len number of bytes to compare in each buffer
 * @return <0, 0 or >0, exactly like memcmp(), in the range -255..+255
 */
int
consttime_memcmp (const void *b1, const void *b2, size_t len)
{
  const uint8_t *c1, *c2;
  uint16_t d, r, m;

#if USE_VOLATILE_TEMPORARY
  volatile uint16_t v;
#else
  uint16_t v;
#endif

  c1 = b1;
  c2 = b2;

  r = 0;
  while (len)
  {
    /*
     * Take the low 8 bits of r (in the range 0x00 to 0xff,
     * or 0 to 255);
     * As explained elsewhere, the low 8 bits of r will be zero
     * if and only if all bytes compared so far were identical;
     * Zero-extend to a 16-bit type (in the range 0x0000 to
     * 0x00ff);
     * Add 255, yielding a result in the range 255 to 510;
     * Save that in a volatile variable to prevent
     * the compiler from trying any shortcuts (the
     * use of a volatile variable depends on "#ifdef
     * USE_VOLATILE_TEMPORARY", and most compilers won't
     * need it);
     * Divide by 256 yielding a result of 1 if the original
     * value of r was non-zero, or 0 if r was zero;
     * Subtract 1, yielding 0 if r was non-zero, or -1 if r
     * was zero;
     * Convert to uint16_t, yielding 0x0000 if r was
     * non-zero, or 0xffff if r was zero;
     * Save in m.
     */
    v = ((uint16_t) (uint8_t) r) + 255;
    m = v / 256 - 1;

    /*
     * Get the values from *c1 and *c2 as uint8_t (each will
     * be in the range 0 to 255, or 0x00 to 0xff);
     * Convert them to signed int values (still in the
     * range 0 to 255);
     * Subtract them using signed arithmetic, yielding a
     * result in the range -255 to +255;
     * Convert to uint16_t, yielding a result in the range
     * 0xff01 to 0xffff (for what was previously -255 to
     * -1), or 0, or in the range 0x0001 to 0x00ff (for what
     * was previously +1 to +255).
     */
    d = (uint16_t) ((int) *c1 - (int) *c2);

    /*
     * If the low 8 bits of r were previously 0, then m
     * is now 0xffff, so (d & m) is the same as d, so we
     * effectively copy d to r;
     * Otherwise, if r was previously non-zero, then m is
     * now 0, so (d & m) is zero, so leave r unchanged.
     * Note that the low 8 bits of d will be zero if and
     * only if d == 0, which happens when *c1 == *c2.
     * The low 8 bits of r are thus zero if and only if the
     * entirety of r is zero, which happens if and only if
     * all bytes compared so far were equal. As soon as a
     * non-zero value is stored in r, it remains unchanged
     * for the remainder of the loop.
     */
    r |= (d & m);

    /*
     * Increment pointers, decrement length, and loop.
     */
    ++c1;
    ++c2;
    --len;
  }

  /*
   * At this point, r is an unsigned value, which will be 0 if the
   * final result should be zero, or in the range 0x0001 to 0x00ff
   * (1 to 255) if the final result should be positive, or in the
   * range 0xff01 to 0xffff (65281 to 65535) if the final result
   * should be negative.
   *
   * We want to convert the unsigned values in the range 0xff01
   * to 0xffff to signed values in the range -255 to -1, while
   * converting the other unsigned values to equivalent signed
   * values (0, or +1 to +255).
   *
   * On a machine with two's complement arithmetic, simply copying
   * the underlying bits (with sign extension if int is wider than
   * 16 bits) would do the job, so something like this might work:
   *
   *     return (int16_t)r;
   *
   * However, that invokes implementation-defined behaviour,
   * because values larger than 32767 can't fit in a signed 16-bit
   * integer without overflow.
   *
   * To avoid any implementation-defined behaviour, we go through
   * these contortions:
   *
   * a. Calculate ((uint32_t)r + 0x8000). The cast to uint32_t
   *    is to prevent problems on platforms where int is narrower
   *    than 32 bits. If int is larger than 32 bits, then the
   *    usual arithmetic conversions cause this addition to be
   *    done in unsigned int arithmetic. If int is 32 bits
   *    or narrower, then this addition is done in uint32_t
   *    arithmetic. In either case, no overflow or wraparound
   *    occurs, and the result from this step has a value that
   *    will be one of 0x00008000 (32768), or in the range
   *    0x00008001 to 0x000080ff (32769 to 33023), or in the range
   *    0x00017f01 to 0x00017fff (98049 to 98303).
   *
   * b. Cast the result from (a) to uint16_t. This effectively
   *    discards the high bits of the result, in a way that is
   *    well defined by the C language. The result from this step
   *    will be of type uint16_t, and its value will be one of
   *    0x8000 (32768), or in the range 0x8001 to 0x80ff (32769 to
   *    33023), or in the range 0x7f01 to 0x7fff (32513 to
   *    32767).
   *
   * c. Cast the result from (b) to int32_t. We use int32_t
   *    instead of int because we need a type that's strictly
   *    larger than 16 bits, and the C standard allows
   *    implementations where int is only 16 bits. The result
   *    from this step will be of type int32_t, and its value will
   *    be one of 0x00008000 (32768), or in the range 0x00008001
   *    to 0x000080ff (32769 to 33023), or in the range 0x00007f01
   *    to 0x00007fff (32513 to 32767).
   *
   * d. Take the result from (c) and subtract 0x8000 (32768) using
   *    signed int32_t arithmetic. The result from this step will
   *    be of type int32_t and the value will be one of
   *    0x00000000 (0), or in the range 0x00000001 to 0x000000ff
   *    (+1 to +255), or in the range 0xffffff01 to 0xffffffff
   *    (-255 to -1).
   *
   * e. Cast the result from (d) to int. This does nothing
   *    interesting, except to make explicit what would have been
   *    implicit in the return statement. The final result is an
   *    int in the range -255 to +255.
   *
   * Unfortunately, compilers don't seem to be good at figuring
   * out that most of this can be optimised away by careful choice
   * of register width and sign extension.
   */
  return (/*e*/ int) (/*d*/
                      (/*c*/ int32_t) (/*b*/ uint16_t) (/*a*/ (uint32_t) r + 0x8000)
                      - 0x8000);
}
/* end of consttime_memcmp.c */