GNUnet  0.11.x
consttime_memcmp.c
Go to the documentation of this file.
1 /*
2 The MIT License (MIT)
3 
4 Copyright (c) 2015 Christophe Meessen
5 
6 Permission is hereby granted, free of charge, to any person obtaining a copy
7 of this software and associated documentation files (the "Software"), to deal
8 in the Software without restriction, including without limitation the rights
9 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 copies of the Software, and to permit persons to whom the Software is
11 furnished to do so, subject to the following conditions:
12 
13 The above copyright notice and this permission notice shall be included in all
14 copies or substantial portions of the Software.
15 
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 SOFTWARE.
23 */
24 
25 /* Minimally modified for libgnunetutil: added license header
26  (from https://github.com/chmike/cst_time_memcmp, LICENSE file), and
27  renamed the exported symbol: */
28 #define consttime_memcmp GNUNET_memcmp_ct_
29 /* Rest of the file is 'original' */
30 
31 
32 #include <stddef.h>
33 #include <inttypes.h>
34 
35 /*
36  * "constant time" memcmp. Time taken depends on the buffer length, of
37  * course, but not on the content of the buffers.
38  *
39  * Just like the ordinary memcmp function, the return value is
40  * tri-state: <0, 0, or >0. However, applications that need a
41  * constant-time memory comparison function usually need only a
42  * two-state result, signalling only whether the inputs were identical
43  * or different, but not signalling which of the inputs was larger.
44  * This code could be made significantly faster and simpler if the
45  * requirement for a tri-state result were removed.
46  *
47  * In order to protect against adversaries who can observe timing,
48  * cache hits or misses, page faults, etc., and who can use such
49  * observations to learn something about the relationship between the
50  * contents of the two buffers, we have to perform exactly the same
51  * instructions and memory accesses regardless of the contents of the
52  * buffers. We can't stop as soon as we find a difference, we can't
53  * take different conditional branches depending on the data, and we
54  * can't use different pointers or array indexes depending on the data.
55  *
56  * Further reading:
57  *
58  * .Rs
59  * .%A Paul C. Kocher
60  * .%T Timing Attacks on Implementations of Diffie-Hellman, RSA, DSS, and Other Systems
61  * .%D 1996
62  * .%J CRYPTO 1996
63  * .%P 104-113
64  * .%U http://www.cryptography.com/timingattack/paper.html
65  * .%U http://www.webcitation.org/query?url=http%3A%2F%2Fwww.cryptography.com%2Ftimingattack%2Fpaper.html&date=2012-10-17
66  * .Re
67  *
68  * .Rs
69  * .%A D. Boneh
70  * .%A D. Brumley
71  * .%T Remote timing attacks are practical
72  * .%D August 2003
73  * .%J Proceedings of the 12th Usenix Security Symposium, 2003
74  * .%U https://crypto.stanford.edu/~dabo/abstracts/ssl-timing.html
75  * .%U http://www.webcitation.org/query?url=https%3A%2F%2Fcrypto.stanford.edu%2F%7Edabo%2Fabstracts%2Fssl-timing.html&date=2012-10-17
76  * .%U http://www.webcitation.org/query?url=http%3A%2F%2Fcrypto.stanford.edu%2F%7Edabo%2Fpubs%2Fpapers%2Fssl-timing.pdf&date=2012-10-17
77  * .Es
78  *
79  * .Rs
80  * .%A Coda Hale
81  * .%T A Lesson In Timing Attacks (or, Don't use MessageDigest.isEquals)
82  * .%D 13 Aug 2009
83  * .%U http://codahale.com/a-lesson-in-timing-attacks/
84  * .%U http://www.webcitation.org/query?url=http%3A%2F%2Fcodahale.com%2Fa-lesson-in-timing-attacks%2F&date=2012-10-17
85  * .Re
86  *
87  */
88 
89 /*
90  * A note on portability:
91  *
92  * We assume that char is exactly 8 bits, the same as uint8_t, and that
93  * integer types with exactly 16 bits and exactly 32 bits exist. (If
94  * there is ever a need to change this, then the actual requirement is
95  * that we need a type that is at least two bits wider than char, and
96  * another type that is at least two bits wider than that, or we need to
97  * fake it somehow.)
98  *
99  * We do not assume any particular size for the plain "int" type, except
100  * that it is at least 16 bits, as is guaranteed by the C language
101  * standard.
102  *
103  * We do not assume that signed integer overflow is harmless. We
104  * ensure that signed integer overflow does not occur, so that
105  * implementation-defined overflow behaviour is not invoked.
106  *
107  * We rely on the C standard's guarantees regarding the wraparound
108  * behaviour of unsigned integer arithmetic, and on the analagous
109  * guarantees regarding conversions from signed types to narrower
110  * unsigned types.
111  *
112  * We do not assume that the platform uses two's complement arithmetic.
113  */
114 
/*
 * How hard do we have to try to prevent unwanted compiler optimisations?
 *
 * Build with "#define USE_VOLATILE_TEMPORARY 0" and inspect the
 * generated code. If the only conditional branches test whether len is
 * zero, all is well (re-check under different optimisation levels).
 * If the compiler emitted data-dependent branches, rebuild with
 * "#define USE_VOLATILE_TEMPORARY 1". If it is still branching on the
 * data, you are out of luck.
 */
#define USE_VOLATILE_TEMPORARY 0

/**
 * Constant-time memcmp: running time depends on len, never on the
 * bytes being compared. Returns a tri-state result like memcmp:
 * <0, 0, or >0, according to the first differing byte.
 *
 * @param b1 first buffer
 * @param b2 second buffer
 * @param len number of bytes to compare in each buffer
 * @return 0 if equal, negative/positive per the first mismatch
 */
int
consttime_memcmp (const void *b1, const void *b2, size_t len)
{
  const uint8_t *u1 = b1;
  const uint8_t *u2 = b2;
  uint16_t diff;
  uint16_t mask;
  uint16_t result = 0;

#if USE_VOLATILE_TEMPORARY
  volatile uint16_t tmp;
#else
  uint16_t tmp;
#endif

  while (len != 0)
  {
    /*
     * Branch-free mask derivation: the low 8 bits of 'result' are
     * zero exactly when every byte so far matched. Zero-extend them,
     * add 255 (giving 255..510), then divide by 256, which yields 1
     * if they were non-zero and 0 if they were zero. Subtracting 1
     * and converting to uint16_t turns that into a mask of 0x0000
     * (difference already found) or 0xffff (still all-equal). The
     * temporary is optionally volatile to keep the compiler from
     * replacing this arithmetic with a conditional shortcut.
     */
    tmp = ((uint16_t) (uint8_t) result) + 255;
    mask = tmp / 256 - 1;

    /*
     * Difference of the current bytes, computed in signed int
     * (range -255..+255, no overflow) and wrapped into uint16_t:
     * 0x0000 on equality, 0x0001..0x00ff when *u1 > *u2, or
     * 0xff01..0xffff when *u1 < *u2 (well-defined unsigned
     * wraparound, no reliance on two's complement).
     */
    diff = (uint16_t) ((int) *u1 - (int) *u2);

    /*
     * Latch the first non-zero difference: while 'result' is zero
     * the mask is 0xffff and 'diff' is copied in; once 'result' is
     * non-zero the mask is 0x0000 and later bytes cannot alter it.
     */
    result |= (diff & mask);

    ++u1;
    ++u2;
    --len;
  }

  /*
   * 'result' is now 0, or 0x0001..0x00ff (answer should be positive),
   * or 0xff01..0xffff (answer should be negative). Convert it to a
   * signed value in -255..+255 without implementation-defined
   * behaviour: add 0x8000 in uint32_t (no wraparound possible),
   * truncate to uint16_t (well-defined modular reduction), widen to
   * int32_t (strictly wider than 16 bits, so the value fits), and
   * subtract 0x8000 in signed arithmetic (no overflow possible).
   * On two's-complement machines this is equivalent to
   * "return (int16_t) result;" but stays portable.
   */
  return (int) ((int32_t) (uint16_t) ((uint32_t) result + 0x8000) - 0x8000);
}
static struct GNUNET_ARM_MonitorHandle * m
Monitor connection with ARM.
Definition: gnunet-arm.c:104
#define consttime_memcmp
uint16_t len
length of data (which is always a uint32_t, but presumably this can be used to specify that fewer byt...