GNUnet 0.22.0
consttime_memcmp.c
Go to the documentation of this file.
1/*
2The MIT License (MIT)
3
4Copyright (c) 2015 Christophe Meessen
5
6Permission is hereby granted, free of charge, to any person obtaining a copy
7of this software and associated documentation files (the "Software"), to deal
8in the Software without restriction, including without limitation the rights
9to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10copies of the Software, and to permit persons to whom the Software is
11furnished to do so, subject to the following conditions:
12
13The above copyright notice and this permission notice shall be included in all
14copies or substantial portions of the Software.
15
16THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22SOFTWARE.
23*/
24
/* Minimally modified for libgnunetutil: added license header
   (from https://github.com/chmike/cst_time_memcmp, LICENSE file), and
   renamed the exported symbol, so that every later use of the name
   'consttime_memcmp' in this file refers to GNUNET_memcmp_ct_: */
#define consttime_memcmp GNUNET_memcmp_ct_
/* Rest of the file is 'original' */
30
31
32#include "platform.h"
33#include <stddef.h>
34#include <inttypes.h>
35
36/*
37 * "constant time" memcmp. Time taken depends on the buffer length, of
38 * course, but not on the content of the buffers.
39 *
40 * Just like the ordinary memcmp function, the return value is
41 * tri-state: <0, 0, or >0. However, applications that need a
42 * constant-time memory comparison function usually need only a
43 * two-state result, signalling only whether the inputs were identical
44 * or different, but not signalling which of the inputs was larger.
45 * This code could be made significantly faster and simpler if the
46 * requirement for a tri-state result were removed.
47 *
48 * In order to protect against adversaries who can observe timing,
49 * cache hits or misses, page faults, etc., and who can use such
50 * observations to learn something about the relationship between the
51 * contents of the two buffers, we have to perform exactly the same
52 * instructions and memory accesses regardless of the contents of the
53 * buffers. We can't stop as soon as we find a difference, we can't
54 * take different conditional branches depending on the data, and we
55 * can't use different pointers or array indexes depending on the data.
56 *
57 * Further reading:
58 *
59 * .Rs
60 * .%A Paul C. Kocher
61 * .%T Timing Attacks on Implementations of Diffie-Hellman, RSA, DSS, and Other Systems
62 * .%D 1996
63 * .%J CRYPTO 1996
64 * .%P 104-113
65 * .%U http://www.cryptography.com/timingattack/paper.html
66 * .%U http://www.webcitation.org/query?url=http%3A%2F%2Fwww.cryptography.com%2Ftimingattack%2Fpaper.html&date=2012-10-17
67 * .Re
68 *
69 * .Rs
70 * .%A D. Boneh
71 * .%A D. Brumley
72 * .%T Remote timing attacks are practical
73 * .%D August 2003
74 * .%J Proceedings of the 12th Usenix Security Symposium, 2003
75 * .%U https://crypto.stanford.edu/~dabo/abstracts/ssl-timing.html
76 * .%U http://www.webcitation.org/query?url=https%3A%2F%2Fcrypto.stanford.edu%2F%7Edabo%2Fabstracts%2Fssl-timing.html&date=2012-10-17
77 * .%U http://www.webcitation.org/query?url=http%3A%2F%2Fcrypto.stanford.edu%2F%7Edabo%2Fpubs%2Fpapers%2Fssl-timing.pdf&date=2012-10-17
78 * .Re
79 *
80 * .Rs
81 * .%A Coda Hale
82 * .%T A Lesson In Timing Attacks (or, Don't use MessageDigest.isEquals)
83 * .%D 13 Aug 2009
84 * .%U http://codahale.com/a-lesson-in-timing-attacks/
85 * .%U http://www.webcitation.org/query?url=http%3A%2F%2Fcodahale.com%2Fa-lesson-in-timing-attacks%2F&date=2012-10-17
86 * .Re
87 *
88 */
89
90/*
91 * A note on portability:
92 *
93 * We assume that char is exactly 8 bits, the same as uint8_t, and that
94 * integer types with exactly 16 bits and exactly 32 bits exist. (If
95 * there is ever a need to change this, then the actual requirement is
96 * that we need a type that is at least two bits wider than char, and
97 * another type that is at least two bits wider than that, or we need to
98 * fake it somehow.)
99 *
100 * We do not assume any particular size for the plain "int" type, except
101 * that it is at least 16 bits, as is guaranteed by the C language
102 * standard.
103 *
104 * We do not assume that signed integer overflow is harmless. We
105 * ensure that signed integer overflow does not occur, so that neither
106 * undefined nor implementation-defined behaviour is invoked.
107 *
108 * We rely on the C standard's guarantees regarding the wraparound
109 * behaviour of unsigned integer arithmetic, and on the analogous
110 * guarantees regarding conversions from signed types to narrower
111 * unsigned types.
112 *
113 * We do not assume that the platform uses two's complement arithmetic.
114 */
115
/*
 * How hard do we have to try to prevent unwanted compiler optimisations?
 *
 * Try compiling with "#define USE_VOLATILE_TEMPORARY 0", and examine
 * the compiler output.  If the only conditional tests in the entire
 * function are to test whether len is zero, then all is well, but try
 * again with different optimisation flags to be sure.  If the compiler
 * emitted code with conditional tests that do anything other than
 * testing whether len is zero, then that's a problem, so try again with
 * "#define USE_VOLATILE_TEMPORARY 1".  If it's still bad, then you are
 * out of luck.
 *
 * The macro is tested with "#if", so it must be defined to 0 or 1
 * (merely defining it to nothing is not enough).
 */
#define USE_VOLATILE_TEMPORARY 0
129
int
consttime_memcmp (const void *b1, const void *b2, size_t len);

/**
 * Compare two buffers without data-dependent branches or memory accesses.
 *
 * Every one of the @a len byte pairs is always read; the loop never exits
 * early.  The result has the same sign convention as memcmp(): it is
 * determined by the first pair of bytes that differ, compared as unsigned
 * values.
 *
 * @param b1 first buffer, at least @a len bytes long
 * @param b2 second buffer, at least @a len bytes long
 * @param len number of bytes to compare
 * @return 0 if the first @a len bytes are identical (including when
 *         @a len is 0); otherwise the difference (byte from @a b1 minus
 *         byte from @a b2) of the first differing pair, which lies in the
 *         range -255 to -1 or +1 to +255
 */
int
consttime_memcmp (const void *b1, const void *b2, size_t len)
{
  const uint8_t *c1, *c2;
  uint16_t d, r, m;

#if USE_VOLATILE_TEMPORARY
  volatile uint16_t v;
#else
  uint16_t v;
#endif

  c1 = b1;
  c2 = b2;

  r = 0;
  while (len)
  {
    /*
     * Take the low 8 bits of r (in the range 0x00 to 0xff,
     * or 0 to 255);
     * As explained below, the low 8 bits of r will be zero
     * if and only if all bytes compared so far were identical;
     * Zero-extend to a 16-bit type (in the range 0x0000 to
     * 0x00ff);
     * Add 255, yielding a result in the range 255 to 510;
     * Save that in a (possibly volatile) variable to prevent
     * the compiler from trying any shortcuts (the use of a
     * volatile variable depends on "#if USE_VOLATILE_TEMPORARY",
     * and most compilers won't need it);
     * Divide by 256 yielding a result of 1 if the original
     * value of r was non-zero, or 0 if r was zero;
     * Subtract 1, yielding 0 if r was non-zero, or -1 if r
     * was zero;
     * Convert to uint16_t, yielding 0x0000 if r was
     * non-zero, or 0xffff if r was zero;
     * Save in m.
     */
    v = ((uint16_t) (uint8_t) r) + 255;
    m = v / 256 - 1;

    /*
     * Get the values from *c1 and *c2 as uint8_t (each will
     * be in the range 0 to 255, or 0x00 to 0xff);
     * Convert them to signed int values (still in the
     * range 0 to 255);
     * Subtract them using signed arithmetic, yielding a
     * result in the range -255 to +255;
     * Convert to uint16_t, yielding a result in the range
     * 0xff01 to 0xffff (for what was previously -255 to
     * -1), or 0, or in the range 0x0001 to 0x00ff (for what
     * was previously +1 to +255).
     */
    d = (uint16_t) ((int) *c1 - (int) *c2);

    /*
     * If the low 8 bits of r were previously 0, then m
     * is now 0xffff, so (d & m) is the same as d, so we
     * effectively copy d to r;
     * Otherwise, if r was previously non-zero, then m is
     * now 0, so (d & m) is zero, so leave r unchanged.
     * Note that the low 8 bits of d will be zero if and
     * only if d == 0, which happens when *c1 == *c2.
     * The low 8 bits of r are thus zero if and only if the
     * entirety of r is zero, which happens if and only if
     * all bytes compared so far were equal.  As soon as a
     * non-zero value is stored in r, it remains unchanged
     * for the remainder of the loop.
     */
    r |= (d & m);

    /*
     * Increment pointers, decrement length, and loop.
     */
    ++c1;
    ++c2;
    --len;
  }

  /*
   * At this point, r is an unsigned value, which will be 0 if the
   * final result should be zero, or in the range 0x0001 to 0x00ff
   * (1 to 255) if the final result should be positive, or in the
   * range 0xff01 to 0xffff (65281 to 65535) if the final result
   * should be negative.
   *
   * We want to convert the unsigned values in the range 0xff01
   * to 0xffff to signed values in the range -255 to -1, while
   * converting the other unsigned values to equivalent signed
   * values (0, or +1 to +255).
   *
   * On a machine with two's complement arithmetic, simply copying
   * the underlying bits (with sign extension if int is wider than
   * 16 bits) would do the job, so something like this might work:
   *
   *     return (int16_t)r;
   *
   * However, that invokes implementation-defined behaviour,
   * because values larger than 32767 can't fit in a signed 16-bit
   * integer without overflow.
   *
   * To avoid any implementation-defined behaviour, we go through
   * these contortions:
   *
   * a. Calculate ((uint32_t)r + 0x8000).  The cast to uint32_t
   *    is to prevent problems on platforms where int is narrower
   *    than 32 bits.  If int is wider than 32 bits, then the
   *    usual arithmetic conversions cause this addition to be
   *    done in unsigned int arithmetic.  If int is 32 bits
   *    or narrower, then this addition is done in uint32_t
   *    arithmetic.  In either case, no overflow or wraparound
   *    occurs, and the result from this step has a value that
   *    will be one of 0x00008000 (32768), or in the range
   *    0x00008001 to 0x000080ff (32769 to 33023), or in the range
   *    0x00017f01 to 0x00017fff (98049 to 98303).
   *
   * b. Cast the result from (a) to uint16_t.  This effectively
   *    discards the high bits of the result, in a way that is
   *    well defined by the C language.  The result from this step
   *    will be of type uint16_t, and its value will be one of
   *    0x8000 (32768), or in the range 0x8001 to 0x80ff (32769 to
   *    33023), or in the range 0x7f01 to 0x7fff (32513 to
   *    32767).
   *
   * c. Cast the result from (b) to int32_t.  We use int32_t
   *    instead of int because we need a type that's strictly
   *    larger than 16 bits, and the C standard allows
   *    implementations where int is only 16 bits.  The result
   *    from this step will be of type int32_t, and its value will
   *    be one of 0x00008000 (32768), or in the range 0x00008001
   *    to 0x000080ff (32769 to 33023), or in the range 0x00007f01
   *    to 0x00007fff (32513 to 32767).
   *
   * d. Take the result from (c) and subtract 0x8000 (32768) using
   *    signed int32_t arithmetic.  The result from this step will
   *    be of type int32_t and the value will be one of
   *    0x00000000 (0), or in the range 0x00000001 to 0x000000ff
   *    (+1 to +255), or in the range 0xffffff01 to 0xffffffff
   *    (-255 to -1).
   *
   * e. Cast the result from (d) to int.  This does nothing
   *    interesting, except to make explicit what would have been
   *    implicit in the return statement.  The final result is an
   *    int in the range -255 to +255.
   *
   * Unfortunately, compilers don't seem to be good at figuring
   * out that most of this can be optimised away by careful choice
   * of register width and sign extension.
   */
  return (/*e*/ int) (/*d*/
    (/*c*/ int32_t) (/*b*/ uint16_t) (/*a*/ (uint32_t) r + 0x8000)
    - 0x8000);
}
#define consttime_memcmp
static mp_limb_t d[(((256)+GMP_NUMB_BITS - 1)/GMP_NUMB_BITS)]
static struct GNUNET_ARM_MonitorHandle * m
Monitor connection with ARM.
Definition: gnunet-arm.c:103