1 | /* Functions to compute MD5 message digest of files or memory blocks. |
2 | according to the definition of MD5 in RFC 1321 from April 1992. |
3 | Copyright (C) 1995-1997, 1999-2001, 2005-2006, 2008-2012 Free Software |
4 | Foundation, Inc. |
5 | This file is part of the GNU C Library. |
6 | |
7 | This program is free software; you can redistribute it and/or modify it |
8 | under the terms of the GNU Lesser General Public License as published by the |
9 | Free Software Foundation; either version 2.1, or (at your option) any |
10 | later version. |
11 | |
12 | This program is distributed in the hope that it will be useful, |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | GNU Lesser General Public License for more details. |
16 | |
17 | You should have received a copy of the GNU Lesser General Public License |
18 | along with this program; if not, see <http://www.gnu.org/licenses/>. */ |
19 | |
20 | /* Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995. */ |
21 | |
22 | #include <config.h> |
23 | |
24 | #include "md5.h" |
25 | |
26 | #include <stdalign.h> |
27 | #include <stdint.h> |
28 | #include <stdlib.h> |
29 | #include <string.h> |
30 | #include <sys/types.h> |
31 | |
32 | #if USE_UNLOCKED_IO |
33 | # include "unlocked-io.h" |
34 | #endif |
35 | |
36 | #ifdef _LIBC |
37 | # include <endian.h> |
38 | # if __BYTE_ORDER == __BIG_ENDIAN |
39 | # define WORDS_BIGENDIAN 1 |
40 | # endif |
41 | /* We need to keep the namespace clean so define the MD5 function |
42 | protected using leading __ . */ |
43 | # define md5_init_ctx __md5_init_ctx |
44 | # define md5_process_block __md5_process_block |
45 | # define md5_process_bytes __md5_process_bytes |
46 | # define md5_finish_ctx __md5_finish_ctx |
47 | # define md5_read_ctx __md5_read_ctx |
48 | # define md5_stream __md5_stream |
49 | # define md5_buffer __md5_buffer |
50 | #endif |
51 | |
52 | #ifdef WORDS_BIGENDIAN |
53 | # define SWAP(n) \ |
54 | (((n) << 24) | (((n) & 0xff00) << 8) | (((n) >> 8) & 0xff00) | ((n) >> 24)) |
55 | #else |
56 | # define SWAP(n) (n) |
57 | #endif |
58 | |
59 | #define BLOCKSIZE 32768 |
60 | #if BLOCKSIZE % 64 != 0 |
61 | # error "invalid BLOCKSIZE" |
62 | #endif |
63 | |
64 | /* This array contains the bytes used to pad the buffer to the next |
65 | 64-byte boundary. (RFC 1321, 3.1: Step 1) */ |
66 | static const unsigned char fillbuf[64] = { 0x80, 0 /* , 0, 0, ... */ }; |
67 | |
68 | |
69 | /* Initialize structure containing state of computation. |
70 | (RFC 1321, 3.3: Step 3) */ |
71 | void |
72 | md5_init_ctx (struct md5_ctx *ctx) |
73 | { |
74 | ctx->A = 0x67452301; |
75 | ctx->B = 0xefcdab89; |
76 | ctx->C = 0x98badcfe; |
77 | ctx->D = 0x10325476; |
78 | |
79 | ctx->total[0] = ctx->total[1] = 0; |
80 | ctx->buflen = 0; |
81 | } |
82 | |
83 | /* Copy the 4 byte value from v into the memory location pointed to by *cp, |
84 | If your architecture allows unaligned access this is equivalent to |
85 | * (uint32_t *) cp = v */ |
86 | static inline void |
87 | set_uint32 (char *cp, uint32_t v) |
88 | { |
89 | memcpy (cp, &v, sizeof v); |
90 | } |
91 | |
92 | /* Put result from CTX in first 16 bytes following RESBUF. The result |
93 | must be in little endian byte order. */ |
94 | void * |
95 | md5_read_ctx (const struct md5_ctx *ctx, void *resbuf) |
96 | { |
97 | char *r = resbuf; |
98 | set_uint32 (r + 0 * sizeof ctx->A, SWAP (ctx->A)); |
99 | set_uint32 (r + 1 * sizeof ctx->B, SWAP (ctx->B)); |
100 | set_uint32 (r + 2 * sizeof ctx->C, SWAP (ctx->C)); |
101 | set_uint32 (r + 3 * sizeof ctx->D, SWAP (ctx->D)); |
102 | |
103 | return resbuf; |
104 | } |
105 | |
106 | /* Process the remaining bytes in the internal buffer and the usual |
107 | prolog according to the standard and write the result to RESBUF. */ |
108 | void * |
109 | md5_finish_ctx (struct md5_ctx *ctx, void *resbuf) |
110 | { |
111 | /* Take yet unprocessed bytes into account. */ |
112 | uint32_t bytes = ctx->buflen; |
113 | size_t size = (bytes < 56) ? 64 / 4 : 64 * 2 / 4; |
114 | |
115 | /* Now count remaining bytes. */ |
116 | ctx->total[0] += bytes; |
117 | if (ctx->total[0] < bytes) |
118 | ++ctx->total[1]; |
119 | |
120 | /* Put the 64-bit file length in *bits* at the end of the buffer. */ |
121 | ctx->buffer[size - 2] = SWAP (ctx->total[0] << 3); |
122 | ctx->buffer[size - 1] = SWAP ((ctx->total[1] << 3) | (ctx->total[0] >> 29)); |
123 | |
124 | memcpy (&((char *) ctx->buffer)[bytes], fillbuf, (size - 2) * 4 - bytes); |
125 | |
126 | /* Process last bytes. */ |
127 | md5_process_block (ctx->buffer, size * 4, ctx); |
128 | |
129 | return md5_read_ctx (ctx, resbuf); |
130 | } |
131 | |
132 | /* Compute MD5 message digest for bytes read from STREAM. The |
133 | resulting message digest number will be written into the 16 bytes |
134 | beginning at RESBLOCK. */ |
135 | int |
136 | md5_stream (FILE *stream, void *resblock) |
137 | { |
138 | struct md5_ctx ctx; |
139 | size_t sum; |
140 | |
141 | char *buffer = malloc (BLOCKSIZE + 72); |
142 | if (!buffer) |
143 | return 1; |
144 | |
145 | /* Initialize the computation context. */ |
146 | md5_init_ctx (&ctx); |
147 | |
148 | /* Iterate over full file contents. */ |
149 | while (1) |
150 | { |
151 | /* We read the file in blocks of BLOCKSIZE bytes. One call of the |
152 | computation function processes the whole buffer so that with the |
153 | next round of the loop another block can be read. */ |
154 | size_t n; |
155 | sum = 0; |
156 | |
157 | /* Read block. Take care for partial reads. */ |
158 | while (1) |
159 | { |
160 | n = fread (buffer + sum, 1, BLOCKSIZE - sum, stream); |
161 | |
162 | sum += n; |
163 | |
164 | if (sum == BLOCKSIZE) |
165 | break; |
166 | |
167 | if (n == 0) |
168 | { |
169 | /* Check for the error flag IFF N == 0, so that we don't |
170 | exit the loop after a partial read due to e.g., EAGAIN |
171 | or EWOULDBLOCK. */ |
172 | if (ferror (stream)) |
173 | { |
174 | free (buffer); |
175 | return 1; |
176 | } |
177 | goto process_partial_block; |
178 | } |
179 | |
180 | /* We've read at least one byte, so ignore errors. But always |
181 | check for EOF, since feof may be true even though N > 0. |
182 | Otherwise, we could end up calling fread after EOF. */ |
183 | if (feof (stream)) |
184 | goto process_partial_block; |
185 | } |
186 | |
187 | /* Process buffer with BLOCKSIZE bytes. Note that |
188 | BLOCKSIZE % 64 == 0 |
189 | */ |
190 | md5_process_block (buffer, BLOCKSIZE, &ctx); |
191 | } |
192 | |
193 | process_partial_block: |
194 | |
195 | /* Process any remaining bytes. */ |
196 | if (sum > 0) |
197 | md5_process_bytes (buffer, sum, &ctx); |
198 | |
199 | /* Construct result in desired memory. */ |
200 | md5_finish_ctx (&ctx, resblock); |
201 | free (buffer); |
202 | return 0; |
203 | } |
204 | |
205 | /* Compute MD5 message digest for LEN bytes beginning at BUFFER. The |
206 | result is always in little endian byte order, so that a byte-wise |
207 | output yields to the wanted ASCII representation of the message |
208 | digest. */ |
209 | void * |
210 | md5_buffer (const char *buffer, size_t len, void *resblock) |
211 | { |
212 | struct md5_ctx ctx; |
213 | |
214 | /* Initialize the computation context. */ |
215 | md5_init_ctx (&ctx); |
216 | |
217 | /* Process whole buffer but last len % 64 bytes. */ |
218 | md5_process_bytes (buffer, len, &ctx); |
219 | |
220 | /* Put result in desired memory area. */ |
221 | return md5_finish_ctx (&ctx, resblock); |
222 | } |
223 | |
224 | |
225 | void |
226 | md5_process_bytes (const void *buffer, size_t len, struct md5_ctx *ctx) |
227 | { |
228 | /* When we already have some bits in our internal buffer concatenate |
229 | both inputs first. */ |
230 | if (ctx->buflen != 0) |
231 | { |
232 | size_t left_over = ctx->buflen; |
233 | size_t add = 128 - left_over > len ? len : 128 - left_over; |
234 | |
235 | memcpy (&((char *) ctx->buffer)[left_over], buffer, add); |
236 | ctx->buflen += add; |
237 | |
238 | if (ctx->buflen > 64) |
239 | { |
240 | md5_process_block (ctx->buffer, ctx->buflen & ~63, ctx); |
241 | |
242 | ctx->buflen &= 63; |
243 | /* The regions in the following copy operation cannot overlap. */ |
244 | memcpy (ctx->buffer, |
245 | &((char *) ctx->buffer)[(left_over + add) & ~63], |
246 | ctx->buflen); |
247 | } |
248 | |
249 | buffer = (const char *) buffer + add; |
250 | len -= add; |
251 | } |
252 | |
253 | /* Process available complete blocks. */ |
254 | if (len >= 64) |
255 | { |
256 | #if !_STRING_ARCH_unaligned |
257 | # define UNALIGNED_P(p) ((uintptr_t) (p) % alignof (uint32_t) != 0) |
258 | if (UNALIGNED_P (buffer)) |
259 | while (len > 64) |
260 | { |
261 | md5_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx); |
262 | buffer = (const char *) buffer + 64; |
263 | len -= 64; |
264 | } |
265 | else |
266 | #endif |
267 | { |
268 | md5_process_block (buffer, len & ~63, ctx); |
269 | buffer = (const char *) buffer + (len & ~63); |
270 | len &= 63; |
271 | } |
272 | } |
273 | |
274 | /* Move remaining bytes in internal buffer. */ |
275 | if (len > 0) |
276 | { |
277 | size_t left_over = ctx->buflen; |
278 | |
279 | memcpy (&((char *) ctx->buffer)[left_over], buffer, len); |
280 | left_over += len; |
281 | if (left_over >= 64) |
282 | { |
283 | md5_process_block (ctx->buffer, 64, ctx); |
284 | left_over -= 64; |
285 | memcpy (ctx->buffer, &ctx->buffer[16], left_over); |
286 | } |
287 | ctx->buflen = left_over; |
288 | } |
289 | } |
290 | |
291 | |
292 | /* These are the four functions used in the four steps of the MD5 algorithm |
293 | and defined in the RFC 1321. The first function is a little bit optimized |
294 | (as found in Colin Plumbs public domain implementation). */ |
295 | /* #define FF(b, c, d) ((b & c) | (~b & d)) */ |
296 | #define FF(b, c, d) (d ^ (b & (c ^ d))) |
297 | #define FG(b, c, d) FF (d, b, c) |
298 | #define FH(b, c, d) (b ^ c ^ d) |
299 | #define FI(b, c, d) (c ^ (b | ~d)) |
300 | |
301 | /* Process LEN bytes of BUFFER, accumulating context into CTX. |
302 | It is assumed that LEN % 64 == 0. */ |
303 | |
304 | void |
305 | md5_process_block (const void *buffer, size_t len, struct md5_ctx *ctx) |
306 | { |
307 | uint32_t correct_words[16]; |
308 | const uint32_t *words = buffer; |
309 | size_t nwords = len / sizeof (uint32_t); |
310 | const uint32_t *endp = words + nwords; |
311 | uint32_t A = ctx->A; |
312 | uint32_t B = ctx->B; |
313 | uint32_t C = ctx->C; |
314 | uint32_t D = ctx->D; |
315 | uint32_t lolen = len; |
316 | |
317 | /* First increment the byte count. RFC 1321 specifies the possible |
318 | length of the file up to 2^64 bits. Here we only compute the |
319 | number of bytes. Do a double word increment. */ |
320 | ctx->total[0] += lolen; |
321 | ctx->total[1] += (len >> 31 >> 1) + (ctx->total[0] < lolen); |
322 | |
323 | /* Process all bytes in the buffer with 64 bytes in each round of |
324 | the loop. */ |
325 | while (words < endp) |
326 | { |
327 | uint32_t *cwp = correct_words; |
328 | uint32_t A_save = A; |
329 | uint32_t B_save = B; |
330 | uint32_t C_save = C; |
331 | uint32_t D_save = D; |
332 | |
333 | /* First round: using the given function, the context and a constant |
334 | the next context is computed. Because the algorithms processing |
335 | unit is a 32-bit word and it is determined to work on words in |
336 | little endian byte order we perhaps have to change the byte order |
337 | before the computation. To reduce the work for the next steps |
338 | we store the swapped words in the array CORRECT_WORDS. */ |
339 | |
340 | #define OP(a, b, c, d, s, T) \ |
341 | do \ |
342 | { \ |
343 | a += FF (b, c, d) + (*cwp++ = SWAP (*words)) + T; \ |
344 | ++words; \ |
345 | CYCLIC (a, s); \ |
346 | a += b; \ |
347 | } \ |
348 | while (0) |
349 | |
350 | /* It is unfortunate that C does not provide an operator for |
351 | cyclic rotation. Hope the C compiler is smart enough. */ |
352 | #define CYCLIC(w, s) (w = (w << s) | (w >> (32 - s))) |
353 | |
354 | /* Before we start, one word to the strange constants. |
355 | They are defined in RFC 1321 as |
356 | |
357 | T[i] = (int) (4294967296.0 * fabs (sin (i))), i=1..64 |
358 | |
359 | Here is an equivalent invocation using Perl: |
360 | |
361 | perl -e 'foreach(1..64){printf "0x%08x\n", int (4294967296 * abs (sin $_))}' |
362 | */ |
363 | |
364 | /* Round 1. */ |
365 | OP (A, B, C, D, 7, 0xd76aa478); |
366 | OP (D, A, B, C, 12, 0xe8c7b756); |
367 | OP (C, D, A, B, 17, 0x242070db); |
368 | OP (B, C, D, A, 22, 0xc1bdceee); |
369 | OP (A, B, C, D, 7, 0xf57c0faf); |
370 | OP (D, A, B, C, 12, 0x4787c62a); |
371 | OP (C, D, A, B, 17, 0xa8304613); |
372 | OP (B, C, D, A, 22, 0xfd469501); |
373 | OP (A, B, C, D, 7, 0x698098d8); |
374 | OP (D, A, B, C, 12, 0x8b44f7af); |
375 | OP (C, D, A, B, 17, 0xffff5bb1); |
376 | OP (B, C, D, A, 22, 0x895cd7be); |
377 | OP (A, B, C, D, 7, 0x6b901122); |
378 | OP (D, A, B, C, 12, 0xfd987193); |
379 | OP (C, D, A, B, 17, 0xa679438e); |
380 | OP (B, C, D, A, 22, 0x49b40821); |
381 | |
382 | /* For the second to fourth round we have the possibly swapped words |
383 | in CORRECT_WORDS. Redefine the macro to take an additional first |
384 | argument specifying the function to use. */ |
385 | #undef OP |
386 | #define OP(f, a, b, c, d, k, s, T) \ |
387 | do \ |
388 | { \ |
389 | a += f (b, c, d) + correct_words[k] + T; \ |
390 | CYCLIC (a, s); \ |
391 | a += b; \ |
392 | } \ |
393 | while (0) |
394 | |
395 | /* Round 2. */ |
396 | OP (FG, A, B, C, D, 1, 5, 0xf61e2562); |
397 | OP (FG, D, A, B, C, 6, 9, 0xc040b340); |
398 | OP (FG, C, D, A, B, 11, 14, 0x265e5a51); |
399 | OP (FG, B, C, D, A, 0, 20, 0xe9b6c7aa); |
400 | OP (FG, A, B, C, D, 5, 5, 0xd62f105d); |
401 | OP (FG, D, A, B, C, 10, 9, 0x02441453); |
402 | OP (FG, C, D, A, B, 15, 14, 0xd8a1e681); |
403 | OP (FG, B, C, D, A, 4, 20, 0xe7d3fbc8); |
404 | OP (FG, A, B, C, D, 9, 5, 0x21e1cde6); |
405 | OP (FG, D, A, B, C, 14, 9, 0xc33707d6); |
406 | OP (FG, C, D, A, B, 3, 14, 0xf4d50d87); |
407 | OP (FG, B, C, D, A, 8, 20, 0x455a14ed); |
408 | OP (FG, A, B, C, D, 13, 5, 0xa9e3e905); |
409 | OP (FG, D, A, B, C, 2, 9, 0xfcefa3f8); |
410 | OP (FG, C, D, A, B, 7, 14, 0x676f02d9); |
411 | OP (FG, B, C, D, A, 12, 20, 0x8d2a4c8a); |
412 | |
413 | /* Round 3. */ |
414 | OP (FH, A, B, C, D, 5, 4, 0xfffa3942); |
415 | OP (FH, D, A, B, C, 8, 11, 0x8771f681); |
416 | OP (FH, C, D, A, B, 11, 16, 0x6d9d6122); |
417 | OP (FH, B, C, D, A, 14, 23, 0xfde5380c); |
418 | OP (FH, A, B, C, D, 1, 4, 0xa4beea44); |
419 | OP (FH, D, A, B, C, 4, 11, 0x4bdecfa9); |
420 | OP (FH, C, D, A, B, 7, 16, 0xf6bb4b60); |
421 | OP (FH, B, C, D, A, 10, 23, 0xbebfbc70); |
422 | OP (FH, A, B, C, D, 13, 4, 0x289b7ec6); |
423 | OP (FH, D, A, B, C, 0, 11, 0xeaa127fa); |
424 | OP (FH, C, D, A, B, 3, 16, 0xd4ef3085); |
425 | OP (FH, B, C, D, A, 6, 23, 0x04881d05); |
426 | OP (FH, A, B, C, D, 9, 4, 0xd9d4d039); |
427 | OP (FH, D, A, B, C, 12, 11, 0xe6db99e5); |
428 | OP (FH, C, D, A, B, 15, 16, 0x1fa27cf8); |
429 | OP (FH, B, C, D, A, 2, 23, 0xc4ac5665); |
430 | |
431 | /* Round 4. */ |
432 | OP (FI, A, B, C, D, 0, 6, 0xf4292244); |
433 | OP (FI, D, A, B, C, 7, 10, 0x432aff97); |
434 | OP (FI, C, D, A, B, 14, 15, 0xab9423a7); |
435 | OP (FI, B, C, D, A, 5, 21, 0xfc93a039); |
436 | OP (FI, A, B, C, D, 12, 6, 0x655b59c3); |
437 | OP (FI, D, A, B, C, 3, 10, 0x8f0ccc92); |
438 | OP (FI, C, D, A, B, 10, 15, 0xffeff47d); |
439 | OP (FI, B, C, D, A, 1, 21, 0x85845dd1); |
440 | OP (FI, A, B, C, D, 8, 6, 0x6fa87e4f); |
441 | OP (FI, D, A, B, C, 15, 10, 0xfe2ce6e0); |
442 | OP (FI, C, D, A, B, 6, 15, 0xa3014314); |
443 | OP (FI, B, C, D, A, 13, 21, 0x4e0811a1); |
444 | OP (FI, A, B, C, D, 4, 6, 0xf7537e82); |
445 | OP (FI, D, A, B, C, 11, 10, 0xbd3af235); |
446 | OP (FI, C, D, A, B, 2, 15, 0x2ad7d2bb); |
447 | OP (FI, B, C, D, A, 9, 21, 0xeb86d391); |
448 | |
449 | /* Add the starting values of the context. */ |
450 | A += A_save; |
451 | B += B_save; |
452 | C += C_save; |
453 | D += D_save; |
454 | } |
455 | |
456 | /* Put checksum in context given as argument. */ |
457 | ctx->A = A; |
458 | ctx->B = B; |
459 | ctx->C = C; |
460 | ctx->D = D; |
461 | } |
462 | |