// github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/executor/common_zlib.h
// Copyright 2022 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

//% This code is derived from puff.{c,h}, found in the zlib development. The
//% original files come with the following copyright notice:

//% Copyright (C) 2002-2013 Mark Adler, all rights reserved
//% version 2.3, 21 Jan 2013
//% This software is provided 'as-is', without any express or implied
//% warranty. In no event will the author be held liable for any damages
//% arising from the use of this software.
//% Permission is granted to anyone to use this software for any purpose,
//% including commercial applications, and to alter it and redistribute it
//% freely, subject to the following restrictions:
//% 1. The origin of this software must not be misrepresented; you must not
//% claim that you wrote the original software. If you use this software
//% in a product, an acknowledgment in the product documentation would be
//% appreciated but is not required.
//% 2. Altered source versions must be plainly marked as such, and must not be
//% misrepresented as being the original software.
//% 3. This notice may not be removed or altered from any source distribution.
//% Mark Adler madler@alumni.caltech.edu

//% BEGIN CODE DERIVED FROM puff.{c,h}

// All dynamically allocated memory comes from the stack. The stack required
// is less than 2K bytes. This code is compatible with 16-bit int's and
// assumes that long's are at least 32 bits. puff.c uses the short data type,
// assumed to be 16 bits, for arrays in order to conserve memory. The code
// works whether integers are stored big endian or little endian.

#include <setjmp.h> // for setjmp(), longjmp(), and jmp_buf

// Maximums for allocations and loops.
#define MAXBITS 15 // maximum bits in a code
#define MAXLCODES 286 // maximum number of literal/length codes
#define MAXDCODES 30 // maximum number of distance codes
#define MAXCODES (MAXLCODES + MAXDCODES) // maximum codes lengths to read
#define FIXLCODES 288 // number of fixed literal/length codes

// Decoder state shared by every puff_* helper: the output cursor, the input
// cursor with its bit buffer, and the jump target taken when input runs out.
struct puff_state {
	// output state
	unsigned char* out; // output buffer
	unsigned long outlen; // available space at out
	unsigned long outcnt; // bytes written to out so far

	// input state
	const unsigned char* in; // input buffer
	unsigned long inlen; // available input at in
	unsigned long incnt; // bytes read so far
	int bitbuf; // bit buffer
	int bitcnt; // number of bits in bit buffer

	// input limit error return state for bits() and decode()
	jmp_buf env;
};

// Return need bits from the input stream. This always leaves less than
// eight bits in the buffer. bits() works properly for need == 0.
//
// s:    decoder state; the input cursor and bit buffer are advanced.
// need: number of bits to fetch (callers in this file ask for at most 13).
//
// The first bit taken from the stream ends up in bit 0 of the result.
// Does not return when the input is exhausted: control is transferred with
// longjmp(s->env, 1), so s->env must have been armed with setjmp() beforehand.
static int puff_bits(struct puff_state* s, int need)
{
	// bit accumulator (can use up to 20 bits)
	// load at least need bits into val
	long val = s->bitbuf;
	while (s->bitcnt < need) {
		if (s->incnt == s->inlen)
			longjmp(s->env, 1); // out of input
		val |= (long)(s->in[s->incnt++]) << s->bitcnt; // load eight bits
		s->bitcnt += 8;
	}

	// drop need bits and update buffer, always zero to seven bits left
	s->bitbuf = (int)(val >> need);
	s->bitcnt -= need;

	// return need bits, zeroing the bits above that
	return (int)(val & ((1L << need) - 1));
}

// Process a stored block.
81 static int puff_stored(struct puff_state* s) 82 { 83 // discard leftover bits from current byte (assumes s->bitcnt < 8) 84 s->bitbuf = 0; 85 s->bitcnt = 0; 86 87 // get length and check against its one's complement 88 if (s->incnt + 4 > s->inlen) 89 return 2; // not enough input 90 unsigned len = s->in[s->incnt++]; // length of stored block 91 len |= s->in[s->incnt++] << 8; 92 if (s->in[s->incnt++] != (~len & 0xff) || 93 s->in[s->incnt++] != ((~len >> 8) & 0xff)) 94 return -2; // didn't match complement! 95 96 // copy len bytes from in to out 97 if (s->incnt + len > s->inlen) 98 return 2; // not enough input 99 if (s->outcnt + len > s->outlen) 100 return 1; // not enough output space 101 for (; len--; s->outcnt++, s->incnt++) { 102 if (s->in[s->incnt]) 103 s->out[s->outcnt] = s->in[s->incnt]; 104 } 105 106 // done with a valid stored block 107 return 0; 108 } 109 110 // Huffman code decoding tables. count[1..MAXBITS] is the number of symbols of 111 // each length, which for a canonical code are stepped through in order. 112 // symbol[] are the symbol values in canonical order, where the number of 113 // entries is the sum of the counts in count[]. The decoding process can be 114 // seen in the function decode() below. 115 struct puff_huffman { 116 short* count; // number of symbols of each length 117 short* symbol; // canonically ordered symbols 118 }; 119 120 // Decode a code from the stream s using huffman table h. Return the symbol or 121 // a negative value if there is an error. If all of the lengths are zero, i.e. 122 // an empty code, or if the code is incomplete and an invalid code is received, 123 // then -10 is returned after reading MAXBITS bits. 
124 static int puff_decode(struct puff_state* s, const struct puff_huffman* h) 125 { 126 int first = 0; // first code of length len 127 int index = 0; // index of first code of length len in symbol table 128 int bitbuf = s->bitbuf; // bits from stream 129 int left = s->bitcnt; // bits left in next or left to process 130 int code = first = index = 0; // len bits being decoded 131 int len = 1; // current number of bits in code 132 short* next = h->count + 1; // next number of codes 133 while (1) { 134 while (left--) { 135 code |= bitbuf & 1; 136 bitbuf >>= 1; 137 int count = *next++; // number of codes of length len 138 if (code - count < first) { // if length len, return symbol 139 s->bitbuf = bitbuf; 140 s->bitcnt = (s->bitcnt - len) & 7; 141 return h->symbol[index + (code - first)]; 142 } 143 index += count; // else update for next length 144 first += count; 145 first <<= 1; 146 code <<= 1; 147 len++; 148 } 149 left = (MAXBITS + 1) - len; 150 if (left == 0) 151 break; 152 if (s->incnt == s->inlen) 153 longjmp(s->env, 1); // out of input 154 bitbuf = s->in[s->incnt++]; 155 if (left > 8) 156 left = 8; 157 } 158 return -10; // ran out of codes 159 } 160 161 // Given the list of code lengths length[0..n-1] representing a canonical 162 // Huffman code for n symbols, construct the tables required to decode those 163 // codes. Those tables are the number of codes of each length, and the symbols 164 // sorted by length, retaining their original order within each length. The 165 // return value is zero for a complete code set, negative for an over- 166 // subscribed code set, and positive for an incomplete code set. The tables 167 // can be used if the return value is zero or positive, but they cannot be used 168 // if the return value is negative. If the return value is zero, it is not 169 // possible for decode() using that table to return an error--any stream of 170 // enough bits will resolve to a symbol. 
If the return value is positive, then 171 // it is possible for decode() using that table to return an error for received 172 // codes past the end of the incomplete lengths. 173 174 // Not used by decode(), but used for error checking, h->count[0] is the number 175 // of the n symbols not in the code. So n - h->count[0] is the number of 176 // codes. This is useful for checking for incomplete codes that have more than 177 // one symbol, which is an error in a dynamic block. 178 179 // Assumption: for all i in 0..n-1, 0 <= length[i] <= MAXBITS 180 // This is assured by the construction of the length arrays in dynamic() and 181 // fixed() and is not verified by construct(). 182 static int puff_construct(struct puff_huffman* h, const short* length, int n) 183 { 184 // count number of codes of each length 185 int len; // current length when stepping through h->count[] 186 for (len = 0; len <= MAXBITS; len++) 187 h->count[len] = 0; 188 int symbol; // current symbol when stepping through length[] 189 for (symbol = 0; symbol < n; symbol++) 190 (h->count[length[symbol]])++; // assumes lengths are within bounds 191 if (h->count[0] == n) // no codes! 
192 return 0; // complete, but decode() will fail 193 194 // check for an over-subscribed or incomplete set of lengths 195 int left = 1; // one possible code of zero length 196 for (len = 1; len <= MAXBITS; len++) { 197 left <<= 1; // one more bit, double codes left 198 left -= h->count[len]; // deduct count from possible codes 199 if (left < 0) 200 return left; // over-subscribed--return negative 201 } // left > 0 means incomplete 202 203 // generate offsets into symbol table for each length for sorting 204 short offs[MAXBITS + 1]; 205 offs[1] = 0; 206 for (len = 1; len < MAXBITS; len++) 207 offs[len + 1] = offs[len] + h->count[len]; 208 209 // put symbols in table sorted by length, by symbol order within each length 210 for (symbol = 0; symbol < n; symbol++) 211 if (length[symbol] != 0) 212 h->symbol[offs[length[symbol]]++] = symbol; 213 214 // return zero for complete set, positive for incomplete set 215 return left; 216 } 217 218 // Decode literal/length and distance codes until an end-of-block code. 
219 static int puff_codes(struct puff_state* s, 220 const struct puff_huffman* lencode, 221 const struct puff_huffman* distcode) 222 { 223 static const short lens[29] = {// Size base for length codes 257..285 224 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 225 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258}; 226 static const short lext[29] = {// Extra bits for length codes 257..285 227 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 228 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0}; 229 static const short dists[30] = {// Offset base for distance codes 0..29 230 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 231 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 232 8193, 12289, 16385, 24577}; 233 static const short dext[30] = {// Extra bits for distance codes 0..29 234 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 235 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 236 12, 12, 13, 13}; 237 238 // decode literals and length/distance pairs 239 int symbol; // decoded symbol 240 do { 241 symbol = puff_decode(s, lencode); 242 if (symbol < 0) 243 return symbol; // invalid symbol 244 if (symbol < 256) { // literal: symbol is the byte 245 // write out the literal 246 if (s->outcnt == s->outlen) 247 return 1; 248 if (symbol) 249 s->out[s->outcnt] = symbol; 250 s->outcnt++; 251 } else if (symbol > 256) { // length 252 // get and compute length 253 symbol -= 257; 254 if (symbol >= 29) 255 return -10; // invalid fixed code 256 int len = lens[symbol] + puff_bits(s, lext[symbol]); 257 258 // get and check distance 259 symbol = puff_decode(s, distcode); 260 if (symbol < 0) 261 return symbol; // invalid symbol 262 unsigned dist = dists[symbol] + puff_bits(s, dext[symbol]); 263 if (dist > s->outcnt) 264 return -11; // distance too far back 265 266 // copy length bytes from distance bytes back 267 if (s->outcnt + len > s->outlen) 268 return 1; 269 while (len--) { 270 if (dist <= s->outcnt && s->out[s->outcnt - dist]) 271 s->out[s->outcnt] = s->out[s->outcnt - 
dist]; 272 s->outcnt++; 273 } 274 } 275 } while (symbol != 256); // end of block symbol 276 277 // done with a valid fixed or dynamic block 278 return 0; 279 } 280 281 // Process a fixed codes block. 282 static int puff_fixed(struct puff_state* s) 283 { 284 static int virgin = 1; 285 static short lencnt[MAXBITS + 1], lensym[FIXLCODES]; 286 static short distcnt[MAXBITS + 1], distsym[MAXDCODES]; 287 static struct puff_huffman lencode, distcode; 288 289 // build fixed huffman tables if first call (may not be thread safe) 290 if (virgin) { 291 // construct lencode and distcode 292 lencode.count = lencnt; 293 lencode.symbol = lensym; 294 distcode.count = distcnt; 295 distcode.symbol = distsym; 296 297 // literal/length table 298 short lengths[FIXLCODES]; 299 int symbol; 300 for (symbol = 0; symbol < 144; symbol++) 301 lengths[symbol] = 8; 302 for (; symbol < 256; symbol++) 303 lengths[symbol] = 9; 304 for (; symbol < 280; symbol++) 305 lengths[symbol] = 7; 306 for (; symbol < FIXLCODES; symbol++) 307 lengths[symbol] = 8; 308 puff_construct(&lencode, lengths, FIXLCODES); 309 310 // distance table 311 for (symbol = 0; symbol < MAXDCODES; symbol++) 312 lengths[symbol] = 5; 313 puff_construct(&distcode, lengths, MAXDCODES); 314 315 // do this just once 316 virgin = 0; 317 } 318 319 // decode data until end-of-block code 320 return puff_codes(s, &lencode, &distcode); 321 } 322 323 // Process a dynamic codes block. 
324 static int puff_dynamic(struct puff_state* s) 325 { 326 static const short order[19] = // permutation of code length codes 327 {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; 328 329 // get number of lengths in each table, check lengths 330 int nlen = puff_bits(s, 5) + 257; // number of lengths in descriptor 331 int ndist = puff_bits(s, 5) + 1; 332 int ncode = puff_bits(s, 4) + 4; 333 if (nlen > MAXLCODES || ndist > MAXDCODES) 334 return -3; // bad counts 335 336 // read code length code lengths (really), missing lengths are zero 337 short lengths[MAXCODES]; // descriptor code lengths 338 int index; // index of lengths[] 339 for (index = 0; index < ncode; index++) 340 lengths[order[index]] = puff_bits(s, 3); 341 for (; index < 19; index++) 342 lengths[order[index]] = 0; 343 344 // build huffman table for code lengths codes (use lencode temporarily) 345 short lencnt[MAXBITS + 1], lensym[MAXLCODES]; // lencode memory 346 struct puff_huffman lencode = {lencnt, lensym}; // length codes 347 int err = puff_construct(&lencode, lengths, 19); 348 if (err != 0) // require complete code set here 349 return -4; 350 351 // read length/literal and distance code length tables 352 index = 0; 353 while (index < nlen + ndist) { 354 int symbol; // decoded value 355 int len; // last length to repeat 356 357 symbol = puff_decode(s, &lencode); 358 if (symbol < 0) 359 return symbol; // invalid symbol 360 if (symbol < 16) // length in 0..15 361 lengths[index++] = symbol; 362 else { // repeat instruction 363 len = 0; // assume repeating zeros 364 if (symbol == 16) { // repeat last length 3..6 times 365 if (index == 0) 366 return -5; // no last length! 
367 len = lengths[index - 1]; // last length 368 symbol = 3 + puff_bits(s, 2); 369 } else if (symbol == 17) // repeat zero 3..10 times 370 symbol = 3 + puff_bits(s, 3); 371 else // == 18, repeat zero 11..138 times 372 symbol = 11 + puff_bits(s, 7); 373 if (index + symbol > nlen + ndist) 374 return -6; // too many lengths! 375 while (symbol--) // repeat last or zero symbol times 376 lengths[index++] = len; 377 } 378 } 379 380 // check for end-of-block code -- there better be one! 381 if (lengths[256] == 0) 382 return -9; 383 384 // build huffman table for literal/length codes 385 err = puff_construct(&lencode, lengths, nlen); 386 if (err && (err < 0 || nlen != lencode.count[0] + lencode.count[1])) 387 return -7; // incomplete code ok only for single length 1 code 388 389 // build huffman table for distance codes 390 short distcnt[MAXBITS + 1], distsym[MAXDCODES]; // distcode memory 391 struct puff_huffman distcode = {distcnt, distsym}; // distance codes 392 err = puff_construct(&distcode, lengths + nlen, ndist); 393 if (err && (err < 0 || ndist != distcode.count[0] + distcode.count[1])) 394 return -8; // incomplete code ok only for single length 1 code 395 396 // decode data until end-of-block code 397 return puff_codes(s, &lencode, &distcode); 398 } 399 400 // Inflate source to dest. On return, destlen and sourcelen are updated to the 401 // size of the uncompressed data and the size of the deflate data respectively. 402 // On success, the return value of puff() is zero. If there is an error in the 403 // source data, i.e. it is not in the deflate format, then a negative value is 404 // returned. If there is not enough input available or there is not enough 405 // output space, then a positive error is returned. In that case, destlen and 406 // sourcelen are not updated to facilitate retrying from the beginning with the 407 // provision of more input data or more output space. 
In the case of invalid 408 // inflate data (a negative error), the dest and source pointers are updated to 409 // facilitate the debugging of deflators. 410 411 // The return codes are: 412 413 // 2: available inflate data did not terminate 414 // 1: output space exhausted before completing inflate 415 // 0: successful inflate 416 // -1: invalid block type (type == 3) 417 // -2: stored block length did not match one's complement 418 // -3: dynamic block code description: too many length or distance codes 419 // -4: dynamic block code description: code lengths codes incomplete 420 // -5: dynamic block code description: repeat lengths with no first length 421 // -6: dynamic block code description: repeat more than specified lengths 422 // -7: dynamic block code description: invalid literal/length code lengths 423 // -8: dynamic block code description: invalid distance code lengths 424 // -9: dynamic block code description: missing end-of-block code 425 // -10: invalid literal/length or distance code in fixed or dynamic block 426 // -11: distance is too far back in fixed or dynamic block 427 static int puff( 428 unsigned char* dest, // pointer to destination pointer 429 unsigned long* destlen, // amount of output space 430 const unsigned char* source, // pointer to source data pointer 431 unsigned long sourcelen) // amount of input available 432 { 433 struct puff_state s = { 434 .out = dest, 435 .outlen = *destlen, 436 .outcnt = 0, 437 .in = source, 438 .inlen = sourcelen, 439 .incnt = 0, 440 .bitbuf = 0, 441 .bitcnt = 0, 442 }; 443 // return if bits() or decode() tries to read past available input 444 int err; // return value 445 if (setjmp(s.env) != 0) // if came back here via longjmp() 446 err = 2; // then skip do-loop, return error 447 else { 448 // process blocks until last block or error 449 int last; 450 do { 451 last = puff_bits(&s, 1); // one if last block 452 int type = puff_bits(&s, 2); // block type 0..3 453 err = type == 0 ? puff_stored(&s) : (type == 1 ? 
puff_fixed(&s) : (type == 2 ? puff_dynamic(&s) : -1)); // type == 3, invalid 454 if (err != 0) 455 break; // return with error 456 } while (!last); 457 } 458 459 *destlen = s.outcnt; 460 return err; 461 } 462 463 //% END CODE DERIVED FROM puff.{c,h} 464 465 #include <errno.h> 466 #include <sys/mman.h> 467 #define ZLIB_HEADER_WIDTH 2 // Two-byte zlib header width. 468 469 static int puff_zlib_to_file(const unsigned char* source, unsigned long sourcelen, int dest_fd) 470 { 471 // Ignore zlib header. 472 if (sourcelen < ZLIB_HEADER_WIDTH) 473 return 0; 474 source += ZLIB_HEADER_WIDTH; 475 sourcelen -= ZLIB_HEADER_WIDTH; 476 477 // Note: pkg/image/compression.go also knows this const. 478 const unsigned long max_destlen = 132 << 20; 479 void* ret = mmap(0, max_destlen, PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0); 480 if (ret == MAP_FAILED) 481 return -1; 482 unsigned char* dest = (unsigned char*)ret; 483 484 // Inflate source array to destination file. 485 unsigned long destlen = max_destlen; // copy destlen as puff() may modify it 486 int err = puff(dest, &destlen, source, sourcelen); 487 if (err) { 488 munmap(dest, max_destlen); 489 errno = -err; 490 return -1; 491 } 492 if (write(dest_fd, dest, destlen) != (ssize_t)destlen) { 493 munmap(dest, max_destlen); 494 return -1; 495 } 496 // Unmap memory-mapped region 497 return munmap(dest, max_destlen); 498 }