/*
 * Wrapper for decompressing XZ-compressed kernel, initramfs, and initrd
 *
 * Author: Lasse Collin <lasse.collin@tukaani.org>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

/*
 * Important notes about in-place decompression
 *
 * At least on x86, the kernel is decompressed in place: the compressed data
 * is placed at the end of the output buffer, and the decompressor overwrites
 * most of the compressed data. There must be enough safety margin to
 * guarantee that the write position is always behind the read position.
 *
 * The safety margin for XZ with LZMA2 or BCJ+LZMA2 is calculated below.
 * Note that the margin with XZ is bigger than with Deflate (gzip)!
 *
 * The worst case for in-place decompression is that the beginning of
 * the file is compressed extremely well, and the rest of the file is
 * incompressible. Thus, we must look for worst-case expansion when the
 * compressor is encoding incompressible data.
 *
 * The structure of the .xz file in case of a compressed kernel is as follows.
 * Sizes (in bytes) of the fields are given in parentheses.
 *
 *    Stream Header (12)
 *    Block Header:
 *      Block Header (8-12)
 *      Compressed Data (N)
 *      Block Padding (0-3)
 *      CRC32 (4)
 *    Index (8-20)
 *    Stream Footer (12)
 *
 * Normally there is exactly one Block, but let's assume that there are
 * 2-4 Blocks just in case. Because the Stream Header and the Block Header
 * of the first Block don't make the decompressor produce any uncompressed
 * data, we can omit them from our calculations. The Block Headers of any
 * additional Blocks still have to be taken into account. With these
 * assumptions, it is safe to assume that the total header overhead is
 * less than 128 bytes.
 *
 * Compressed Data contains LZMA2 or BCJ+LZMA2 encoded data. Since BCJ
 * doesn't change the size of the data, it is enough to calculate the
 * safety margin for LZMA2.
 *
 * LZMA2 stores the data in chunks. Each chunk has a header whose size is
 * at most 6 bytes, but to keep the numbers at round powers of two, let's
 * assume that the maximum chunk header size is 8 bytes. After the chunk
 * header, there may be up to 64 KiB of actual payload in the chunk. Often
 * the payload is quite a bit smaller though; to be safe, let's assume that
 * an average chunk has only 32 KiB of payload.
 *
 * The maximum uncompressed size of the payload is 2 MiB. The minimum
 * uncompressed size of the payload is in practice never less than the
 * payload size itself. The LZMA2 format would allow the uncompressed size
 * to be less than the payload size, but no sane compressor creates such
 * files. LZMA2 supports storing incompressible data in uncompressed form,
 * so there's never a need to create payloads whose uncompressed size is
 * smaller than the compressed size.
 *
 * The assumption that the uncompressed size of the payload is never
 * smaller than the payload itself is valid only when talking about
 * the payload as a whole. It is possible that the payload has parts where
 * the decompressor consumes more input than it produces output. Calculating
 * the worst case for this would be tricky. Instead of trying to do that,
 * let's simply make sure that the decompressor never overwrites any bytes
 * of the payload which it is currently reading.
 *
 * Now we have enough information to calculate the safety margin. We need
 *   - 128 bytes for the .xz file format headers;
 *   - 8 bytes for every 32 KiB of uncompressed size (one LZMA2 chunk header
 *     per chunk, each chunk having an average payload size of 32 KiB); and
 *   - 64 KiB (biggest possible LZMA2 chunk payload size) to make sure that
 *     the decompressor never overwrites anything from the LZMA2 chunk
 *     payload it is currently reading.
 *
 * We get the following formula:
 *
 *    safety_margin = 128 + uncompressed_size * 8 / 32768 + 65536
 *                  = 128 + (uncompressed_size >> 12) + 65536
 *
 * For comparison, according to arch/x86/boot/compressed/misc.c, the
 * equivalent formula for Deflate is this:
 *
 *    safety_margin = 18 + (uncompressed_size >> 12) + 32768
 *
 * Thus, when updating a Deflate-only in-place kernel decompressor to
 * support XZ, the fixed overhead has to be increased from 18+32768 bytes
 * to 128+65536 bytes.
 */
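
/*
 * Illustrative sketch only: the formula above could be written as a macro
 * roughly like the one below. It is not used by the code in this file; the
 * macro name is hypothetical, and real architecture boot code encodes the
 * same arithmetic in its own headers and linker scripts.
 */
#if 0
#define XZ_INPLACE_SAFETY_MARGIN(uncompressed_size) \
	(128 + ((uncompressed_size) >> 12) + 65536)
#endif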

/*
 * STATIC is defined to "static" if we are being built for kernel
 * decompression (pre-boot code). <linux/decompress/mm.h> will define
 * STATIC to empty if it wasn't already defined. Since we will need to
 * know later if we are being used for kernel decompression, we define
 * XZ_PREBOOT here.
 */
#ifdef STATIC
#	define XZ_PREBOOT
#endif
#ifdef __KERNEL__
#	include <linux/decompress/mm.h>
#endif
#define XZ_EXTERN STATIC

#ifndef XZ_PREBOOT
#	include <linux/slab.h>
#	include <linux/xz.h>
#else
/*
 * Use the internal CRC32 code instead of the kernel's CRC32 module, which
 * is not available in the early phase of booting.
 */
#define XZ_INTERNAL_CRC32 1

/*
 * For boot time use, we enable only the BCJ filter of the current
 * architecture, or none if no BCJ filter is available for the architecture.
 */
#ifdef CONFIG_X86
#	define XZ_DEC_X86
#endif
#ifdef CONFIG_PPC
#	define XZ_DEC_POWERPC
#endif
#ifdef CONFIG_ARM
#	define XZ_DEC_ARM
#endif
#ifdef CONFIG_IA64
#	define XZ_DEC_IA64
#endif
#ifdef CONFIG_SPARC
#	define XZ_DEC_SPARC
#endif

/*
 * This will get the basic headers so that memeq() and others
 * can be defined.
 */
#include "xz/xz_private.h"

/*
 * Replace the normal allocation functions with the versions from
 * <linux/decompress/mm.h>. vfree() needs to support vfree(NULL)
 * when XZ_DYNALLOC is used, but the pre-boot free() doesn't support it.
 * Work around it here because the other decompressors don't need it.
 */
#undef kmalloc
#undef kfree
#undef vmalloc
#undef vfree
#define kmalloc(size, flags) malloc(size)
#define kfree(ptr) free(ptr)
#define vmalloc(size) malloc(size)
#define vfree(ptr) do { if (ptr != NULL) free(ptr); } while (0)

/*
 * FIXME: Not all basic memory functions are provided in architecture-specific
 * files (yet). We define our own versions here for now, but this should be
 * only a temporary solution.
 *
 * memeq and memzero are not used much and any remotely sane implementation
 * is fast enough. memcpy/memmove speed matters in multi-call mode, but
 * the kernel image is decompressed in single-call mode, in which only
 * memcpy speed can matter and only if there is a lot of incompressible data
 * (LZMA2 stores incompressible chunks in uncompressed form). Thus, the
 * functions below should just be kept small; it's probably not worth
 * optimizing for speed.
 */

#ifndef memeq
static bool memeq(const void *a, const void *b, size_t size)
{
	const uint8_t *x = a;
	const uint8_t *y = b;
	size_t i;

	for (i = 0; i < size; ++i)
		if (x[i] != y[i])
			return false;

	return true;
}
#endif

#ifndef memzero
static void memzero(void *buf, size_t size)
{
	uint8_t *b = buf;
	uint8_t *e = b + size;

	while (b != e)
		*b++ = '\0';
}
#endif

#ifndef memmove
/* Not static to avoid a conflict with the prototype in the Linux headers. */
void *memmove(void *dest, const void *src, size_t size)
{
	uint8_t *d = dest;
	const uint8_t *s = src;
	size_t i;

	if (d < s) {
		for (i = 0; i < size; ++i)
			d[i] = s[i];
	} else if (d > s) {
		i = size;
		while (i-- > 0)
			d[i] = s[i];
	}

	return dest;
}
#endif

/*
 * Since we need memmove anyway, we could use it as memcpy too.
 * Commented out for now to avoid breaking things.
 */
/*
#ifndef memcpy
#	define memcpy memmove
#endif
*/

#include "xz/xz_crc32.c"
#include "xz/xz_dec_stream.c"
#include "xz/xz_dec_lzma2.c"
#include "xz/xz_dec_bcj.c"

#endif /* XZ_PREBOOT */

/* Size of the input and output buffers in multi-call mode */
#define XZ_IOBUF_SIZE 4096

/*
 * This function implements the API defined in <linux/decompress/generic.h>.
 *
 * This wrapper will automatically choose single-call or multi-call mode
 * of the native XZ decoder API. The single-call mode can be used only when
 * both input and output buffers are available as a single chunk, i.e. when
 * fill() and flush() won't be used.
 */
STATIC int INIT unxz(unsigned char *in, int in_size,
		     int (*fill)(void *dest, unsigned int size),
		     int (*flush)(void *src, unsigned int size),
		     unsigned char *out, int *in_used,
		     void (*error)(char *x))
{
	struct xz_buf b;
	struct xz_dec *s;
	enum xz_ret ret;
	bool must_free_in = false;

#if XZ_INTERNAL_CRC32
	xz_crc32_init();
#endif

	if (in_used != NULL)
		*in_used = 0;

	if (fill == NULL && flush == NULL)
		s = xz_dec_init(XZ_SINGLE, 0);
	else
		s = xz_dec_init(XZ_DYNALLOC, (uint32_t)-1);

	if (s == NULL)
		goto error_alloc_state;

	if (flush == NULL) {
		b.out = out;
		b.out_size = (size_t)-1;
	} else {
		b.out_size = XZ_IOBUF_SIZE;
		b.out = malloc(XZ_IOBUF_SIZE);
		if (b.out == NULL)
			goto error_alloc_out;
	}

	if (in == NULL) {
		must_free_in = true;
		in = malloc(XZ_IOBUF_SIZE);
		if (in == NULL)
			goto error_alloc_in;
	}

	b.in = in;
	b.in_pos = 0;
	b.in_size = in_size;
	b.out_pos = 0;

	if (fill == NULL && flush == NULL) {
		ret = xz_dec_run(s, &b);
	} else {
		do {
			if (b.in_pos == b.in_size && fill != NULL) {
				if (in_used != NULL)
					*in_used += b.in_pos;

				b.in_pos = 0;

				in_size = fill(in, XZ_IOBUF_SIZE);
				if (in_size < 0) {
					/*
					 * This isn't an optimal error code
					 * but it probably isn't worth making
					 * a new one either.
					 */
					ret = XZ_BUF_ERROR;
					break;
				}

				b.in_size = in_size;
			}

			ret = xz_dec_run(s, &b);

			if (flush != NULL && (b.out_pos == b.out_size
					|| (ret != XZ_OK && b.out_pos > 0))) {
				/*
				 * Setting ret here may hide an error
				 * returned by xz_dec_run(), but probably
				 * it's not too bad.
				 */
				if (flush(b.out, b.out_pos) != (int)b.out_pos)
					ret = XZ_BUF_ERROR;

				b.out_pos = 0;
			}
		} while (ret == XZ_OK);

		if (must_free_in)
			free(in);

		if (flush != NULL)
			free(b.out);
	}

	if (in_used != NULL)
		*in_used += b.in_pos;

	xz_dec_end(s);

	switch (ret) {
	case XZ_STREAM_END:
		return 0;

	case XZ_MEM_ERROR:
		/* This can occur only in multi-call mode. */
		error("XZ decompressor ran out of memory");
		break;

	case XZ_FORMAT_ERROR:
		error("Input is not in the XZ format (wrong magic bytes)");
		break;

	case XZ_OPTIONS_ERROR:
		error("Input was encoded with settings that are not "
				"supported by this XZ decoder");
		break;

	case XZ_DATA_ERROR:
	case XZ_BUF_ERROR:
		error("XZ-compressed data is corrupt");
		break;

	default:
		error("Bug in the XZ decompressor");
		break;
	}

	return -1;

error_alloc_in:
	if (flush != NULL)
		free(b.out);

error_alloc_out:
	xz_dec_end(s);

error_alloc_state:
	error("XZ decompressor ran out of memory");
	return -1;
}

/*
 * This macro is used by architecture-specific files to decompress
 * the kernel image.
 */
#define decompress unxz
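
/*
 * Illustrative sketch only: one way an architecture's pre-boot code might
 * call this wrapper in single-call mode. The identifiers compressed_image,
 * compressed_size, output_buffer, and report_error are hypothetical; real
 * boot code passes its own buffers and error handler. Passing NULL for
 * both fill and flush selects the single-call (XZ_SINGLE) path in unxz()
 * above.
 */
#if 0
static void report_error(char *x)
{
	/* Print the message and halt in a platform-specific way. */
}

static void example_caller(void)
{
	if (decompress(compressed_image, compressed_size, NULL, NULL,
			output_buffer, NULL, report_error) != 0)
		report_error("XZ decompression failed");
}
#endif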