github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/pkg/image/compression_optimized.go

github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/pkg/image/compression_optimized.go (about)

     1  // Copyright 2022 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  //go:build !windows && !386 && !arm
     5  
     6  package image
     7  
     8  import (
     9  	"bytes"
    10  	"compress/zlib"
    11  	"fmt"
    12  	"io"
    13  	"sync"
    14  	"syscall"
    15  	"unsafe"
    16  )
    17  
    18  // Temporary scratch data used by the decompression procedure.
    19  type decompressScratch struct {
    20  	r   bytes.Reader
    21  	zr  io.Reader
    22  	buf []byte
    23  }
    24  
    25  var decompressPool = sync.Pool{New: func() interface{} {
    26  	return &decompressScratch{
    27  		buf: make([]byte, 8<<10),
    28  	}
    29  }}
    30  
    31  func mustDecompress(compressed []byte) (data []byte, dtor func()) {
    32  	// Optimized decompression procedure that is ~2x faster than a naive version
    33  	// and consumes significantly less memory and generates less garbage.
    34  	// Images tend to contain lots of 0s, especially the larger images.
    35  	// The main idea is that we mmap a buffer and then don't write 0s into it
    36  	// (since it already contains all 0s). As the result if a page is all 0s
    37  	// then we don't page it in and don't consume memory for it.
    38  	// Executor uses the same optimization during decompression.
    39  	scratch := decompressPool.Get().(*decompressScratch)
    40  	defer decompressPool.Put(scratch)
    41  	scratch.r.Reset(compressed)
    42  	if scratch.zr == nil {
    43  		zr, err := zlib.NewReader(&scratch.r)
    44  		if err != nil {
    45  			panic(err)
    46  		}
    47  		scratch.zr = zr
    48  	} else {
    49  		if err := scratch.zr.(zlib.Resetter).Reset(&scratch.r, nil); err != nil {
    50  			panic(err)
    51  		}
    52  	}
    53  	// We don't know the size of the uncompressed image.
    54  	// We could uncompress it into ioutil.Discard first, then allocate memory and uncompress second time
    55  	// (and it's still faster than the naive uncompress into bytes.Buffer!).
    56  	// But we know maximum size of images, so just mmap the max size.
    57  	// It's fast and unused part does not consume memory.
    58  	// Note: executor/common_zlib.h also knows this const.
    59  	const maxImageSize = 132 << 20
    60  	var err error
    61  	data, err = syscall.Mmap(-1, 0, maxImageSize, syscall.PROT_READ|syscall.PROT_WRITE,
    62  		syscall.MAP_ANON|syscall.MAP_PRIVATE)
    63  	if err != nil {
    64  		panic(err)
    65  	}
    66  	dtor = func() {
    67  		if err := syscall.Munmap(data[:maxImageSize]); err != nil {
    68  			panic(err)
    69  		}
    70  	}
    71  	offset := 0
    72  	for {
    73  		n, err := scratch.zr.Read(scratch.buf)
    74  		if err != nil && err != io.EOF {
    75  			panic(err)
    76  		}
    77  		if n == 0 {
    78  			break
    79  		}
    80  		if offset+n > len(data) {
    81  			panic(fmt.Sprintf("bad image size: offset=%v n=%v data=%v", offset, n, len(data)))
    82  		}
    83  		// Copy word-at-a-time and avoid bounds checks in the loop,
    84  		// this is considerably faster than a naive byte loop.
    85  		// We already checked bounds above.
    86  		type word uint64
    87  		const wordSize = unsafe.Sizeof(word(0))
    88  		// Don't copy the last word b/c otherwise we calculate pointer outside of scratch.buf object
    89  		// on the last iteration. We don't use it, but unsafe rules prohibit even calculating
    90  		// such pointers. Alternatively we could add 8 unused bytes to scratch.buf, but it will
    91  		// play badly with memory allocator size classes (it will consume whole additional page,
    92  		// or whatever is the alignment for such large objects). We could also break from the middle
    93  		// of the loop before updating src/dst pointers, but it hurts codegen a lot (compilers like
    94  		// canonical loop forms).
    95  		words := uintptr(n-1) / wordSize
    96  		src := (*word)(unsafe.Pointer(&scratch.buf[0]))
    97  		dst := (*word)(unsafe.Pointer(&data[offset]))
    98  		for i := uintptr(0); i < words; i++ {
    99  			if *src != 0 {
   100  				*dst = *src
   101  			}
   102  			src = (*word)(unsafe.Pointer(uintptr(unsafe.Pointer(src)) + wordSize))
   103  			dst = (*word)(unsafe.Pointer(uintptr(unsafe.Pointer(dst)) + wordSize))
   104  		}
   105  		// Copy any remaining trailing bytes.
   106  		for i := words * wordSize; i < uintptr(n); i++ {
   107  			v := scratch.buf[i]
   108  			if v != 0 {
   109  				data[uintptr(offset)+i] = v
   110  			}
   111  		}
   112  		offset += n
   113  	}
   114  	data = data[:offset]
   115  	return
   116  }