github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/image/compression_optimized.go (about)

     1  // Copyright 2022 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  //go:build !windows && !386 && !arm
     5  
     6  package image
     7  
     8  import (
     9  	"bytes"
    10  	"compress/zlib"
    11  	"fmt"
    12  	"io"
    13  	"sync"
    14  	"syscall"
    15  	"unsafe"
    16  )
    17  
// Temporary scratch data used by the decompression procedure.
// Instances are recycled via decompressPool so that the zlib reader
// and the page-sized copy buffer are reused across calls.
type decompressScratch struct {
	r   bytes.Reader // wraps the compressed input; Reset per call
	zr  io.Reader    // lazily-created zlib reader, reused via zlib.Resetter
	buf []byte       // pageSize-sized chunk buffer for reading decompressed data
}
    24  
// This is just for memory consumption estimation, does not need to be precise.
const pageSize = 4 << 10
    27  
// decompressPool recycles decompressScratch objects between mustDecompress
// calls to avoid re-allocating the copy buffer and the zlib reader state.
var decompressPool = sync.Pool{New: func() interface{} {
	return &decompressScratch{
		buf: make([]byte, pageSize),
	}
}}
    33  
    34  func mustDecompress(compressed []byte) (data []byte, dtor func()) {
    35  	// Optimized decompression procedure that is ~2x faster than a naive version
    36  	// and consumes significantly less memory and generates less garbage.
    37  	// Images tend to contain lots of 0s, especially the larger images.
    38  	// The main idea is that we mmap a buffer and then don't write 0s into it
    39  	// (since it already contains all 0s). As the result if a page is all 0s
    40  	// then we don't page it in and don't consume memory for it.
    41  	// Executor uses the same optimization during decompression.
    42  	scratch := decompressPool.Get().(*decompressScratch)
    43  	defer decompressPool.Put(scratch)
    44  	scratch.r.Reset(compressed)
    45  	if scratch.zr == nil {
    46  		zr, err := zlib.NewReader(&scratch.r)
    47  		if err != nil {
    48  			panic(err)
    49  		}
    50  		scratch.zr = zr
    51  	} else {
    52  		if err := scratch.zr.(zlib.Resetter).Reset(&scratch.r, nil); err != nil {
    53  			panic(err)
    54  		}
    55  	}
    56  	// We don't know the size of the uncompressed image.
    57  	// We could uncompress it into ioutil.Discard first, then allocate memory and uncompress second time
    58  	// (and it's still faster than the naive uncompress into bytes.Buffer!).
    59  	// But we know maximum size of images, so just mmap the max size.
    60  	// It's fast and unused part does not consume memory.
    61  	// Note: executor/common_zlib.h also knows this const.
    62  	const maxImageSize = 132 << 20
    63  	var err error
    64  	data, err = syscall.Mmap(-1, 0, maxImageSize, syscall.PROT_READ|syscall.PROT_WRITE,
    65  		syscall.MAP_ANON|syscall.MAP_PRIVATE)
    66  	if err != nil {
    67  		panic(err)
    68  	}
    69  	pages := 0
    70  	dtor = func() {
    71  		StatImages.Add(-1)
    72  		StatMemory.Add(int64(-pages * pageSize))
    73  		if err := syscall.Munmap(data[:maxImageSize]); err != nil {
    74  			panic(err)
    75  		}
    76  	}
    77  	pagedIn := 0
    78  	offset := 0
    79  	for {
    80  		n, err := scratch.zr.Read(scratch.buf)
    81  		if err != nil && err != io.EOF {
    82  			panic(err)
    83  		}
    84  		if n == 0 {
    85  			break
    86  		}
    87  		if offset+n > len(data) {
    88  			panic(fmt.Sprintf("bad image size: offset=%v n=%v data=%v", offset, n, len(data)))
    89  		}
    90  		// Copy word-at-a-time and avoid bounds checks in the loop,
    91  		// this is considerably faster than a naive byte loop.
    92  		// We already checked bounds above.
    93  		type word uint64
    94  		const wordSize = unsafe.Sizeof(word(0))
    95  		// Don't copy the last word b/c otherwise we calculate pointer outside of scratch.buf object
    96  		// on the last iteration. We don't use it, but unsafe rules prohibit even calculating
    97  		// such pointers. Alternatively we could add 8 unused bytes to scratch.buf, but it will
    98  		// play badly with memory allocator size classes (it will consume whole additional page,
    99  		// or whatever is the alignment for such large objects). We could also break from the middle
   100  		// of the loop before updating src/dst pointers, but it hurts codegen a lot (compilers like
   101  		// canonical loop forms).
   102  		hasData := false
   103  		words := uintptr(n-1) / wordSize
   104  		src := (*word)(unsafe.Pointer(&scratch.buf[0]))
   105  		dst := (*word)(unsafe.Pointer(&data[offset]))
   106  		for i := uintptr(0); i < words; i++ {
   107  			if *src != 0 {
   108  				*dst = *src
   109  			}
   110  			src = (*word)(unsafe.Pointer(uintptr(unsafe.Pointer(src)) + wordSize))
   111  			dst = (*word)(unsafe.Pointer(uintptr(unsafe.Pointer(dst)) + wordSize))
   112  			hasData = true
   113  		}
   114  		// Copy any remaining trailing bytes.
   115  		for i := words * wordSize; i < uintptr(n); i++ {
   116  			v := scratch.buf[i]
   117  			if v != 0 {
   118  				data[uintptr(offset)+i] = v
   119  				hasData = true
   120  			}
   121  		}
   122  		if hasData && offset >= pagedIn {
   123  			pagedIn = (offset + n + pageSize - 1) & ^(pageSize - 1)
   124  			pages++
   125  		}
   126  		offset += n
   127  	}
   128  	data = data[:offset]
   129  	StatImages.Add(1)
   130  	StatMemory.Add(int64(pages * pageSize))
   131  	return
   132  }