golang.org/x/build@v0.0.0-20240506185731-218518f32b70/pargzip/pargzip.go (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package pargzip contains a parallel gzip writer implementation.  By
     6  // compressing each chunk of data in parallel, all the CPUs on the
     7  // machine can be used, at a slight loss of compression efficiency.
     8  package pargzip
     9  
    10  import (
    11  	"bufio"
    12  	"bytes"
    13  	"compress/gzip"
    14  	"io"
    15  	"runtime"
    16  	"strings"
    17  	"sync"
    18  )
    19  
// A Writer is an io.WriteCloser.
// Writes to a Writer are compressed and written to w.
//
// Any exported fields may only be mutated before the first call to
// Write.
type Writer struct {
	// ChunkSize is the number of bytes to gzip at once.
	// The default from NewWriter is 1MB.
	ChunkSize int

	// Parallel is the number of chunks to compress in parallel.
	// The default from NewWriter is runtime.NumCPU().
	Parallel int

	w  io.Writer     // underlying writer receiving the concatenated gzip stream
	bw *bufio.Writer // accumulates writes into ChunkSize pieces; nil until first Write (see didInit)

	allWritten  chan struct{} // when writing goroutine ends
	wasWriteErr chan struct{} // closed after 'err' set

	sem    chan bool        // semaphore bounding compressions in flight
	chunkc chan *writeChunk // closed on Close

	mu     sync.Mutex // guards following
	closed bool
	err    error // sticky write error
}
    47  
// A writeChunk is one ChunkSize-or-smaller piece of the input stream:
// it is compressed by its own goroutine and then written, in order, by
// the Writer's single writing goroutine.
type writeChunk struct {
	zw *Writer
	p  string // uncompressed

	donec chan struct{} // closed on completion

	// one of following is set:
	z   []byte // compressed
	err error  // compression error
}
    58  
    59  // compress runs the gzip child process.
    60  // It runs in its own goroutine.
    61  func (c *writeChunk) compress() (err error) {
    62  	defer func() {
    63  		if err != nil {
    64  			c.err = err
    65  		}
    66  		close(c.donec)
    67  		<-c.zw.sem
    68  	}()
    69  	var zbuf bytes.Buffer
    70  	zw := gzip.NewWriter(&zbuf)
    71  	if _, err := io.Copy(zw, strings.NewReader(c.p)); err != nil {
    72  		return err
    73  	}
    74  	if err := zw.Close(); err != nil {
    75  		return err
    76  	}
    77  	c.z = zbuf.Bytes()
    78  	return nil
    79  }
    80  
    81  // NewWriter returns a new Writer.
    82  // Writes to the returned writer are compressed and written to w.
    83  //
    84  // It is the caller's responsibility to call Close on the WriteCloser
    85  // when done. Writes may be buffered and not flushed until Close.
    86  //
    87  // Any fields on Writer may only be modified before the first call to
    88  // Write.
    89  func NewWriter(w io.Writer) *Writer {
    90  	return &Writer{
    91  		w:           w,
    92  		allWritten:  make(chan struct{}),
    93  		wasWriteErr: make(chan struct{}),
    94  
    95  		ChunkSize: 1 << 20,
    96  		Parallel:  runtime.NumCPU(),
    97  	}
    98  }
    99  
   100  func (w *Writer) didInit() bool { return w.bw != nil }
   101  
// init allocates the buffers, channels, and semaphore (honoring any
// ChunkSize/Parallel values the caller set after NewWriter) and starts
// the single goroutine that writes compressed chunks to w in order.
// It is called lazily on the first Write.
func (w *Writer) init() {
	w.bw = bufio.NewWriterSize(newChunkWriter{w}, w.ChunkSize)
	w.chunkc = make(chan *writeChunk, w.Parallel+1)
	w.sem = make(chan bool, w.Parallel)
	go func() {
		defer close(w.allWritten)
		for c := range w.chunkc {
			if err := w.writeCompressedChunk(c); err != nil {
				// Stop consuming; startChunk sees wasWriteErr
				// closed and drops all later chunks.
				close(w.wasWriteErr)
				return
			}
		}
	}()
}
   116  
// startChunk begins compressing a copy of p in its own goroutine,
// first blocking until one of the Parallel semaphore slots is free,
// then queues the chunk (in order) for the writing goroutine.
func (w *Writer) startChunk(p []byte) {
	w.sem <- true // block until we can begin
	c := &writeChunk{
		zw:    w,
		p:     string(p), // string, since the bufio.Writer owns the slice
		donec: make(chan struct{}),
	}
	go c.compress() // receives from w.sem
	select {
	case w.chunkc <- c:
	case <-w.wasWriteErr:
		// Discard chunks that come after any chunk that failed
		// to write.
	}
}
   132  
   133  func (w *Writer) writeCompressedChunk(c *writeChunk) (err error) {
   134  	defer func() {
   135  		if err != nil {
   136  			w.mu.Lock()
   137  			defer w.mu.Unlock()
   138  			if w.err == nil {
   139  				w.err = err
   140  			}
   141  		}
   142  	}()
   143  	<-c.donec
   144  	if c.err != nil {
   145  		return c.err
   146  	}
   147  	_, err = w.w.Write(c.z)
   148  	return
   149  }
   150  
   151  func (w *Writer) Write(p []byte) (n int, err error) {
   152  	if !w.didInit() {
   153  		w.init()
   154  	}
   155  	return w.bw.Write(p)
   156  }
   157  
// Close flushes any buffered data, waits for all in-flight chunks to
// be compressed and written, and returns the sticky write error, if
// any. Calling Close more than once is safe; subsequent calls return
// nil.
func (w *Writer) Close() error {
	w.mu.Lock()
	err, wasClosed := w.err, w.closed
	w.closed = true
	w.mu.Unlock()
	if wasClosed {
		return nil
	}
	if !w.didInit() {
		// Nothing was ever written; no goroutines to wind down.
		return nil
	}
	if err != nil {
		return err
	}

	w.bw.Flush() // pushes the final partial chunk through startChunk
	close(w.chunkc)
	<-w.allWritten // wait for writing goroutine to end

	// Re-read the sticky error: the flush above may have set it.
	w.mu.Lock()
	err = w.err
	w.mu.Unlock()
	return err
}
   182  
// newChunkWriter gets large chunks to compress and write to zw.
// It is the sink of zw's bufio.Writer, so each Write it receives is
// (at most) one ChunkSize-sized flush of buffered data.
type newChunkWriter struct {
	zw *Writer
}
   187  
   188  func (cw newChunkWriter) Write(p []byte) (n int, err error) {
   189  	n = len(p)
   190  	max := cw.zw.ChunkSize
   191  	for len(p) > 0 {
   192  		chunk := p
   193  		if len(chunk) > max {
   194  			chunk = chunk[:max]
   195  		}
   196  		p = p[len(chunk):]
   197  		cw.zw.startChunk(chunk)
   198  	}
   199  	return
   200  }