// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package pargzip contains a parallel gzip writer implementation. By
// compressing each chunk of data in parallel, all the CPUs on the
// machine can be used, at a slight loss of compression efficiency.
package pargzip

import (
	"bufio"
	"bytes"
	"compress/gzip"
	"io"
	"runtime"
	"strings"
	"sync"
)

// A Writer is an io.WriteCloser.
// Data written to a Writer is split into chunks, each chunk is gzipped
// in its own goroutine, and the compressed chunks are written, in the
// order they were submitted, to the io.Writer passed to NewWriter.
//
// Any exported fields may only be mutated before the first call to
// Write.
type Writer struct {
	// ChunkSize is the number of bytes to gzip at once.
	// The default from NewWriter is 1MB.
	ChunkSize int

	// Parallel is the number of chunks to compress in parallel.
	// The default from NewWriter is runtime.NumCPU().
	Parallel int

	w  io.Writer     // destination for the compressed output
	bw *bufio.Writer // buffers incoming writes into ChunkSize pieces; nil until the first Write

	allWritten  chan struct{} // closed when the writing goroutine ends
	wasWriteErr chan struct{} // closed by the writing goroutine after 'err' is set

	sem    chan bool        // semaphore bounding compressions in flight
	chunkc chan *writeChunk // chunks in submission order; closed on Close

	mu     sync.Mutex // guards following
	closed bool       // set by the first call to Close
	err    error      // sticky write error (the first one recorded)
}

// A writeChunk is one piece of input together with the state and result
// of compressing it.
type writeChunk struct {
	zw *Writer // the Writer this chunk belongs to
	p  string  // uncompressed

	donec chan struct{} // closed on completion

	// one of following is set:
	z   []byte // compressed
	err error  // compression error
}

// compress gzips c.p in-process using compress/gzip. On return it has
// stored the compressed bytes in c.z (or the failure in c.err), closed
// c.donec, and released one slot of the Writer's semaphore.
// It runs in its own goroutine.
61 func (c *writeChunk) compress() (err error) { 62 defer func() { 63 if err != nil { 64 c.err = err 65 } 66 close(c.donec) 67 <-c.zw.sem 68 }() 69 var zbuf bytes.Buffer 70 zw := gzip.NewWriter(&zbuf) 71 if _, err := io.Copy(zw, strings.NewReader(c.p)); err != nil { 72 return err 73 } 74 if err := zw.Close(); err != nil { 75 return err 76 } 77 c.z = zbuf.Bytes() 78 return nil 79 } 80 81 // NewWriter returns a new Writer. 82 // Writes to the returned writer are compressed and written to w. 83 // 84 // It is the caller's responsibility to call Close on the WriteCloser 85 // when done. Writes may be buffered and not flushed until Close. 86 // 87 // Any fields on Writer may only be modified before the first call to 88 // Write. 89 func NewWriter(w io.Writer) *Writer { 90 return &Writer{ 91 w: w, 92 allWritten: make(chan struct{}), 93 wasWriteErr: make(chan struct{}), 94 95 ChunkSize: 1 << 20, 96 Parallel: runtime.NumCPU(), 97 } 98 } 99 100 func (w *Writer) didInit() bool { return w.bw != nil } 101 102 func (w *Writer) init() { 103 w.bw = bufio.NewWriterSize(newChunkWriter{w}, w.ChunkSize) 104 w.chunkc = make(chan *writeChunk, w.Parallel+1) 105 w.sem = make(chan bool, w.Parallel) 106 go func() { 107 defer close(w.allWritten) 108 for c := range w.chunkc { 109 if err := w.writeCompressedChunk(c); err != nil { 110 close(w.wasWriteErr) 111 return 112 } 113 } 114 }() 115 } 116 117 func (w *Writer) startChunk(p []byte) { 118 w.sem <- true // block until we can begin 119 c := &writeChunk{ 120 zw: w, 121 p: string(p), // string, since the bufio.Writer owns the slice 122 donec: make(chan struct{}), 123 } 124 go c.compress() // receives from w.sem 125 select { 126 case w.chunkc <- c: 127 case <-w.wasWriteErr: 128 // Discard chunks that come after any chunk that failed 129 // to write. 
130 } 131 } 132 133 func (w *Writer) writeCompressedChunk(c *writeChunk) (err error) { 134 defer func() { 135 if err != nil { 136 w.mu.Lock() 137 defer w.mu.Unlock() 138 if w.err == nil { 139 w.err = err 140 } 141 } 142 }() 143 <-c.donec 144 if c.err != nil { 145 return c.err 146 } 147 _, err = w.w.Write(c.z) 148 return 149 } 150 151 func (w *Writer) Write(p []byte) (n int, err error) { 152 if !w.didInit() { 153 w.init() 154 } 155 return w.bw.Write(p) 156 } 157 158 func (w *Writer) Close() error { 159 w.mu.Lock() 160 err, wasClosed := w.err, w.closed 161 w.closed = true 162 w.mu.Unlock() 163 if wasClosed { 164 return nil 165 } 166 if !w.didInit() { 167 return nil 168 } 169 if err != nil { 170 return err 171 } 172 173 w.bw.Flush() 174 close(w.chunkc) 175 <-w.allWritten // wait for writing goroutine to end 176 177 w.mu.Lock() 178 err = w.err 179 w.mu.Unlock() 180 return err 181 } 182 183 // newChunkWriter gets large chunks to compress and write to zw. 184 type newChunkWriter struct { 185 zw *Writer 186 } 187 188 func (cw newChunkWriter) Write(p []byte) (n int, err error) { 189 n = len(p) 190 max := cw.zw.ChunkSize 191 for len(p) > 0 { 192 chunk := p 193 if len(chunk) > max { 194 chunk = chunk[:max] 195 } 196 p = p[len(chunk):] 197 cw.zw.startChunk(chunk) 198 } 199 return 200 }