github.com/code-reading/golang@v0.0.0-20220303082512-ba5bc0e589a3/go/src/compress/lzw/writer.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package lzw 6 7 import ( 8 "bufio" 9 "errors" 10 "fmt" 11 "io" 12 ) 13 14 // A writer is a buffered, flushable writer. 15 type writer interface { 16 io.ByteWriter 17 Flush() error 18 } 19 20 const ( 21 // A code is a 12 bit value, stored as a uint32 when encoding to avoid 22 // type conversions when shifting bits. 23 maxCode = 1<<12 - 1 24 invalidCode = 1<<32 - 1 25 // There are 1<<12 possible codes, which is an upper bound on the number of 26 // valid hash table entries at any given point in time. tableSize is 4x that. 27 tableSize = 4 * 1 << 12 28 tableMask = tableSize - 1 29 // A hash table entry is a uint32. Zero is an invalid entry since the 30 // lower 12 bits of a valid entry must be a non-literal code. 31 invalidEntry = 0 32 ) 33 34 // Writer is an LZW compressor. It writes the compressed form of the data 35 // to an underlying writer (see NewWriter). 36 type Writer struct { 37 // w is the writer that compressed bytes are written to. 38 w writer 39 // order, write, bits, nBits and width are the state for 40 // converting a code stream into a byte stream. 41 order Order 42 write func(*Writer, uint32) error 43 bits uint32 44 nBits uint 45 width uint 46 // litWidth is the width in bits of literal codes. 47 litWidth uint 48 // hi is the code implied by the next code emission. 49 // overflow is the code at which hi overflows the code width. 50 hi, overflow uint32 51 // savedCode is the accumulated code at the end of the most recent Write 52 // call. It is equal to invalidCode if there was no such call. 53 savedCode uint32 54 // err is the first error encountered during writing. Closing the writer 55 // will make any future Write calls return errClosed 56 err error 57 // table is the hash table from 20-bit keys to 12-bit values. Each table 58 // entry contains key<<12|val and collisions resolve by linear probing. 59 // The keys consist of a 12-bit code prefix and an 8-bit byte suffix. 60 // The values are a 12-bit code. 61 table [tableSize]uint32 62 } 63 64 // writeLSB writes the code c for "Least Significant Bits first" data. 65 func (w *Writer) writeLSB(c uint32) error { 66 w.bits |= c << w.nBits 67 w.nBits += w.width 68 for w.nBits >= 8 { 69 if err := w.w.WriteByte(uint8(w.bits)); err != nil { 70 return err 71 } 72 w.bits >>= 8 73 w.nBits -= 8 74 } 75 return nil 76 } 77 78 // writeMSB writes the code c for "Most Significant Bits first" data. 79 func (w *Writer) writeMSB(c uint32) error { 80 w.bits |= c << (32 - w.width - w.nBits) 81 w.nBits += w.width 82 for w.nBits >= 8 { 83 if err := w.w.WriteByte(uint8(w.bits >> 24)); err != nil { 84 return err 85 } 86 w.bits <<= 8 87 w.nBits -= 8 88 } 89 return nil 90 } 91 92 // errOutOfCodes is an internal error that means that the writer has run out 93 // of unused codes and a clear code needs to be sent next. 94 var errOutOfCodes = errors.New("lzw: out of codes") 95 96 // incHi increments e.hi and checks for both overflow and running out of 97 // unused codes. In the latter case, incHi sends a clear code, resets the 98 // writer state and returns errOutOfCodes. 99 func (w *Writer) incHi() error { 100 w.hi++ 101 if w.hi == w.overflow { 102 w.width++ 103 w.overflow <<= 1 104 } 105 if w.hi == maxCode { 106 clear := uint32(1) << w.litWidth 107 if err := w.write(w, clear); err != nil { 108 return err 109 } 110 w.width = w.litWidth + 1 111 w.hi = clear + 1 112 w.overflow = clear << 1 113 for i := range w.table { 114 w.table[i] = invalidEntry 115 } 116 return errOutOfCodes 117 } 118 return nil 119 } 120 121 // Write writes a compressed representation of p to w's underlying writer. 122 func (w *Writer) Write(p []byte) (n int, err error) { 123 if w.err != nil { 124 return 0, w.err 125 } 126 if len(p) == 0 { 127 return 0, nil 128 } 129 if maxLit := uint8(1<<w.litWidth - 1); maxLit != 0xff { 130 for _, x := range p { 131 if x > maxLit { 132 w.err = errors.New("lzw: input byte too large for the litWidth") 133 return 0, w.err 134 } 135 } 136 } 137 n = len(p) 138 code := w.savedCode 139 if code == invalidCode { 140 // The first code sent is always a literal code. 141 code, p = uint32(p[0]), p[1:] 142 } 143 loop: 144 for _, x := range p { 145 literal := uint32(x) 146 key := code<<8 | literal 147 // If there is a hash table hit for this key then we continue the loop 148 // and do not emit a code yet. 149 hash := (key>>12 ^ key) & tableMask 150 for h, t := hash, w.table[hash]; t != invalidEntry; { 151 if key == t>>12 { 152 code = t & maxCode 153 continue loop 154 } 155 h = (h + 1) & tableMask 156 t = w.table[h] 157 } 158 // Otherwise, write the current code, and literal becomes the start of 159 // the next emitted code. 160 if w.err = w.write(w, code); w.err != nil { 161 return 0, w.err 162 } 163 code = literal 164 // Increment e.hi, the next implied code. If we run out of codes, reset 165 // the writer state (including clearing the hash table) and continue. 166 if err1 := w.incHi(); err1 != nil { 167 if err1 == errOutOfCodes { 168 continue 169 } 170 w.err = err1 171 return 0, w.err 172 } 173 // Otherwise, insert key -> e.hi into the map that e.table represents. 174 for { 175 if w.table[hash] == invalidEntry { 176 w.table[hash] = (key << 12) | w.hi 177 break 178 } 179 hash = (hash + 1) & tableMask 180 } 181 } 182 w.savedCode = code 183 return n, nil 184 } 185 186 // Close closes the Writer, flushing any pending output. It does not close 187 // w's underlying writer. 188 func (w *Writer) Close() error { 189 if w.err != nil { 190 if w.err == errClosed { 191 return nil 192 } 193 return w.err 194 } 195 // Make any future calls to Write return errClosed. 196 w.err = errClosed 197 // Write the savedCode if valid. 198 if w.savedCode != invalidCode { 199 if err := w.write(w, w.savedCode); err != nil { 200 return err 201 } 202 if err := w.incHi(); err != nil && err != errOutOfCodes { 203 return err 204 } 205 } 206 // Write the eof code. 207 eof := uint32(1)<<w.litWidth + 1 208 if err := w.write(w, eof); err != nil { 209 return err 210 } 211 // Write the final bits. 212 if w.nBits > 0 { 213 if w.order == MSB { 214 w.bits >>= 24 215 } 216 if err := w.w.WriteByte(uint8(w.bits)); err != nil { 217 return err 218 } 219 } 220 return w.w.Flush() 221 } 222 223 // Reset clears the Writer's state and allows it to be reused again 224 // as a new Writer. 225 func (w *Writer) Reset(dst io.Writer, order Order, litWidth int) { 226 *w = Writer{} 227 w.init(dst, order, litWidth) 228 } 229 230 // NewWriter creates a new io.WriteCloser. 231 // Writes to the returned io.WriteCloser are compressed and written to w. 232 // It is the caller's responsibility to call Close on the WriteCloser when 233 // finished writing. 234 // The number of bits to use for literal codes, litWidth, must be in the 235 // range [2,8] and is typically 8. Input bytes must be less than 1<<litWidth. 236 // 237 // It is guaranteed that the underlying type of the returned io.WriteCloser 238 // is a *Writer. 239 func NewWriter(w io.Writer, order Order, litWidth int) io.WriteCloser { 240 return newWriter(w, order, litWidth) 241 } 242 243 func newWriter(dst io.Writer, order Order, litWidth int) *Writer { 244 w := new(Writer) 245 w.init(dst, order, litWidth) 246 return w 247 } 248 249 func (w *Writer) init(dst io.Writer, order Order, litWidth int) { 250 switch order { 251 case LSB: 252 w.write = (*Writer).writeLSB 253 case MSB: 254 w.write = (*Writer).writeMSB 255 default: 256 w.err = errors.New("lzw: unknown order") 257 return 258 } 259 if litWidth < 2 || 8 < litWidth { 260 w.err = fmt.Errorf("lzw: litWidth %d out of range", litWidth) 261 return 262 } 263 bw, ok := dst.(writer) 264 if !ok && dst != nil { 265 bw = bufio.NewWriter(dst) 266 } 267 w.w = bw 268 lw := uint(litWidth) 269 w.order = order 270 w.width = 1 + lw 271 w.litWidth = lw 272 w.hi = 1<<lw + 1 273 w.overflow = 1 << (lw + 1) 274 w.savedCode = invalidCode 275 }