/*
Copyright 2018 The pdfcpu Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package filter

import (
	"bytes"
	"compress/zlib"
	"io"
	"strings"

	"github.com/pdfcpu/pdfcpu/pkg/log"
	"github.com/pkg/errors"
)

// Portions of this code are based on ideas of image/png: reader.go:readImagePass
// PNG is documented here: www.w3.org/TR/PNG-Filters.html

// PDF allows a prediction step prior to compression applying TIFF or PNG prediction.
// Predictor algorithm.
// These are the values accepted for the "Predictor" decode parameter.
const (
	PredictorNo      = 1  // No prediction.
	PredictorTIFF    = 2  // Use TIFF prediction for all rows.
	PredictorNone    = 10 // Use PNGNone for all rows.
	PredictorSub     = 11 // Use PNGSub for all rows.
	PredictorUp      = 12 // Use PNGUp for all rows.
	PredictorAverage = 13 // Use PNGAverage for all rows.
	PredictorPaeth   = 14 // Use PNGPaeth for all rows.
	PredictorOptimum = 15 // Use the optimum PNG prediction for each row.
)

// For predictor > 2 PNG filters (see RFC 2083) get applied and the first byte of each pixelrow defines
// the prediction algorithm used for all pixels of this row.
const (
	PNGNone    = 0x00 // Row bytes are used as they are.
	PNGSub     = 0x01 // Each byte is relative to the byte one pixel to its left.
	PNGUp      = 0x02 // Each byte is relative to the byte at the same position in the previous row.
	PNGAverage = 0x03 // Each byte is relative to the average of the left and the upper byte.
	PNGPaeth   = 0x04 // Each byte is relative to its Paeth predictor.
)

// flate carries the Flate filter methods.
// It embeds baseFilter, which provides the parms lookup used for the optional
// decode parameters ("Predictor", "Colors", "BitsPerComponent", "Columns").
type flate struct {
	baseFilter
}

// Encode implements encoding for a Flate filter.
60 func (f flate) Encode(r io.Reader) (io.Reader, error) { 61 if log.TraceEnabled() { 62 log.Trace.Println("EncodeFlate begin") 63 } 64 65 // TODO Optional decode parameters may need predictor preprocessing. 66 67 var b bytes.Buffer 68 w := zlib.NewWriter(&b) 69 defer w.Close() 70 71 written, err := io.Copy(w, r) 72 if err != nil { 73 return nil, err 74 } 75 76 if log.TraceEnabled() { 77 log.Trace.Printf("EncodeFlate end: %d bytes written\n", written) 78 } 79 80 return &b, nil 81 } 82 83 // Decode implements decoding for a Flate filter. 84 func (f flate) Decode(r io.Reader) (io.Reader, error) { 85 return f.DecodeLength(r, -1) 86 } 87 88 func (f flate) DecodeLength(r io.Reader, maxLen int64) (io.Reader, error) { 89 if log.TraceEnabled() { 90 log.Trace.Println("DecodeFlate begin") 91 } 92 93 rc, err := zlib.NewReader(r) 94 if err != nil { 95 return nil, err 96 } 97 defer rc.Close() 98 99 // Optional decode parameters need postprocessing. 100 return f.decodePostProcess(rc, maxLen) 101 } 102 103 func passThru(rin io.Reader, maxLen int64) (*bytes.Buffer, error) { 104 var b bytes.Buffer 105 var err error 106 if maxLen < 0 { 107 _, err = io.Copy(&b, rin) 108 } else { 109 _, err = io.CopyN(&b, rin, maxLen) 110 } 111 if err != nil && strings.Contains(err.Error(), "invalid checksum") { 112 if log.CLIEnabled() { 113 log.CLI.Println("skipped: truncated zlib stream") 114 } 115 err = nil 116 } 117 if err == io.ErrUnexpectedEOF { 118 // Workaround for missing support for partial flush in compress/flate. 119 // See also https://github.com/golang/go/issues/31514 120 if log.ReadEnabled() { 121 log.Read.Println("flateDecode: ignoring unexpected EOF") 122 } 123 err = nil 124 } 125 return &b, err 126 } 127 128 func intMemberOf(i int, list []int) bool { 129 for _, v := range list { 130 if i == v { 131 return true 132 } 133 } 134 return false 135 } 136 137 // Each prediction value implies (a) certain row filter(s). 
138 // func validateRowFilter(f, p int) error { 139 140 // switch p { 141 142 // case PredictorNone: 143 // if !intMemberOf(f, []int{PNGNone, PNGSub, PNGUp, PNGAverage, PNGPaeth}) { 144 // return errors.Errorf("pdfcpu: validateRowFilter: PredictorOptimum, unexpected row filter #%02x", f) 145 // } 146 // // if f != PNGNone { 147 // // return errors.Errorf("validateRowFilter: expected row filter #%02x, got: #%02x", PNGNone, f) 148 // // } 149 150 // case PredictorSub: 151 // if f != PNGSub { 152 // return errors.Errorf("pdfcpu: validateRowFilter: expected row filter #%02x, got: #%02x", PNGSub, f) 153 // } 154 155 // case PredictorUp: 156 // if f != PNGUp { 157 // return errors.Errorf("pdfcpu: validateRowFilter: expected row filter #%02x, got: #%02x", PNGUp, f) 158 // } 159 160 // case PredictorAverage: 161 // if f != PNGAverage { 162 // return errors.Errorf("pdfcpu: validateRowFilter: expected row filter #%02x, got: #%02x", PNGAverage, f) 163 // } 164 165 // case PredictorPaeth: 166 // if f != PNGPaeth { 167 // return errors.Errorf("pdfcpu: validateRowFilter: expected row filter #%02x, got: #%02x", PNGPaeth, f) 168 // } 169 170 // case PredictorOptimum: 171 // if !intMemberOf(f, []int{PNGNone, PNGSub, PNGUp, PNGAverage, PNGPaeth}) { 172 // return errors.Errorf("pdfcpu: validateRowFilter: PredictorOptimum, unexpected row filter #%02x", f) 173 // } 174 175 // default: 176 // return errors.Errorf("pdfcpu: validateRowFilter: unexpected predictor #%02x", p) 177 178 // } 179 180 // return nil 181 // } 182 183 func applyHorDiff(row []byte, colors int) ([]byte, error) { 184 // This works for 8 bits per color only. 
185 for i := 1; i < len(row)/colors; i++ { 186 for j := 0; j < colors; j++ { 187 row[i*colors+j] += row[(i-1)*colors+j] 188 } 189 } 190 return row, nil 191 } 192 193 func processRow(pr, cr []byte, p, colors, bytesPerPixel int) ([]byte, error) { 194 //fmt.Printf("pr(%v) =\n%s\n", &pr, hex.Dump(pr)) 195 //fmt.Printf("cr(%v) =\n%s\n", &cr, hex.Dump(cr)) 196 197 if p == PredictorTIFF { 198 return applyHorDiff(cr, colors) 199 } 200 201 // Apply the filter. 202 cdat := cr[1:] 203 pdat := pr[1:] 204 205 // Get row filter from 1st byte 206 f := int(cr[0]) 207 208 // The value of Predictor supplied by the decoding filter need not match the value 209 // used when the data was encoded if they are both greater than or equal to 10. 210 211 switch f { 212 213 case PNGNone: 214 // No operation. 215 216 case PNGSub: 217 for i := bytesPerPixel; i < len(cdat); i++ { 218 cdat[i] += cdat[i-bytesPerPixel] 219 } 220 221 case PNGUp: 222 for i, p := range pdat { 223 cdat[i] += p 224 } 225 226 case PNGAverage: 227 // The average of the two neighboring pixels (left and above). 228 // Raw(x) - floor((Raw(x-bpp)+Prior(x))/2) 229 for i := 0; i < bytesPerPixel; i++ { 230 cdat[i] += pdat[i] / 2 231 } 232 for i := bytesPerPixel; i < len(cdat); i++ { 233 cdat[i] += uint8((int(cdat[i-bytesPerPixel]) + int(pdat[i])) / 2) 234 } 235 236 case PNGPaeth: 237 filterPaeth(cdat, pdat, bytesPerPixel) 238 239 } 240 241 return cdat, nil 242 } 243 244 func (f flate) parameters() (colors, bpc, columns int, err error) { 245 // Colors, int 246 // The number of interleaved colour components per sample. 247 // Valid values are 1 to 4 (PDF 1.0) and 1 or greater (PDF 1.3). Default value: 1. 248 // Used by PredictorTIFF only. 
249 colors, found := f.parms["Colors"] 250 if !found { 251 colors = 1 252 } else if colors == 0 { 253 return 0, 0, 0, errors.Errorf("pdfcpu: filter FlateDecode: \"Colors\" must be > 0") 254 } 255 256 // BitsPerComponent, int 257 // The number of bits used to represent each colour component in a sample. 258 // Valid values are 1, 2, 4, 8, and (PDF 1.5) 16. Default value: 8. 259 // Used by PredictorTIFF only. 260 bpc, found = f.parms["BitsPerComponent"] 261 if !found { 262 bpc = 8 263 } else if !intMemberOf(bpc, []int{1, 2, 4, 8, 16}) { 264 return 0, 0, 0, errors.Errorf("pdfcpu: filter FlateDecode: Unexpected \"BitsPerComponent\": %d", bpc) 265 } 266 267 // Columns, int 268 // The number of samples in each row. Default value: 1. 269 columns, found = f.parms["Columns"] 270 if !found { 271 columns = 1 272 } 273 274 return colors, bpc, columns, nil 275 } 276 277 func checkBufLen(b bytes.Buffer, maxLen int64) bool { 278 return maxLen < 0 || int64(b.Len()) < maxLen 279 } 280 281 func process(w io.Writer, pr, cr []byte, predictor, colors, bytesPerPixel int) error { 282 d, err := processRow(pr, cr, predictor, colors, bytesPerPixel) 283 if err != nil { 284 return err 285 } 286 287 _, err = w.Write(d) 288 289 return err 290 } 291 292 // decodePostProcess 293 func (f flate) decodePostProcess(r io.Reader, maxLen int64) (io.Reader, error) { 294 predictor, found := f.parms["Predictor"] 295 if !found || predictor == PredictorNo { 296 return passThru(r, maxLen) 297 } 298 299 if !intMemberOf( 300 predictor, 301 []int{PredictorTIFF, 302 PredictorNone, 303 PredictorSub, 304 PredictorUp, 305 PredictorAverage, 306 PredictorPaeth, 307 PredictorOptimum, 308 }) { 309 return nil, errors.Errorf("pdfcpu: filter FlateDecode: undefined \"Predictor\" %d", predictor) 310 } 311 312 colors, bpc, columns, err := f.parameters() 313 if err != nil { 314 return nil, err 315 } 316 317 bytesPerPixel := (bpc*colors + 7) / 8 318 rowSize := (bpc*colors*columns + 7) / 8 319 320 m := rowSize 321 if predictor 
!= PredictorTIFF { 322 // PNG prediction uses a row filter byte prefixing the pixelbytes of a row. 323 m++ 324 } 325 326 // cr and pr are the bytes for the current and previous row. 327 cr := make([]byte, m) 328 pr := make([]byte, m) 329 330 // Output buffer 331 var b bytes.Buffer 332 333 for checkBufLen(b, maxLen) { 334 335 // Read decompressed bytes for one pixel row. 336 n, err := io.ReadFull(r, cr) 337 if err != nil { 338 if err != io.EOF { 339 return nil, err 340 } 341 // eof 342 if n == 0 { 343 break 344 } 345 } 346 347 if n != m { 348 return nil, errors.Errorf("pdfcpu: filter FlateDecode: read error, expected %d bytes, got: %d", m, n) 349 } 350 351 if err := process(&b, pr, cr, predictor, colors, bytesPerPixel); err != nil { 352 return nil, err 353 } 354 355 if err == io.EOF { 356 break 357 } 358 359 pr, cr = cr, pr 360 } 361 362 if maxLen < 0 && b.Len()%rowSize > 0 { 363 log.Info.Printf("failed postprocessing: %d %d\n", b.Len(), rowSize) 364 return nil, errors.New("pdfcpu: filter FlateDecode: postprocessing failed") 365 } 366 367 return &b, nil 368 }