github.com/pdfcpu/pdfcpu@v0.11.1/pkg/filter/flateDecode.go (about)

     1  /*
     2  Copyright 2018 The pdfcpu Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8  	http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package filter
    18  
    19  import (
    20  	"bytes"
    21  	"compress/zlib"
    22  	"io"
    23  	"strings"
    24  
    25  	"github.com/pdfcpu/pdfcpu/pkg/log"
    26  	"github.com/pkg/errors"
    27  )
    28  
    29  // Portions of this code are based on ideas of image/png: reader.go:readImagePass
    30  // PNG is documented here: www.w3.org/TR/PNG-Filters.html
    31  
// PDF allows a prediction step prior to compression applying TIFF or PNG prediction.
// Predictor algorithm.
// These are the values the "Predictor" decode parameter is checked against
// when a Flate stream is decoded; anything else is rejected as undefined.
const (
	PredictorNo      = 1  // No prediction.
	PredictorTIFF    = 2  // Use TIFF prediction for all rows.
	PredictorNone    = 10 // Use PNGNone for all rows.
	PredictorSub     = 11 // Use PNGSub for all rows.
	PredictorUp      = 12 // Use PNGUp for all rows.
	PredictorAverage = 13 // Use PNGAverage for all rows.
	PredictorPaeth   = 14 // Use PNGPaeth for all rows.
	PredictorOptimum = 15 // Use the optimum PNG prediction for each row.
)
    44  
    45  // For predictor > 2 PNG filters (see RFC 2083) get applied and the first byte of each pixelrow defines
    46  // the prediction algorithm used for all pixels of this row.
    47  const (
    48  	PNGNone    = 0x00
    49  	PNGSub     = 0x01
    50  	PNGUp      = 0x02
    51  	PNGAverage = 0x03
    52  	PNGPaeth   = 0x04
    53  )
    54  
// flate is the filter whose methods implement Flate (zlib) encoding and decoding.
// NOTE(review): baseFilter is declared outside this file; the decode parameters
// accessed as f.parms presumably come from it - confirm.
type flate struct {
	baseFilter
}
    58  
    59  // Encode implements encoding for a Flate filter.
    60  func (f flate) Encode(r io.Reader) (io.Reader, error) {
    61  	if log.TraceEnabled() {
    62  		log.Trace.Println("EncodeFlate begin")
    63  	}
    64  
    65  	// TODO Optional decode parameters may need predictor preprocessing.
    66  
    67  	var b bytes.Buffer
    68  	w := zlib.NewWriter(&b)
    69  	defer w.Close()
    70  
    71  	written, err := io.Copy(w, r)
    72  	if err != nil {
    73  		return nil, err
    74  	}
    75  
    76  	if log.TraceEnabled() {
    77  		log.Trace.Printf("EncodeFlate end: %d bytes written\n", written)
    78  	}
    79  
    80  	return &b, nil
    81  }
    82  
    83  // Decode implements decoding for a Flate filter.
    84  func (f flate) Decode(r io.Reader) (io.Reader, error) {
    85  	return f.DecodeLength(r, -1)
    86  }
    87  
    88  func (f flate) DecodeLength(r io.Reader, maxLen int64) (io.Reader, error) {
    89  	if log.TraceEnabled() {
    90  		log.Trace.Println("DecodeFlate begin")
    91  	}
    92  
    93  	rc, err := zlib.NewReader(r)
    94  	if err != nil {
    95  		return nil, err
    96  	}
    97  	defer rc.Close()
    98  
    99  	// Optional decode parameters need postprocessing.
   100  	return f.decodePostProcess(rc, maxLen)
   101  }
   102  
   103  func passThru(rin io.Reader, maxLen int64) (*bytes.Buffer, error) {
   104  	var b bytes.Buffer
   105  	var err error
   106  	if maxLen < 0 {
   107  		_, err = io.Copy(&b, rin)
   108  	} else {
   109  		_, err = io.CopyN(&b, rin, maxLen)
   110  	}
   111  	if err != nil && strings.Contains(err.Error(), "invalid checksum") {
   112  		if log.CLIEnabled() {
   113  			log.CLI.Println("skipped: truncated zlib stream")
   114  		}
   115  		err = nil
   116  	}
   117  	if err == io.ErrUnexpectedEOF {
   118  		// Workaround for missing support for partial flush in compress/flate.
   119  		// See also https://github.com/golang/go/issues/31514
   120  		if log.ReadEnabled() {
   121  			log.Read.Println("flateDecode: ignoring unexpected EOF")
   122  		}
   123  		err = nil
   124  	}
   125  	return &b, err
   126  }
   127  
   128  func intMemberOf(i int, list []int) bool {
   129  	for _, v := range list {
   130  		if i == v {
   131  			return true
   132  		}
   133  	}
   134  	return false
   135  }
   136  
   137  // Each prediction value implies (a) certain row filter(s).
   138  // func validateRowFilter(f, p int) error {
   139  
   140  // 	switch p {
   141  
   142  // 	case PredictorNone:
   143  // 		if !intMemberOf(f, []int{PNGNone, PNGSub, PNGUp, PNGAverage, PNGPaeth}) {
   144  // 			return errors.Errorf("pdfcpu: validateRowFilter: PredictorOptimum, unexpected row filter #%02x", f)
   145  // 		}
   146  // 		// if f != PNGNone {
   147  // 		// 	return errors.Errorf("validateRowFilter: expected row filter #%02x, got: #%02x", PNGNone, f)
   148  // 		// }
   149  
   150  // 	case PredictorSub:
   151  // 		if f != PNGSub {
   152  // 			return errors.Errorf("pdfcpu: validateRowFilter: expected row filter #%02x, got: #%02x", PNGSub, f)
   153  // 		}
   154  
   155  // 	case PredictorUp:
   156  // 		if f != PNGUp {
   157  // 			return errors.Errorf("pdfcpu: validateRowFilter: expected row filter #%02x, got: #%02x", PNGUp, f)
   158  // 		}
   159  
   160  // 	case PredictorAverage:
   161  // 		if f != PNGAverage {
   162  // 			return errors.Errorf("pdfcpu: validateRowFilter: expected row filter #%02x, got: #%02x", PNGAverage, f)
   163  // 		}
   164  
   165  // 	case PredictorPaeth:
   166  // 		if f != PNGPaeth {
   167  // 			return errors.Errorf("pdfcpu: validateRowFilter: expected row filter #%02x, got: #%02x", PNGPaeth, f)
   168  // 		}
   169  
   170  // 	case PredictorOptimum:
   171  // 		if !intMemberOf(f, []int{PNGNone, PNGSub, PNGUp, PNGAverage, PNGPaeth}) {
   172  // 			return errors.Errorf("pdfcpu: validateRowFilter: PredictorOptimum, unexpected row filter #%02x", f)
   173  // 		}
   174  
   175  // 	default:
   176  // 		return errors.Errorf("pdfcpu: validateRowFilter: unexpected predictor #%02x", p)
   177  
   178  // 	}
   179  
   180  // 	return nil
   181  // }
   182  
   183  func applyHorDiff(row []byte, colors int) ([]byte, error) {
   184  	// This works for 8 bits per color only.
   185  	for i := 1; i < len(row)/colors; i++ {
   186  		for j := 0; j < colors; j++ {
   187  			row[i*colors+j] += row[(i-1)*colors+j]
   188  		}
   189  	}
   190  	return row, nil
   191  }
   192  
   193  func processRow(pr, cr []byte, p, colors, bytesPerPixel int) ([]byte, error) {
   194  	//fmt.Printf("pr(%v) =\n%s\n", &pr, hex.Dump(pr))
   195  	//fmt.Printf("cr(%v) =\n%s\n", &cr, hex.Dump(cr))
   196  
   197  	if p == PredictorTIFF {
   198  		return applyHorDiff(cr, colors)
   199  	}
   200  
   201  	// Apply the filter.
   202  	cdat := cr[1:]
   203  	pdat := pr[1:]
   204  
   205  	// Get row filter from 1st byte
   206  	f := int(cr[0])
   207  
   208  	// The value of Predictor supplied by the decoding filter need not match the value
   209  	// used when the data was encoded if they are both greater than or equal to 10.
   210  
   211  	switch f {
   212  
   213  	case PNGNone:
   214  		// No operation.
   215  
   216  	case PNGSub:
   217  		for i := bytesPerPixel; i < len(cdat); i++ {
   218  			cdat[i] += cdat[i-bytesPerPixel]
   219  		}
   220  
   221  	case PNGUp:
   222  		for i, p := range pdat {
   223  			cdat[i] += p
   224  		}
   225  
   226  	case PNGAverage:
   227  		// The average of the two neighboring pixels (left and above).
   228  		// Raw(x) - floor((Raw(x-bpp)+Prior(x))/2)
   229  		for i := 0; i < bytesPerPixel; i++ {
   230  			cdat[i] += pdat[i] / 2
   231  		}
   232  		for i := bytesPerPixel; i < len(cdat); i++ {
   233  			cdat[i] += uint8((int(cdat[i-bytesPerPixel]) + int(pdat[i])) / 2)
   234  		}
   235  
   236  	case PNGPaeth:
   237  		filterPaeth(cdat, pdat, bytesPerPixel)
   238  
   239  	}
   240  
   241  	return cdat, nil
   242  }
   243  
   244  func (f flate) parameters() (colors, bpc, columns int, err error) {
   245  	// Colors, int
   246  	// The number of interleaved colour components per sample.
   247  	// Valid values are 1 to 4 (PDF 1.0) and 1 or greater (PDF 1.3). Default value: 1.
   248  	// Used by PredictorTIFF only.
   249  	colors, found := f.parms["Colors"]
   250  	if !found {
   251  		colors = 1
   252  	} else if colors == 0 {
   253  		return 0, 0, 0, errors.Errorf("pdfcpu: filter FlateDecode: \"Colors\" must be > 0")
   254  	}
   255  
   256  	// BitsPerComponent, int
   257  	// The number of bits used to represent each colour component in a sample.
   258  	// Valid values are 1, 2, 4, 8, and (PDF 1.5) 16. Default value: 8.
   259  	// Used by PredictorTIFF only.
   260  	bpc, found = f.parms["BitsPerComponent"]
   261  	if !found {
   262  		bpc = 8
   263  	} else if !intMemberOf(bpc, []int{1, 2, 4, 8, 16}) {
   264  		return 0, 0, 0, errors.Errorf("pdfcpu: filter FlateDecode: Unexpected \"BitsPerComponent\": %d", bpc)
   265  	}
   266  
   267  	// Columns, int
   268  	// The number of samples in each row. Default value: 1.
   269  	columns, found = f.parms["Columns"]
   270  	if !found {
   271  		columns = 1
   272  	}
   273  
   274  	return colors, bpc, columns, nil
   275  }
   276  
   277  func checkBufLen(b bytes.Buffer, maxLen int64) bool {
   278  	return maxLen < 0 || int64(b.Len()) < maxLen
   279  }
   280  
   281  func process(w io.Writer, pr, cr []byte, predictor, colors, bytesPerPixel int) error {
   282  	d, err := processRow(pr, cr, predictor, colors, bytesPerPixel)
   283  	if err != nil {
   284  		return err
   285  	}
   286  
   287  	_, err = w.Write(d)
   288  
   289  	return err
   290  }
   291  
   292  // decodePostProcess
   293  func (f flate) decodePostProcess(r io.Reader, maxLen int64) (io.Reader, error) {
   294  	predictor, found := f.parms["Predictor"]
   295  	if !found || predictor == PredictorNo {
   296  		return passThru(r, maxLen)
   297  	}
   298  
   299  	if !intMemberOf(
   300  		predictor,
   301  		[]int{PredictorTIFF,
   302  			PredictorNone,
   303  			PredictorSub,
   304  			PredictorUp,
   305  			PredictorAverage,
   306  			PredictorPaeth,
   307  			PredictorOptimum,
   308  		}) {
   309  		return nil, errors.Errorf("pdfcpu: filter FlateDecode: undefined \"Predictor\" %d", predictor)
   310  	}
   311  
   312  	colors, bpc, columns, err := f.parameters()
   313  	if err != nil {
   314  		return nil, err
   315  	}
   316  
   317  	bytesPerPixel := (bpc*colors + 7) / 8
   318  	rowSize := (bpc*colors*columns + 7) / 8
   319  
   320  	m := rowSize
   321  	if predictor != PredictorTIFF {
   322  		// PNG prediction uses a row filter byte prefixing the pixelbytes of a row.
   323  		m++
   324  	}
   325  
   326  	// cr and pr are the bytes for the current and previous row.
   327  	cr := make([]byte, m)
   328  	pr := make([]byte, m)
   329  
   330  	// Output buffer
   331  	var b bytes.Buffer
   332  
   333  	for checkBufLen(b, maxLen) {
   334  
   335  		// Read decompressed bytes for one pixel row.
   336  		n, err := io.ReadFull(r, cr)
   337  		if err != nil {
   338  			if err != io.EOF {
   339  				return nil, err
   340  			}
   341  			// eof
   342  			if n == 0 {
   343  				break
   344  			}
   345  		}
   346  
   347  		if n != m {
   348  			return nil, errors.Errorf("pdfcpu: filter FlateDecode: read error, expected %d bytes, got: %d", m, n)
   349  		}
   350  
   351  		if err := process(&b, pr, cr, predictor, colors, bytesPerPixel); err != nil {
   352  			return nil, err
   353  		}
   354  
   355  		if err == io.EOF {
   356  			break
   357  		}
   358  
   359  		pr, cr = cr, pr
   360  	}
   361  
   362  	if maxLen < 0 && b.Len()%rowSize > 0 {
   363  		log.Info.Printf("failed postprocessing: %d %d\n", b.Len(), rowSize)
   364  		return nil, errors.New("pdfcpu: filter FlateDecode: postprocessing failed")
   365  	}
   366  
   367  	return &b, nil
   368  }