github.com/influxdata/telegraf@v1.30.3/internal/content_coding.go (about) 1 package internal 2 3 import ( 4 "bufio" 5 "bytes" 6 "errors" 7 "fmt" 8 "io" 9 10 "github.com/klauspost/compress/gzip" 11 "github.com/klauspost/compress/zlib" 12 "github.com/klauspost/compress/zstd" 13 "github.com/klauspost/pgzip" 14 ) 15 16 const defaultMaxDecompressionSize int64 = 500 * 1024 * 1024 //500MB 17 18 // DecodingOption provide methods to change the decoding from the standard 19 // configuration. 20 type DecodingOption func(*decoderConfig) 21 22 type decoderConfig struct { 23 maxDecompressionSize int64 24 } 25 26 func WithMaxDecompressionSize(maxDecompressionSize int64) DecodingOption { 27 return func(cfg *decoderConfig) { 28 cfg.maxDecompressionSize = maxDecompressionSize 29 } 30 } 31 32 type encoderConfig struct { 33 level int 34 } 35 36 // EncodingOption provide methods to change the encoding from the standard 37 // configuration. 38 type EncodingOption func(*encoderConfig) 39 40 func WithCompressionLevel(level int) EncodingOption { 41 return func(cfg *encoderConfig) { 42 cfg.level = level 43 } 44 } 45 46 // NewStreamContentDecoder returns a reader that will decode the stream 47 // according to the encoding type. 48 func NewStreamContentDecoder(encoding string, r io.Reader) (io.Reader, error) { 49 switch encoding { 50 case "gzip": 51 return NewGzipReader(r) 52 case "identity", "": 53 return r, nil 54 default: 55 return nil, errors.New("invalid value for content_encoding") 56 } 57 } 58 59 // GzipReader is similar to gzip.Reader but reads only a single gzip stream per read. 60 type GzipReader struct { 61 r io.Reader 62 z *pgzip.Reader 63 endOfStream bool 64 } 65 66 func NewGzipReader(r io.Reader) (io.Reader, error) { 67 // We need a read that implements ByteReader in order to line up the next 68 // stream. 69 br := bufio.NewReader(r) 70 71 // Reads the first gzip stream header. 72 z, err := pgzip.NewReader(br) 73 if err != nil { 74 return nil, err 75 } 76 77 // Prevent future calls to Read from reading the following gzip header. 78 z.Multistream(false) 79 80 return &GzipReader{r: br, z: z}, nil 81 } 82 83 func (r *GzipReader) Read(b []byte) (int, error) { 84 if r.endOfStream { 85 // Reads the next gzip header and prepares for the next stream. 86 err := r.z.Reset(r.r) 87 if err != nil { 88 return 0, err 89 } 90 r.z.Multistream(false) 91 r.endOfStream = false 92 } 93 94 n, err := r.z.Read(b) 95 96 // Since multistream is disabled, io.EOF indicates the end of the gzip 97 // sequence. On the next read we must read the next gzip header. 98 if errors.Is(err, io.EOF) { 99 r.endOfStream = true 100 return n, nil 101 } 102 return n, err 103 } 104 105 // NewContentEncoder returns a ContentEncoder for the encoding type. 106 func NewContentEncoder(encoding string, options ...EncodingOption) (ContentEncoder, error) { 107 switch encoding { 108 case "gzip": 109 return NewGzipEncoder(options...) 110 case "identity", "": 111 return NewIdentityEncoder(options...) 112 case "zlib": 113 return NewZlibEncoder(options...) 114 case "zstd": 115 return NewZstdEncoder(options...) 116 default: 117 return nil, errors.New("invalid value for content_encoding") 118 } 119 } 120 121 type AutoDecoder struct { 122 encoding string 123 gzip *GzipDecoder 124 identity *IdentityDecoder 125 } 126 127 func (a *AutoDecoder) SetEncoding(encoding string) { 128 a.encoding = encoding 129 } 130 131 func (a *AutoDecoder) Decode(data []byte) ([]byte, error) { 132 if a.encoding == "gzip" { 133 return a.gzip.Decode(data) 134 } 135 return a.identity.Decode(data) 136 } 137 138 func NewAutoContentDecoder(options ...DecodingOption) *AutoDecoder { 139 var a AutoDecoder 140 141 a.identity = NewIdentityDecoder(options...) 142 a.gzip = NewGzipDecoder(options...) 143 return &a 144 } 145 146 // NewContentDecoder returns a ContentDecoder for the encoding type. 147 func NewContentDecoder(encoding string, options ...DecodingOption) (ContentDecoder, error) { 148 switch encoding { 149 case "auto": 150 return NewAutoContentDecoder(options...), nil 151 case "gzip": 152 return NewGzipDecoder(options...), nil 153 case "identity", "": 154 return NewIdentityDecoder(options...), nil 155 case "zlib": 156 return NewZlibDecoder(options...), nil 157 case "zstd": 158 return NewZstdDecoder(options...) 159 default: 160 return nil, errors.New("invalid value for content_encoding") 161 } 162 } 163 164 // ContentEncoder applies a wrapper encoding to byte buffers. 165 type ContentEncoder interface { 166 Encode([]byte) ([]byte, error) 167 } 168 169 // GzipEncoder compresses the buffer using gzip at the default level. 170 type GzipEncoder struct { 171 pwriter *pgzip.Writer 172 writer *gzip.Writer 173 buf *bytes.Buffer 174 } 175 176 func NewGzipEncoder(options ...EncodingOption) (*GzipEncoder, error) { 177 cfg := encoderConfig{level: gzip.DefaultCompression} 178 for _, o := range options { 179 o(&cfg) 180 } 181 182 // Check if the compression level is supported 183 switch cfg.level { 184 case gzip.NoCompression, gzip.DefaultCompression, gzip.BestSpeed, gzip.BestCompression: 185 // Do nothing as those are valid levels 186 default: 187 return nil, errors.New("invalid compression level, only 0, 1 and 9 are supported") 188 } 189 190 var buf bytes.Buffer 191 pw, err := pgzip.NewWriterLevel(&buf, cfg.level) 192 if err != nil { 193 return nil, err 194 } 195 196 w, err := gzip.NewWriterLevel(&buf, cfg.level) 197 return &GzipEncoder{ 198 pwriter: pw, 199 writer: w, 200 buf: &buf, 201 }, err 202 } 203 204 func (e *GzipEncoder) Encode(data []byte) ([]byte, error) { 205 // Parallel Gzip is only faster for larger data chunks. According to the 206 // project's documentation the trade-off size is at about 1MB, so we switch 207 // to parallel Gzip if the data is larger and run the built-in version 208 // otherwise. 209 if len(data) > 1024*1024 { 210 return e.encodeBig(data) 211 } 212 return e.encodeSmall(data) 213 } 214 215 func (e *GzipEncoder) encodeSmall(data []byte) ([]byte, error) { 216 e.buf.Reset() 217 e.writer.Reset(e.buf) 218 219 _, err := e.writer.Write(data) 220 if err != nil { 221 return nil, err 222 } 223 err = e.writer.Close() 224 if err != nil { 225 return nil, err 226 } 227 return e.buf.Bytes(), nil 228 } 229 230 func (e *GzipEncoder) encodeBig(data []byte) ([]byte, error) { 231 e.buf.Reset() 232 e.pwriter.Reset(e.buf) 233 234 _, err := e.pwriter.Write(data) 235 if err != nil { 236 return nil, err 237 } 238 err = e.pwriter.Close() 239 if err != nil { 240 return nil, err 241 } 242 return e.buf.Bytes(), nil 243 } 244 245 type ZlibEncoder struct { 246 writer *zlib.Writer 247 buf *bytes.Buffer 248 } 249 250 func NewZlibEncoder(options ...EncodingOption) (*ZlibEncoder, error) { 251 cfg := encoderConfig{level: zlib.DefaultCompression} 252 for _, o := range options { 253 o(&cfg) 254 } 255 256 switch cfg.level { 257 case zlib.NoCompression, zlib.DefaultCompression, zlib.BestSpeed, zlib.BestCompression: 258 // Do nothing as those are valid levels 259 default: 260 return nil, errors.New("invalid compression level, only 0, 1 and 9 are supported") 261 } 262 263 var buf bytes.Buffer 264 w, err := zlib.NewWriterLevel(&buf, cfg.level) 265 return &ZlibEncoder{ 266 writer: w, 267 buf: &buf, 268 }, err 269 } 270 271 func (e *ZlibEncoder) Encode(data []byte) ([]byte, error) { 272 e.buf.Reset() 273 e.writer.Reset(e.buf) 274 275 _, err := e.writer.Write(data) 276 if err != nil { 277 return nil, err 278 } 279 err = e.writer.Close() 280 if err != nil { 281 return nil, err 282 } 283 return e.buf.Bytes(), nil 284 } 285 286 type ZstdEncoder struct { 287 encoder *zstd.Encoder 288 } 289 290 func NewZstdEncoder(options ...EncodingOption) (*ZstdEncoder, error) { 291 cfg := encoderConfig{level: 3} 292 for _, o := range options { 293 o(&cfg) 294 } 295 296 // Map the levels 297 var level zstd.EncoderLevel 298 switch cfg.level { 299 case 1: 300 level = zstd.SpeedFastest 301 case 3: 302 level = zstd.SpeedDefault 303 case 7: 304 level = zstd.SpeedBetterCompression 305 case 11: 306 level = zstd.SpeedBestCompression 307 default: 308 return nil, errors.New("invalid compression level, only 1, 3, 7 and 11 are supported") 309 } 310 311 e, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(level)) 312 return &ZstdEncoder{ 313 encoder: e, 314 }, err 315 } 316 317 func (e *ZstdEncoder) Encode(data []byte) ([]byte, error) { 318 return e.encoder.EncodeAll(data, make([]byte, 0, len(data))), nil 319 } 320 321 // IdentityEncoder is a null encoder that applies no transformation. 322 type IdentityEncoder struct{} 323 324 func NewIdentityEncoder(options ...EncodingOption) (*IdentityEncoder, error) { 325 if len(options) > 0 { 326 return nil, errors.New("identity encoder does not support options") 327 } 328 329 return &IdentityEncoder{}, nil 330 } 331 332 func (*IdentityEncoder) Encode(data []byte) ([]byte, error) { 333 return data, nil 334 } 335 336 // ContentDecoder removes a wrapper encoding from byte buffers. 337 type ContentDecoder interface { 338 SetEncoding(string) 339 Decode([]byte) ([]byte, error) 340 } 341 342 // GzipDecoder decompresses buffers with gzip compression. 343 type GzipDecoder struct { 344 preader *pgzip.Reader 345 reader *gzip.Reader 346 buf *bytes.Buffer 347 maxDecompressionSize int64 348 } 349 350 func NewGzipDecoder(options ...DecodingOption) *GzipDecoder { 351 cfg := decoderConfig{maxDecompressionSize: defaultMaxDecompressionSize} 352 for _, o := range options { 353 o(&cfg) 354 } 355 356 return &GzipDecoder{ 357 preader: new(pgzip.Reader), 358 reader: new(gzip.Reader), 359 buf: new(bytes.Buffer), 360 maxDecompressionSize: cfg.maxDecompressionSize, 361 } 362 } 363 364 func (*GzipDecoder) SetEncoding(string) {} 365 366 func (d *GzipDecoder) Decode(data []byte) ([]byte, error) { 367 // Parallel Gzip is only faster for larger data chunks. According to the 368 // project's documentation the trade-off size is at about 1MB, so we switch 369 // to parallel Gzip if the data is larger and run the built-in version 370 // otherwise. 371 if len(data) > 1024*1024 { 372 return d.decodeBig(data) 373 } 374 return d.decodeSmall(data) 375 } 376 377 func (d *GzipDecoder) decodeSmall(data []byte) ([]byte, error) { 378 err := d.reader.Reset(bytes.NewBuffer(data)) 379 if err != nil { 380 return nil, err 381 } 382 d.buf.Reset() 383 384 n, err := io.CopyN(d.buf, d.reader, d.maxDecompressionSize) 385 if err != nil && !errors.Is(err, io.EOF) { 386 return nil, err 387 } else if n == d.maxDecompressionSize { 388 return nil, fmt.Errorf("size of decoded data exceeds allowed size %d", d.maxDecompressionSize) 389 } 390 391 err = d.reader.Close() 392 if err != nil { 393 return nil, err 394 } 395 return d.buf.Bytes(), nil 396 } 397 398 func (d *GzipDecoder) decodeBig(data []byte) ([]byte, error) { 399 err := d.preader.Reset(bytes.NewBuffer(data)) 400 if err != nil { 401 return nil, err 402 } 403 d.buf.Reset() 404 405 n, err := io.CopyN(d.buf, d.preader, d.maxDecompressionSize) 406 if err != nil && !errors.Is(err, io.EOF) { 407 return nil, err 408 } else if n == d.maxDecompressionSize { 409 return nil, fmt.Errorf("size of decoded data exceeds allowed size %d", d.maxDecompressionSize) 410 } 411 412 err = d.preader.Close() 413 if err != nil { 414 return nil, err 415 } 416 return d.buf.Bytes(), nil 417 } 418 419 type ZlibDecoder struct { 420 buf *bytes.Buffer 421 maxDecompressionSize int64 422 } 423 424 func NewZlibDecoder(options ...DecodingOption) *ZlibDecoder { 425 cfg := decoderConfig{maxDecompressionSize: defaultMaxDecompressionSize} 426 for _, o := range options { 427 o(&cfg) 428 } 429 430 return &ZlibDecoder{ 431 buf: new(bytes.Buffer), 432 maxDecompressionSize: cfg.maxDecompressionSize, 433 } 434 } 435 436 func (*ZlibDecoder) SetEncoding(string) {} 437 438 func (d *ZlibDecoder) Decode(data []byte) ([]byte, error) { 439 d.buf.Reset() 440 441 b := bytes.NewBuffer(data) 442 r, err := zlib.NewReader(b) 443 if err != nil { 444 return nil, err 445 } 446 447 n, err := io.CopyN(d.buf, r, d.maxDecompressionSize) 448 if err != nil && !errors.Is(err, io.EOF) { 449 return nil, err 450 } else if n == d.maxDecompressionSize { 451 return nil, fmt.Errorf("size of decoded data exceeds allowed size %d", d.maxDecompressionSize) 452 } 453 454 err = r.Close() 455 if err != nil { 456 return nil, err 457 } 458 return d.buf.Bytes(), nil 459 } 460 461 type ZstdDecoder struct { 462 decoder *zstd.Decoder 463 } 464 465 func NewZstdDecoder(options ...DecodingOption) (*ZstdDecoder, error) { 466 cfg := decoderConfig{maxDecompressionSize: defaultMaxDecompressionSize} 467 for _, o := range options { 468 o(&cfg) 469 } 470 471 d, err := zstd.NewReader(nil, zstd.WithDecoderConcurrency(0), zstd.WithDecoderMaxWindow(uint64(cfg.maxDecompressionSize))) 472 return &ZstdDecoder{ 473 decoder: d, 474 }, err 475 } 476 477 func (*ZstdDecoder) SetEncoding(string) {} 478 479 func (d *ZstdDecoder) Decode(data []byte) ([]byte, error) { 480 return d.decoder.DecodeAll(data, nil) 481 } 482 483 // IdentityDecoder is a null decoder that returns the input. 484 type IdentityDecoder struct { 485 } 486 487 func NewIdentityDecoder(_ ...DecodingOption) *IdentityDecoder { 488 return &IdentityDecoder{} 489 } 490 491 func (*IdentityDecoder) SetEncoding(string) {} 492 493 func (*IdentityDecoder) Decode(data []byte) ([]byte, error) { 494 return data, nil 495 }