github.com/whamcloud/lemur@v0.0.0-20190827193804-4655df8a52af/pkg/zipcheck/analyze.go (about)

     1  package zipcheck
     2  
     3  // Evaluate the "compressibility" of a file by compressing
     4  // a small sample.
     5  
     6  import (
     7  	"compress/zlib"
     8  	"io"
     9  	"math"
    10  	"os"
    11  	"time"
    12  
    13  	"github.com/pkg/errors"
    14  )
    15  
// CompressResult holds the results of a compressibility check as filled in
// by analyze.
type CompressResult struct {
	// T is the time elapsed while sampling the file and closing the
	// compressor.
	T        time.Duration
	// Samples is the number of blocks that were sampled.
	Samples  int
	// Size is the size in bytes requested for each sampled block; it is the
	// file size when the whole file was compressed as a single sample.
	Size     int64
	// Bytes is the number of uncompressed bytes fed to the compressor.
	Bytes    int64
	// ZipBytes is the number of bytes the compressor produced.
	ZipBytes int64
}
    24  
    25  // Null is a /dev/null Writer that counts how many bytes have been written to it.
    26  type Null struct {
    27  	Bytes int64
    28  }
    29  
    30  func (n *Null) Write(b []byte) (int, error) {
    31  	n.Bytes += int64(len(b))
    32  	return len(b), nil
    33  }
    34  
    35  // SampleFile reads count blocks of blockSize from fp, and copies them to w.
    36  func SampleFile(w io.Writer, fp io.ReaderAt, count int, blockSize int64, step int64) (int64, error) {
    37  	var offset int64
    38  	var copied int64
    39  	for i := 0; i < count; i++ {
    40  		r := io.NewSectionReader(fp, offset, blockSize)
    41  		nb, err := io.Copy(w, r)
    42  		if err != nil {
    43  			return copied, errors.Wrap(err, "copy failed")
    44  		}
    45  		copied += nb
    46  		offset += step
    47  
    48  	}
    49  	return copied, nil
    50  }
    51  
    52  func analyze(fname string, count int, block int64, zipper zipFunc) (*CompressResult, error) {
    53  	var cr CompressResult
    54  
    55  	f, err := os.Open(fname)
    56  	if err != nil {
    57  		return nil, errors.Wrap(err, "open failed")
    58  	}
    59  	defer f.Close()
    60  
    61  	fi, err := f.Stat()
    62  	if err != nil {
    63  		return nil, errors.Wrap(err, "stat failed")
    64  	}
    65  	if count == 0 {
    66  		// default is 2*log(size) smaples for a quick scan
    67  		count = 2 * int(math.Log(float64(fi.Size())))
    68  	}
    69  	null := &Null{}
    70  	w, err := zipper(null)
    71  	if err != nil {
    72  		return nil, errors.Wrap(err, "create compressor failed")
    73  	}
    74  
    75  	// Compress entire file it is smaller than the total sample size
    76  	if fi.Size() < int64(count)*block {
    77  		block = fi.Size()
    78  		count = 1
    79  	}
    80  
    81  	step := fi.Size() / int64(count)
    82  	started := time.Now()
    83  	cr.Bytes, err = SampleFile(w, f, count, block, step)
    84  	w.Close()
    85  	if err != nil {
    86  		return nil, errors.Wrap(err, "sample failed")
    87  	}
    88  
    89  	cr.Samples = count
    90  	cr.Size = block
    91  	cr.T = time.Since(started)
    92  	cr.ZipBytes = null.Bytes
    93  	return &cr, nil
    94  }
    95  
// zipFunc builds a compressing io.WriteCloser on top of the given io.Writer.
// analyze calls it to obtain the compressor that file samples are written
// through; the compressor's output goes to the writer passed in.
type zipFunc func(io.Writer) (io.WriteCloser, error)
    97  
    98  func gzip(level int) zipFunc {
    99  	return func(w io.Writer) (io.WriteCloser, error) {
   100  		zip, err := zlib.NewWriterLevel(w, level)
   101  		if err != nil {
   102  			return nil, errors.Wrap(err, "NewWriterLevel")
   103  		}
   104  		return zip, nil
   105  	}
   106  }
   107  
   108  // AnalyzeFile will compress a sample of if the file and return estimated reduction percentage.
   109  // 0 means no reduction, 50% means file might be resuduced to half.
   110  func AnalyzeFile(fname string) (float64, error) {
   111  	cr, err := analyze(fname, 0, 4096, gzip(1))
   112  	if err != nil {
   113  		return 0, errors.Wrap(err, "analayze failed")
   114  	}
   115  	reduced := (1 - float64(cr.ZipBytes)/float64(cr.Bytes)) * 100
   116  	return reduced, nil
   117  }