github.com/whamcloud/lemur@v0.0.0-20190827193804-4655df8a52af/pkg/zipcheck/analyze.go (about) 1 package zipcheck 2 3 // Evaluate the "compressibilty" of a file by compressing 4 // a small sample. 5 6 import ( 7 "compress/zlib" 8 "io" 9 "math" 10 "os" 11 "time" 12 13 "github.com/pkg/errors" 14 ) 15 16 // CompressResult returns resuls of comressibility check. 17 type CompressResult struct { 18 T time.Duration 19 Samples int 20 Size int64 21 Bytes int64 22 ZipBytes int64 23 } 24 25 // Null is a /dev/null Writer that counts how many bytes have been written to it. 26 type Null struct { 27 Bytes int64 28 } 29 30 func (n *Null) Write(b []byte) (int, error) { 31 n.Bytes += int64(len(b)) 32 return len(b), nil 33 } 34 35 // SampleFile reads count blocks of blockSize from fp, and copies them to w. 36 func SampleFile(w io.Writer, fp io.ReaderAt, count int, blockSize int64, step int64) (int64, error) { 37 var offset int64 38 var copied int64 39 for i := 0; i < count; i++ { 40 r := io.NewSectionReader(fp, offset, blockSize) 41 nb, err := io.Copy(w, r) 42 if err != nil { 43 return copied, errors.Wrap(err, "copy failed") 44 } 45 copied += nb 46 offset += step 47 48 } 49 return copied, nil 50 } 51 52 func analyze(fname string, count int, block int64, zipper zipFunc) (*CompressResult, error) { 53 var cr CompressResult 54 55 f, err := os.Open(fname) 56 if err != nil { 57 return nil, errors.Wrap(err, "open failed") 58 } 59 defer f.Close() 60 61 fi, err := f.Stat() 62 if err != nil { 63 return nil, errors.Wrap(err, "stat failed") 64 } 65 if count == 0 { 66 // default is 2*log(size) smaples for a quick scan 67 count = 2 * int(math.Log(float64(fi.Size()))) 68 } 69 null := &Null{} 70 w, err := zipper(null) 71 if err != nil { 72 return nil, errors.Wrap(err, "create compressor failed") 73 } 74 75 // Compress entire file it is smaller than the total sample size 76 if fi.Size() < int64(count)*block { 77 block = fi.Size() 78 count = 1 79 } 80 81 step := fi.Size() / int64(count) 82 started := time.Now() 83 cr.Bytes, err = SampleFile(w, f, count, block, step) 84 w.Close() 85 if err != nil { 86 return nil, errors.Wrap(err, "sample failed") 87 } 88 89 cr.Samples = count 90 cr.Size = block 91 cr.T = time.Since(started) 92 cr.ZipBytes = null.Bytes 93 return &cr, nil 94 } 95 96 type zipFunc func(io.Writer) (io.WriteCloser, error) 97 98 func gzip(level int) zipFunc { 99 return func(w io.Writer) (io.WriteCloser, error) { 100 zip, err := zlib.NewWriterLevel(w, level) 101 if err != nil { 102 return nil, errors.Wrap(err, "NewWriterLevel") 103 } 104 return zip, nil 105 } 106 } 107 108 // AnalyzeFile will compress a sample of if the file and return estimated reduction percentage. 109 // 0 means no reduction, 50% means file might be resuduced to half. 110 func AnalyzeFile(fname string) (float64, error) { 111 cr, err := analyze(fname, 0, 4096, gzip(1)) 112 if err != nil { 113 return 0, errors.Wrap(err, "analayze failed") 114 } 115 reduced := (1 - float64(cr.ZipBytes)/float64(cr.Bytes)) * 100 116 return reduced, nil 117 }