github.com/zignig/go-ipfs@v0.0.0-20141111235910-c9e5fdf55a52/importer/chunk/rabin.go (about)

     1  package chunk
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"fmt"
     7  	"io"
     8  	"math"
     9  )
    10  
    11  type MaybeRabin struct {
    12  	mask         int
    13  	windowSize   int
    14  	MinBlockSize int
    15  	MaxBlockSize int
    16  }
    17  
    18  func NewMaybeRabin(avgBlkSize int) *MaybeRabin {
    19  	blkbits := uint(math.Log2(float64(avgBlkSize)))
    20  	rb := new(MaybeRabin)
    21  	rb.mask = (1 << blkbits) - 1
    22  	rb.windowSize = 16 // probably a good number...
    23  	rb.MinBlockSize = avgBlkSize / 2
    24  	rb.MaxBlockSize = (avgBlkSize / 2) * 3
    25  	return rb
    26  }
    27  
    28  func (mr *MaybeRabin) Split(r io.Reader) chan []byte {
    29  	out := make(chan []byte, 16)
    30  	go func() {
    31  		inbuf := bufio.NewReader(r)
    32  		blkbuf := new(bytes.Buffer)
    33  
    34  		// some bullshit numbers i made up
    35  		a := 10         // honestly, no idea what this is
    36  		MOD := 33554383 // randomly chosen (seriously)
    37  		an := 1
    38  		rollingHash := 0
    39  
    40  		// Window is a circular buffer
    41  		window := make([]byte, mr.windowSize)
    42  		push := func(i int, val byte) (outval int) {
    43  			outval = int(window[i%len(window)])
    44  			window[i%len(window)] = val
    45  			return
    46  		}
    47  
    48  		// Duplicate byte slice
    49  		dup := func(b []byte) []byte {
    50  			d := make([]byte, len(b))
    51  			copy(d, b)
    52  			return d
    53  		}
    54  
    55  		// Fill up the window
    56  		i := 0
    57  		for ; i < mr.windowSize; i++ {
    58  			b, err := inbuf.ReadByte()
    59  			if err != nil {
    60  				fmt.Println(err)
    61  				return
    62  			}
    63  			blkbuf.WriteByte(b)
    64  			push(i, b)
    65  			rollingHash = (rollingHash*a + int(b)) % MOD
    66  			an = (an * a) % MOD
    67  		}
    68  
    69  		for ; true; i++ {
    70  			b, err := inbuf.ReadByte()
    71  			if err != nil {
    72  				break
    73  			}
    74  			outval := push(i, b)
    75  			blkbuf.WriteByte(b)
    76  			rollingHash = (rollingHash*a + int(b) - an*outval) % MOD
    77  			if (rollingHash&mr.mask == mr.mask && blkbuf.Len() > mr.MinBlockSize) ||
    78  				blkbuf.Len() >= mr.MaxBlockSize {
    79  				out <- dup(blkbuf.Bytes())
    80  				blkbuf.Reset()
    81  			}
    82  
    83  			// Check if there are enough remaining
    84  			peek, err := inbuf.Peek(mr.windowSize)
    85  			if err != nil || len(peek) != mr.windowSize {
    86  				break
    87  			}
    88  		}
    89  		io.Copy(blkbuf, inbuf)
    90  		out <- blkbuf.Bytes()
    91  		close(out)
    92  	}()
    93  	return out
    94  }