// rabinWindowSize is the rolling-hash window length in bytes. NewMaybeRabin
// installs it, and Split falls back to it when a MaybeRabin was not built by
// the constructor and therefore has no window size set.
const rabinWindowSize = 16 // probably a good number...

// MaybeRabin splits a byte stream into variable-sized blocks, cutting where a
// rolling hash over the most recent bytes has all of its masked low bits set.
type MaybeRabin struct {
	// mask selects the low bits of the rolling hash that must all be set for
	// a position to count as a cut point.
	mask int
	// windowSize is the number of trailing bytes covered by the rolling hash.
	windowSize int
	// MinBlockSize is the length a block must exceed before a hash match may
	// end it.
	MinBlockSize int
	// MaxBlockSize is the length at which a block is cut even without a hash
	// match.
	MaxBlockSize int
}

// NewMaybeRabin returns a splitter tuned for blocks of roughly avgBlkSize
// bytes. The mask is floor(log2(avgBlkSize)) one-bits wide, MinBlockSize is
// avgBlkSize/2 and MaxBlockSize is three times that.
func NewMaybeRabin(avgBlkSize int) *MaybeRabin {
	blkbits := uint(math.Log2(float64(avgBlkSize)))
	rb := new(MaybeRabin)
	rb.mask = (1 << blkbits) - 1
	rb.windowSize = rabinWindowSize
	rb.MinBlockSize = avgBlkSize / 2
	rb.MaxBlockSize = (avgBlkSize / 2) * 3
	return rb
}

// Split reads r to completion in a background goroutine and sends the
// resulting blocks, in order, on the returned channel. Concatenating the
// blocks reproduces the bytes that were read.
//
// The channel is closed once the input is exhausted or a read fails, on every
// path, so ranging over it always terminates. Input shorter than the hash
// window is delivered as a single block; empty input delivers no block. For
// longer input a final block is always sent and may be empty when the input
// ends exactly on a cut point. Read errors cannot be returned through this
// signature; they end the stream early.
func (mr *MaybeRabin) Split(r io.Reader) chan []byte {
	out := make(chan []byte, 16)
	go func() {
		// Close on every exit path so receivers are never left blocked.
		defer close(out)

		// The hash is computed in int64 so that base^window * byte cannot
		// overflow where int is 32 bits wide; results are the same as the
		// plain-int arithmetic on 64-bit platforms.
		const (
			base    int64 = 10       // polynomial base, arbitrarily chosen
			modulus int64 = 33554383 // hash modulus, arbitrarily chosen
		)
		mask := int64(mr.mask)

		// A MaybeRabin that did not come from NewMaybeRabin has no window
		// size; indexing a zero-length window would panic.
		windowSize := mr.windowSize
		if windowSize <= 0 {
			windowSize = rabinWindowSize
		}

		inbuf := bufio.NewReader(r)
		blkbuf := new(bytes.Buffer)

		// window is a circular buffer holding the bytes currently covered by
		// the hash. push stores val in slot i and returns the byte it evicts.
		window := make([]byte, windowSize)
		push := func(i int, val byte) int64 {
			slot := i % len(window)
			evicted := window[slot]
			window[slot] = val
			return int64(evicted)
		}

		var rollingHash int64
		// an ends up as base^windowSize mod modulus: the weight the oldest
		// byte carries once the hash has been shifted for a new byte.
		an := int64(1)

		// Prime the window and the hash with the first windowSize bytes.
		i := 0
		for ; i < windowSize; i++ {
			b, err := inbuf.ReadByte()
			if err != nil {
				if err != io.EOF {
					fmt.Println(err)
				}
				// Input ended before the window filled: hand over what was
				// read instead of dropping it.
				if blkbuf.Len() > 0 {
					out <- blkbuf.Bytes()
				}
				return
			}
			blkbuf.WriteByte(b)
			push(i, b)
			rollingHash = (rollingHash*base + int64(b)) % modulus
			an = (an * base) % modulus
		}

		for ; ; i++ {
			b, err := inbuf.ReadByte()
			if err != nil {
				break
			}
			evicted := push(i, b)
			blkbuf.WriteByte(b)
			// Slide the window one byte. The remainder may be negative; that
			// is kept as-is so cut points stay where they have always been.
			rollingHash = (rollingHash*base + int64(b) - an*evicted) % modulus

			hashCut := rollingHash&mask == mask && blkbuf.Len() > mr.MinBlockSize
			if hashCut || blkbuf.Len() >= mr.MaxBlockSize {
				// Send a copy: blkbuf's storage is reused after Reset.
				blk := make([]byte, blkbuf.Len())
				copy(blk, blkbuf.Bytes())
				out <- blk
				blkbuf.Reset()
			}

			// Stop hashing once fewer than a full window of bytes remains;
			// Peek reports an error whenever it returns a short read.
			if _, err := inbuf.Peek(windowSize); err != nil {
				break
			}
		}

		// Whatever is left becomes the tail of the final block. A copy error
		// has nowhere to be reported, so the block is sent as read so far.
		_, _ = io.Copy(blkbuf, inbuf)
		out <- blkbuf.Bytes()
	}()
	return out
}