github.com/sabhiram/gsync@v0.0.0-20180214150410-b9089a1b7f13/gsync_client.go (about) 1 // This Source Code Form is subject to the terms of the Mozilla Public 2 // License, version 2.0. If a copy of the MPL was not distributed with this 3 // file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 5 package gsync 6 7 import ( 8 "bytes" 9 "context" 10 "crypto/sha256" 11 "fmt" 12 "hash" 13 "io" 14 15 "github.com/pkg/errors" 16 ) 17 18 // LookUpTable reads up blocks signatures and builds a lookup table for the client to search from when trying to decide 19 // wether to send or not a block of data. 20 func LookUpTable(ctx context.Context, bc <-chan BlockSignature) (map[uint32][]BlockSignature, error) { 21 table := make(map[uint32][]BlockSignature) 22 for c := range bc { 23 select { 24 case <-ctx.Done(): 25 return table, errors.Wrapf(ctx.Err(), "failed building lookup table") 26 default: 27 break 28 } 29 30 if c.Error != nil { 31 fmt.Printf("gsync: checksum error: %#v\n", c.Error) 32 continue 33 } 34 table[c.Weak] = append(table[c.Weak], c) 35 } 36 37 return table, nil 38 } 39 40 // Sync sends tokens or literal bytes to the caller in order to efficiently re-construct a remote file. Whether to send 41 // tokens or literals is determined by the remote checksums provided by the caller. 42 // This function does not block and returns immediately. Also, the remote blocks map is accessed without a mutex, 43 // so this function is expected to be called once the remote blocks map is fully populated. 44 // 45 // The caller must make sure the concrete reader instance is not nil or this function will panic. 46 func Sync(ctx context.Context, r io.ReaderAt, shash hash.Hash, remote map[uint32][]BlockSignature) (<-chan BlockOperation, error) { 47 if r == nil { 48 return nil, errors.New("gsync: reader required") 49 } 50 51 o := make(chan BlockOperation) 52 53 if shash == nil { 54 shash = sha256.New() 55 } 56 57 go func() { 58 var ( 59 r1, r2, rhash, old uint32 60 offset int64 61 rolling, match bool 62 ) 63 64 delta := make([]byte, 0) 65 66 defer func() { 67 close(o) 68 }() 69 70 for { 71 // Allow for cancellation. 72 select { 73 case <-ctx.Done(): 74 o <- BlockOperation{ 75 Error: ctx.Err(), 76 } 77 return 78 default: 79 break 80 } 81 82 bfp := bufferPool.Get().(*[]byte) 83 buffer := *bfp 84 85 n, err := r.ReadAt(buffer, offset) 86 if err != nil && err != io.EOF { 87 o <- BlockOperation{ 88 Error: errors.Wrapf(err, "failed reading data block"), 89 } 90 bufferPool.Put(bfp) 91 92 // return since data corruption in the server is possible and a re-sync is required. 93 return 94 } 95 96 block := buffer[:n] 97 98 // If there are no block signatures from remote server, send all data blocks 99 if len(remote) == 0 { 100 if n > 0 { 101 o <- BlockOperation{Data: block} 102 offset += int64(n) 103 } 104 105 if err == io.EOF { 106 bufferPool.Put(bfp) 107 return 108 } 109 continue 110 } 111 112 if rolling { 113 new := uint32(block[n-1]) 114 r1, r2, rhash = rollingHash2(uint32(n), r1, r2, old, new) 115 } else { 116 r1, r2, rhash = rollingHash(block) 117 } 118 119 if bs, ok := remote[rhash]; ok { 120 shash.Reset() 121 shash.Write(block) 122 s := shash.Sum(nil) 123 124 for _, b := range bs { 125 if !bytes.Equal(s, b.Strong) { 126 continue 127 } 128 129 match = true 130 131 // We need to send deltas before sending an index token. 132 if len(delta) > 0 { 133 send(ctx, bytes.NewReader(delta), o) 134 delta = make([]byte, 0) 135 } 136 137 // instructs the server to copy block data at offset b.Index 138 // from its own copy of the file. 139 o <- BlockOperation{Index: b.Index} 140 break 141 } 142 } 143 144 if match { 145 if err == io.EOF { 146 bufferPool.Put(bfp) 147 break 148 } 149 150 rolling, match = false, false 151 old, rhash, r1, r2 = 0, 0, 0, 0 152 offset += int64(n) 153 } else { 154 if err == io.EOF { 155 // If EOF is reached and not match data found, we add trailing data 156 // to delta array. 157 delta = append(delta, block...) 158 if len(delta) > 0 { 159 send(ctx, bytes.NewReader(delta), o) 160 } 161 bufferPool.Put(bfp) 162 break 163 } 164 rolling = true 165 old = uint32(block[0]) 166 delta = append(delta, block[0]) 167 offset++ 168 } 169 170 // Returning this buffer to the pool here gives us 5x more speed 171 bufferPool.Put(bfp) 172 } 173 }() 174 175 return o, nil 176 } 177 178 // send sends all deltas over the channel. Any error is reported back using the 179 // same channel. 180 func send(ctx context.Context, r io.Reader, o chan<- BlockOperation) { 181 for { 182 // Allow for cancellation. 183 select { 184 case <-ctx.Done(): 185 o <- BlockOperation{ 186 Error: ctx.Err(), 187 } 188 return 189 default: 190 // break out of the select block and continue reading 191 break 192 } 193 194 bfp := bufferPool.Get().(*[]byte) 195 buffer := *bfp 196 defer bufferPool.Put(bfp) 197 198 n, err := r.Read(buffer) 199 if err != nil && err != io.EOF { 200 o <- BlockOperation{ 201 Error: errors.Wrapf(err, "failed reading data block"), 202 } 203 return 204 } 205 206 // If we don't guard against 0 bytes reads, an operation with index 0 will be sent 207 // and the server will duplicate block 0 at the end of the reconstructed file. 208 if n > 0 { 209 block := buffer[:n] 210 o <- BlockOperation{Data: block} 211 } 212 213 if err == io.EOF { 214 break 215 } 216 } 217 }