github.com/sabhiram/gsync@v0.0.0-20180214150410-b9089a1b7f13/gsync_client.go (about)

     1  // This Source Code Form is subject to the terms of the Mozilla Public
     2  // License, version 2.0. If a copy of the MPL was not distributed with this
     3  // file, You can obtain one at http://mozilla.org/MPL/2.0/.
     4  
     5  package gsync
     6  
     7  import (
     8  	"bytes"
     9  	"context"
    10  	"crypto/sha256"
    11  	"fmt"
    12  	"hash"
    13  	"io"
    14  
    15  	"github.com/pkg/errors"
    16  )
    17  
    18  // LookUpTable reads up blocks signatures and builds a lookup table for the client to search from when trying to decide
    19  // wether to send or not a block of data.
    20  func LookUpTable(ctx context.Context, bc <-chan BlockSignature) (map[uint32][]BlockSignature, error) {
    21  	table := make(map[uint32][]BlockSignature)
    22  	for c := range bc {
    23  		select {
    24  		case <-ctx.Done():
    25  			return table, errors.Wrapf(ctx.Err(), "failed building lookup table")
    26  		default:
    27  			break
    28  		}
    29  
    30  		if c.Error != nil {
    31  			fmt.Printf("gsync: checksum error: %#v\n", c.Error)
    32  			continue
    33  		}
    34  		table[c.Weak] = append(table[c.Weak], c)
    35  	}
    36  
    37  	return table, nil
    38  }
    39  
    40  // Sync sends tokens or literal bytes to the caller in order to efficiently re-construct a remote file. Whether to send
    41  // tokens or literals is determined by the remote checksums provided by the caller.
    42  // This function does not block and returns immediately. Also, the remote blocks map is accessed without a mutex,
    43  // so this function is expected to be called once the remote blocks map is fully populated.
    44  //
    45  // The caller must make sure the concrete reader instance is not nil or this function will panic.
    46  func Sync(ctx context.Context, r io.ReaderAt, shash hash.Hash, remote map[uint32][]BlockSignature) (<-chan BlockOperation, error) {
    47  	if r == nil {
    48  		return nil, errors.New("gsync: reader required")
    49  	}
    50  
    51  	o := make(chan BlockOperation)
    52  
    53  	if shash == nil {
    54  		shash = sha256.New()
    55  	}
    56  
    57  	go func() {
    58  		var (
    59  			r1, r2, rhash, old uint32
    60  			offset             int64
    61  			rolling, match     bool
    62  		)
    63  
    64  		delta := make([]byte, 0)
    65  
    66  		defer func() {
    67  			close(o)
    68  		}()
    69  
    70  		for {
    71  			// Allow for cancellation.
    72  			select {
    73  			case <-ctx.Done():
    74  				o <- BlockOperation{
    75  					Error: ctx.Err(),
    76  				}
    77  				return
    78  			default:
    79  				break
    80  			}
    81  
    82  			bfp := bufferPool.Get().(*[]byte)
    83  			buffer := *bfp
    84  
    85  			n, err := r.ReadAt(buffer, offset)
    86  			if err != nil && err != io.EOF {
    87  				o <- BlockOperation{
    88  					Error: errors.Wrapf(err, "failed reading data block"),
    89  				}
    90  				bufferPool.Put(bfp)
    91  
    92  				// return since data corruption in the server is possible and a re-sync is required.
    93  				return
    94  			}
    95  
    96  			block := buffer[:n]
    97  
    98  			// If there are no block signatures from remote server, send all data blocks
    99  			if len(remote) == 0 {
   100  				if n > 0 {
   101  					o <- BlockOperation{Data: block}
   102  					offset += int64(n)
   103  				}
   104  
   105  				if err == io.EOF {
   106  					bufferPool.Put(bfp)
   107  					return
   108  				}
   109  				continue
   110  			}
   111  
   112  			if rolling {
   113  				new := uint32(block[n-1])
   114  				r1, r2, rhash = rollingHash2(uint32(n), r1, r2, old, new)
   115  			} else {
   116  				r1, r2, rhash = rollingHash(block)
   117  			}
   118  
   119  			if bs, ok := remote[rhash]; ok {
   120  				shash.Reset()
   121  				shash.Write(block)
   122  				s := shash.Sum(nil)
   123  
   124  				for _, b := range bs {
   125  					if !bytes.Equal(s, b.Strong) {
   126  						continue
   127  					}
   128  
   129  					match = true
   130  
   131  					// We need to send deltas before sending an index token.
   132  					if len(delta) > 0 {
   133  						send(ctx, bytes.NewReader(delta), o)
   134  						delta = make([]byte, 0)
   135  					}
   136  
   137  					// instructs the server to copy block data at offset b.Index
   138  					// from its own copy of the file.
   139  					o <- BlockOperation{Index: b.Index}
   140  					break
   141  				}
   142  			}
   143  
   144  			if match {
   145  				if err == io.EOF {
   146  					bufferPool.Put(bfp)
   147  					break
   148  				}
   149  
   150  				rolling, match = false, false
   151  				old, rhash, r1, r2 = 0, 0, 0, 0
   152  				offset += int64(n)
   153  			} else {
   154  				if err == io.EOF {
   155  					// If EOF is reached and not match data found, we add trailing data
   156  					// to delta array.
   157  					delta = append(delta, block...)
   158  					if len(delta) > 0 {
   159  						send(ctx, bytes.NewReader(delta), o)
   160  					}
   161  					bufferPool.Put(bfp)
   162  					break
   163  				}
   164  				rolling = true
   165  				old = uint32(block[0])
   166  				delta = append(delta, block[0])
   167  				offset++
   168  			}
   169  
   170  			// Returning this buffer to the pool here gives us 5x more speed
   171  			bufferPool.Put(bfp)
   172  		}
   173  	}()
   174  
   175  	return o, nil
   176  }
   177  
   178  // send sends all deltas over the channel. Any error is reported back using the
   179  // same channel.
   180  func send(ctx context.Context, r io.Reader, o chan<- BlockOperation) {
   181  	for {
   182  		// Allow for cancellation.
   183  		select {
   184  		case <-ctx.Done():
   185  			o <- BlockOperation{
   186  				Error: ctx.Err(),
   187  			}
   188  			return
   189  		default:
   190  			// break out of the select block and continue reading
   191  			break
   192  		}
   193  
   194  		bfp := bufferPool.Get().(*[]byte)
   195  		buffer := *bfp
   196  		defer bufferPool.Put(bfp)
   197  
   198  		n, err := r.Read(buffer)
   199  		if err != nil && err != io.EOF {
   200  			o <- BlockOperation{
   201  				Error: errors.Wrapf(err, "failed reading data block"),
   202  			}
   203  			return
   204  		}
   205  
   206  		// If we don't guard against 0 bytes reads, an operation with index 0 will be sent
   207  		// and the server will duplicate block 0 at the end of the reconstructed file.
   208  		if n > 0 {
   209  			block := buffer[:n]
   210  			o <- BlockOperation{Data: block}
   211  		}
   212  
   213  		if err == io.EOF {
   214  			break
   215  		}
   216  	}
   217  }