github.com/driusan/dgit@v0.0.0-20221118233547-f39f0c15edbb/git/delta/calculator.go (about)

     1  package delta
     2  
     3  import (
     4  	"bytes"
     5  	"container/list"
     6  	"encoding/binary"
     7  	"fmt"
     8  	"index/suffixarray"
     9  	"io"
    10  )
    11  
// minCopy is the shortest run of bytes that is worth encoding as a copy
// from the source. longestPrefix reports no match for anything shorter
// than this, so shorter runs end up in insert instructions instead.
const minCopy = 3
    15  
// instruction is a single operation in a delta stream. Keeping it behind
// a small interface makes the calculate function easy to test and debug:
// the computed instructions can be inspected and compared before they
// are serialized.
type instruction interface {
	// write serializes the instruction to w, returning the first
	// error encountered while writing.
	write(w io.Writer) error

	// equals reports whether i2 is the same instruction as the
	// receiver. Used by the test suite.
	equals(i2 instruction) bool
}
    25  
    26  // insert instruction. Insert the bytes into the stream.
    27  type insert []byte
    28  
    29  func (i insert) write(w io.Writer) error {
    30  	remaining := []byte(i)
    31  	for len(remaining) > 0 {
    32  		if len(remaining) < 128 {
    33  			// What's left fits in a single insert
    34  			// instruction
    35  			if _, err := w.Write([]byte{byte(len(remaining))}); err != nil {
    36  				return err
    37  			}
    38  			if _, err := w.Write(remaining); err != nil {
    39  				return err
    40  			}
    41  			remaining = nil
    42  		} else {
    43  			// What's left doesn't fit in a single
    44  			// insert instruction, so insert the largest
    45  			// amount that does
    46  			if _, err := w.Write([]byte{127}); err != nil {
    47  				return err
    48  			}
    49  			if _, err := w.Write(remaining[:127]); err != nil {
    50  				return err
    51  			}
    52  			remaining = remaining[127:]
    53  		}
    54  	}
    55  	return nil
    56  }
    57  
    58  func (i insert) equals(i2 instruction) bool {
    59  	i2i, ok := i2.(insert)
    60  	if !ok {
    61  		return false
    62  	}
    63  	return string(i) == string(i2i)
    64  }
    65  
    66  type copyinst struct {
    67  	offset, length uint32
    68  }
    69  
    70  func (c copyinst) equals(i2 instruction) bool {
    71  	i2c, ok := i2.(copyinst)
    72  	if !ok {
    73  		return false
    74  	}
    75  	return i2c.offset == c.offset && i2c.length == c.length
    76  }
    77  
    78  // The meat of our algorithm. Calculate a list of instructions to
    79  // insert into the stream.
    80  func calculate(index *suffixarray.Index, src, dst []byte, maxsz int) (*list.List, error) {
    81  	instructions := list.New()
    82  	remaining := dst
    83  	estsz := 0
    84  	for len(remaining) > 0 {
    85  		nexto, nextl := longestPrefix(index, remaining)
    86  		if maxsz > 0 && estsz > maxsz {
    87  			return nil, fmt.Errorf("Max size exceeded")
    88  		}
    89  		if nextl > 0 {
    90  			estsz += 9
    91  			instructions.PushBack(copyinst{uint32(nexto), uint32(nextl)})
    92  			remaining = remaining[nextl:]
    93  			continue
    94  		}
    95  		// FIXME: Find where the next prefix > minCopy starts,
    96  		// insert until then instead of always inserting minCopy
    97  		if len(remaining) <= minCopy {
    98  			estsz += len(remaining) + 1
    99  			instructions.PushBack(insert(remaining))
   100  			remaining = nil
   101  			continue
   102  		}
   103  
   104  		nextOffset := nextPrefixStart(index, dst)
   105  		if nextOffset >= 0 {
   106  			estsz += 1 + len(remaining) - nextOffset
   107  			instructions.PushBack(insert(remaining[:nextOffset]))
   108  			remaining = remaining[nextOffset:]
   109  		} else {
   110  			// nextPrefixStart went through the whole string
   111  			// and didn't find anything, so insert the whole string
   112  			estsz += len(remaining) + 1
   113  			instructions.PushBack(insert(remaining))
   114  			remaining = nil
   115  		}
   116  
   117  	}
   118  	return instructions, nil
   119  }
   120  
   121  // Returns the longest prefix of dst that is found somewhere in src.
   122  func longestPrefix(src *suffixarray.Index, dst []byte) (offset, length int) {
   123  	// First the simple edge simple cases. Is it smaller than minCopy? Does
   124  	// it have a prefix of at least minCopy?
   125  	if len(dst) < minCopy {
   126  		return 0, -1
   127  	}
   128  
   129  	// If there's no prefix at all of at least length minCopy,
   130  	// don't bother searching for one.
   131  	if result := src.Lookup(dst[:minCopy], 1); len(result) == 0 {
   132  		return 0, -1
   133  	}
   134  
   135  	// If the entire dst exists somewhere in src, return the first
   136  	// one found.
   137  	if result := src.Lookup(dst, 1); len(result) > 0 {
   138  		return result[0], len(dst)
   139  	}
   140  
   141  	// We know there's a substring somewhere but the whole thing
   142  	// isn't a substring, brute force the location of the longest
   143  	// substring with a binary search of our suffix array.
   144  	length = -1
   145  	minIdx := minCopy
   146  	maxIdx := len(dst)
   147  	for i := minIdx; maxIdx-minIdx > 1; i = ((maxIdx - minIdx) / 2) + minIdx {
   148  		if result := src.Lookup(dst[:i], 1); result != nil {
   149  			offset = result[0]
   150  			length = i
   151  			minIdx = i
   152  		} else {
   153  			maxIdx = i - 1
   154  		}
   155  	}
   156  	return
   157  }
   158  
   159  // Find the start of the next prefix of dst that has a size of at least
   160  // minCopy
   161  func nextPrefixStart(src *suffixarray.Index, dst []byte) (offset int) {
   162  	for i := 1; i < len(dst); i++ {
   163  		end := i + minCopy
   164  		if end > len(dst) {
   165  			end = len(dst)
   166  		}
   167  		if result := src.Lookup(dst[i:end], 1); result != nil {
   168  			return i
   169  		}
   170  	}
   171  	return -1
   172  }
   173  
   174  func CalculateWithIndex(index *suffixarray.Index, w io.Writer, src, dst []byte, maxsz int) error {
   175  	instructions, err := calculate(index, src, dst, maxsz)
   176  	if err != nil {
   177  		return err
   178  	}
   179  	// Write src and dst length header
   180  	if err := writeVarInt(w, len(src)); err != nil {
   181  		return err
   182  	}
   183  	if err := writeVarInt(w, len(dst)); err != nil {
   184  		return err
   185  	}
   186  	// Write the instructions themselves
   187  	for e := instructions.Front(); e != nil; e = e.Next() {
   188  		inst := e.Value.(instruction)
   189  
   190  		if err := inst.write(w); err != nil {
   191  			return err
   192  		}
   193  	}
   194  	return nil
   195  }
   196  
   197  // Calculate how to generate dst using src as the base
   198  // of the deltas and write the result to w.
   199  func Calculate(w io.Writer, src, dst []byte, maxsz int) error {
   200  	index := suffixarray.New(src)
   201  	return CalculateWithIndex(index, w, src, dst, maxsz)
   202  }
   203  
   204  func (c copyinst) write(w io.Writer) error {
   205  	var buf bytes.Buffer
   206  	instbyte := byte(0x80)
   207  
   208  	// Set the offset bits in the instruction
   209  	if c.offset&0xff != 0 {
   210  		instbyte |= 0x01
   211  	}
   212  	if c.offset&0xff00 != 0 {
   213  		instbyte |= 0x02
   214  	}
   215  	if c.offset&0xff0000 != 0 {
   216  		instbyte |= 0x04
   217  	}
   218  	if c.offset&0xff000000 != 0 {
   219  		instbyte |= 0x08
   220  	}
   221  
   222  	// Set the length bits in the instruction
   223  	if c.length > 0xffffff {
   224  		// FIXME: Decompose this into multiple copy
   225  		// instructions
   226  	} else if c.length == 0x10000 {
   227  		// 0x10000 is a special case, encoded as 0
   228  	} else {
   229  		// Encode the bits in the byte that denote
   230  		// which bits are incoming in the stream
   231  		// for length
   232  		if c.length&0xff != 0 {
   233  			instbyte |= 0x10
   234  		}
   235  
   236  		if c.length&0xff00 != 0 {
   237  			instbyte |= 0x20
   238  		}
   239  
   240  		if c.length&0xff0000 != 0 {
   241  			instbyte |= 0x40
   242  		}
   243  	}
   244  	// Write the header
   245  	if err := buf.WriteByte(instbyte); err != nil {
   246  		return err
   247  	}
   248  
   249  	// Write the offset bytes
   250  	if val := byte(c.offset & 0xff); val != 0 {
   251  		if err := buf.WriteByte(val); err != nil {
   252  			return err
   253  		}
   254  	}
   255  	if val := byte(c.offset >> 8 & 0xff); val != 0 {
   256  		if err := buf.WriteByte(val); err != nil {
   257  			return err
   258  		}
   259  	}
   260  	if val := byte(c.offset >> 16 & 0xff); val != 0 {
   261  		if err := buf.WriteByte(val); err != nil {
   262  			return err
   263  		}
   264  	}
   265  	if val := byte(c.offset >> 24 & 0xff); val != 0 {
   266  		if err := buf.WriteByte(val); err != nil {
   267  			return err
   268  		}
   269  	}
   270  
   271  	// Write the length
   272  	if c.length != 0x10000 {
   273  		if val := byte(c.length & 0xff); val != 0 {
   274  			if err := buf.WriteByte(val); err != nil {
   275  				return err
   276  			}
   277  		}
   278  		if val := byte((c.length >> 8) & 0xff); val != 0 {
   279  			if err := buf.WriteByte(val); err != nil {
   280  				return err
   281  			}
   282  
   283  		}
   284  		if val := byte((c.length >> 16) & 0xff); val != 0 {
   285  			if err := buf.WriteByte(val); err != nil {
   286  				return err
   287  			}
   288  
   289  		}
   290  
   291  	}
   292  	if n, err := w.Write(buf.Bytes()); err != nil {
   293  		return err
   294  	} else if n != buf.Len() {
   295  		return fmt.Errorf("Could not write entire instruction")
   296  	}
   297  	return nil
   298  }
   299  
   300  func writeVarInt(w io.Writer, val int) error {
   301  	var buf [128]byte
   302  	n := binary.PutUvarint(buf[:], uint64(val))
   303  	if _, err := w.Write(buf[:n]); err != nil {
   304  		return err
   305  	}
   306  	return nil
   307  }