github.com/Press-One/go-update@v1.0.0/internal/binarydist/diff.go (about)

     1  package binarydist
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/binary"
     6  	"io"
     7  	"io/ioutil"
     8  )
     9  
    10  func swap(a []int, i, j int) { a[i], a[j] = a[j], a[i] }
    11  
    12  func split(I, V []int, start, length, h int) {
    13  	var i, j, k, x, jj, kk int
    14  
    15  	if length < 16 {
    16  		for k = start; k < start+length; k += j {
    17  			j = 1
    18  			x = V[I[k]+h]
    19  			for i = 1; k+i < start+length; i++ {
    20  				if V[I[k+i]+h] < x {
    21  					x = V[I[k+i]+h]
    22  					j = 0
    23  				}
    24  				if V[I[k+i]+h] == x {
    25  					swap(I, k+i, k+j)
    26  					j++
    27  				}
    28  			}
    29  			for i = 0; i < j; i++ {
    30  				V[I[k+i]] = k + j - 1
    31  			}
    32  			if j == 1 {
    33  				I[k] = -1
    34  			}
    35  		}
    36  		return
    37  	}
    38  
    39  	x = V[I[start+length/2]+h]
    40  	jj = 0
    41  	kk = 0
    42  	for i = start; i < start+length; i++ {
    43  		if V[I[i]+h] < x {
    44  			jj++
    45  		}
    46  		if V[I[i]+h] == x {
    47  			kk++
    48  		}
    49  	}
    50  	jj += start
    51  	kk += jj
    52  
    53  	i = start
    54  	j = 0
    55  	k = 0
    56  	for i < jj {
    57  		if V[I[i]+h] < x {
    58  			i++
    59  		} else if V[I[i]+h] == x {
    60  			swap(I, i, jj+j)
    61  			j++
    62  		} else {
    63  			swap(I, i, kk+k)
    64  			k++
    65  		}
    66  	}
    67  
    68  	for jj+j < kk {
    69  		if V[I[jj+j]+h] == x {
    70  			j++
    71  		} else {
    72  			swap(I, jj+j, kk+k)
    73  			k++
    74  		}
    75  	}
    76  
    77  	if jj > start {
    78  		split(I, V, start, jj-start, h)
    79  	}
    80  
    81  	for i = 0; i < kk-jj; i++ {
    82  		V[I[jj+i]] = kk - 1
    83  	}
    84  	if jj == kk-1 {
    85  		I[jj] = -1
    86  	}
    87  
    88  	if start+length > kk {
    89  		split(I, V, kk, start+length-kk, h)
    90  	}
    91  }
    92  
    93  func qsufsort(obuf []byte) []int {
    94  	var buckets [256]int
    95  	var i, h int
    96  	I := make([]int, len(obuf)+1)
    97  	V := make([]int, len(obuf)+1)
    98  
    99  	for _, c := range obuf {
   100  		buckets[c]++
   101  	}
   102  	for i = 1; i < 256; i++ {
   103  		buckets[i] += buckets[i-1]
   104  	}
   105  	copy(buckets[1:], buckets[:])
   106  	buckets[0] = 0
   107  
   108  	for i, c := range obuf {
   109  		buckets[c]++
   110  		I[buckets[c]] = i
   111  	}
   112  
   113  	I[0] = len(obuf)
   114  	for i, c := range obuf {
   115  		V[i] = buckets[c]
   116  	}
   117  
   118  	V[len(obuf)] = 0
   119  	for i = 1; i < 256; i++ {
   120  		if buckets[i] == buckets[i-1]+1 {
   121  			I[buckets[i]] = -1
   122  		}
   123  	}
   124  	I[0] = -1
   125  
   126  	for h = 1; I[0] != -(len(obuf) + 1); h += h {
   127  		var n int
   128  		for i = 0; i < len(obuf)+1; {
   129  			if I[i] < 0 {
   130  				n -= I[i]
   131  				i -= I[i]
   132  			} else {
   133  				if n != 0 {
   134  					I[i-n] = -n
   135  				}
   136  				n = V[I[i]] + 1 - i
   137  				split(I, V, i, n, h)
   138  				i += n
   139  				n = 0
   140  			}
   141  		}
   142  		if n != 0 {
   143  			I[i-n] = -n
   144  		}
   145  	}
   146  
   147  	for i = 0; i < len(obuf)+1; i++ {
   148  		I[V[i]] = i
   149  	}
   150  	return I
   151  }
   152  
   153  func matchlen(a, b []byte) (i int) {
   154  	for i < len(a) && i < len(b) && a[i] == b[i] {
   155  		i++
   156  	}
   157  	return i
   158  }
   159  
   160  func search(I []int, obuf, nbuf []byte, st, en int) (pos, n int) {
   161  	if en-st < 2 {
   162  		x := matchlen(obuf[I[st]:], nbuf)
   163  		y := matchlen(obuf[I[en]:], nbuf)
   164  
   165  		if x > y {
   166  			return I[st], x
   167  		} else {
   168  			return I[en], y
   169  		}
   170  	}
   171  
   172  	x := st + (en-st)/2
   173  	if bytes.Compare(obuf[I[x]:], nbuf) < 0 {
   174  		return search(I, obuf, nbuf, x, en)
   175  	} else {
   176  		return search(I, obuf, nbuf, st, x)
   177  	}
   178  	panic("unreached")
   179  }
   180  
   181  // Diff computes the difference between old and new, according to the bsdiff
   182  // algorithm, and writes the result to patch.
   183  func Diff(old, new io.Reader, patch io.Writer) error {
   184  	obuf, err := ioutil.ReadAll(old)
   185  	if err != nil {
   186  		return err
   187  	}
   188  
   189  	nbuf, err := ioutil.ReadAll(new)
   190  	if err != nil {
   191  		return err
   192  	}
   193  
   194  	pbuf, err := diffBytes(obuf, nbuf)
   195  	if err != nil {
   196  		return err
   197  	}
   198  
   199  	_, err = patch.Write(pbuf)
   200  	return err
   201  }
   202  
   203  func diffBytes(obuf, nbuf []byte) ([]byte, error) {
   204  	var patch seekBuffer
   205  	err := diff(obuf, nbuf, &patch)
   206  	if err != nil {
   207  		return nil, err
   208  	}
   209  	return patch.buf, nil
   210  }
   211  
   212  func diff(obuf, nbuf []byte, patch io.WriteSeeker) error {
   213  	var lenf int
   214  	I := qsufsort(obuf)
   215  	db := make([]byte, len(nbuf))
   216  	eb := make([]byte, len(nbuf))
   217  	var dblen, eblen int
   218  
   219  	var hdr header
   220  	hdr.Magic = magic
   221  	hdr.NewSize = int64(len(nbuf))
   222  	err := binary.Write(patch, signMagLittleEndian{}, &hdr)
   223  	if err != nil {
   224  		return err
   225  	}
   226  
   227  	// Compute the differences, writing ctrl as we go
   228  	pfbz2, err := newBzip2Writer(patch)
   229  	if err != nil {
   230  		return err
   231  	}
   232  	var scan, pos, length int
   233  	var lastscan, lastpos, lastoffset int
   234  	for scan < len(nbuf) {
   235  		var oldscore int
   236  		scan += length
   237  		for scsc := scan; scan < len(nbuf); scan++ {
   238  			pos, length = search(I, obuf, nbuf[scan:], 0, len(obuf))
   239  
   240  			for ; scsc < scan+length; scsc++ {
   241  				if scsc+lastoffset < len(obuf) &&
   242  					obuf[scsc+lastoffset] == nbuf[scsc] {
   243  					oldscore++
   244  				}
   245  			}
   246  
   247  			if (length == oldscore && length != 0) || length > oldscore+8 {
   248  				break
   249  			}
   250  
   251  			if scan+lastoffset < len(obuf) && obuf[scan+lastoffset] == nbuf[scan] {
   252  				oldscore--
   253  			}
   254  		}
   255  
   256  		if length != oldscore || scan == len(nbuf) {
   257  			var s, Sf int
   258  			lenf = 0
   259  			for i := 0; lastscan+i < scan && lastpos+i < len(obuf); {
   260  				if obuf[lastpos+i] == nbuf[lastscan+i] {
   261  					s++
   262  				}
   263  				i++
   264  				if s*2-i > Sf*2-lenf {
   265  					Sf = s
   266  					lenf = i
   267  				}
   268  			}
   269  
   270  			lenb := 0
   271  			if scan < len(nbuf) {
   272  				var s, Sb int
   273  				for i := 1; (scan >= lastscan+i) && (pos >= i); i++ {
   274  					if obuf[pos-i] == nbuf[scan-i] {
   275  						s++
   276  					}
   277  					if s*2-i > Sb*2-lenb {
   278  						Sb = s
   279  						lenb = i
   280  					}
   281  				}
   282  			}
   283  
   284  			if lastscan+lenf > scan-lenb {
   285  				overlap := (lastscan + lenf) - (scan - lenb)
   286  				s := 0
   287  				Ss := 0
   288  				lens := 0
   289  				for i := 0; i < overlap; i++ {
   290  					if nbuf[lastscan+lenf-overlap+i] == obuf[lastpos+lenf-overlap+i] {
   291  						s++
   292  					}
   293  					if nbuf[scan-lenb+i] == obuf[pos-lenb+i] {
   294  						s--
   295  					}
   296  					if s > Ss {
   297  						Ss = s
   298  						lens = i + 1
   299  					}
   300  				}
   301  
   302  				lenf += lens - overlap
   303  				lenb -= lens
   304  			}
   305  
   306  			for i := 0; i < lenf; i++ {
   307  				db[dblen+i] = nbuf[lastscan+i] - obuf[lastpos+i]
   308  			}
   309  			for i := 0; i < (scan-lenb)-(lastscan+lenf); i++ {
   310  				eb[eblen+i] = nbuf[lastscan+lenf+i]
   311  			}
   312  
   313  			dblen += lenf
   314  			eblen += (scan - lenb) - (lastscan + lenf)
   315  
   316  			err = binary.Write(pfbz2, signMagLittleEndian{}, int64(lenf))
   317  			if err != nil {
   318  				pfbz2.Close()
   319  				return err
   320  			}
   321  
   322  			val := (scan - lenb) - (lastscan + lenf)
   323  			err = binary.Write(pfbz2, signMagLittleEndian{}, int64(val))
   324  			if err != nil {
   325  				pfbz2.Close()
   326  				return err
   327  			}
   328  
   329  			val = (pos - lenb) - (lastpos + lenf)
   330  			err = binary.Write(pfbz2, signMagLittleEndian{}, int64(val))
   331  			if err != nil {
   332  				pfbz2.Close()
   333  				return err
   334  			}
   335  
   336  			lastscan = scan - lenb
   337  			lastpos = pos - lenb
   338  			lastoffset = pos - scan
   339  		}
   340  	}
   341  	err = pfbz2.Close()
   342  	if err != nil {
   343  		return err
   344  	}
   345  
   346  	// Compute size of compressed ctrl data
   347  	l64, err := patch.Seek(0, 1)
   348  	if err != nil {
   349  		return err
   350  	}
   351  	hdr.CtrlLen = int64(l64 - 32)
   352  
   353  	// Write compressed diff data
   354  	pfbz2, err = newBzip2Writer(patch)
   355  	if err != nil {
   356  		return err
   357  	}
   358  	n, err := pfbz2.Write(db[:dblen])
   359  	if err != nil {
   360  		pfbz2.Close()
   361  		return err
   362  	}
   363  	if n != dblen {
   364  		pfbz2.Close()
   365  		return io.ErrShortWrite
   366  	}
   367  	err = pfbz2.Close()
   368  	if err != nil {
   369  		return err
   370  	}
   371  
   372  	// Compute size of compressed diff data
   373  	n64, err := patch.Seek(0, 1)
   374  	if err != nil {
   375  		return err
   376  	}
   377  	hdr.DiffLen = n64 - l64
   378  
   379  	// Write compressed extra data
   380  	pfbz2, err = newBzip2Writer(patch)
   381  	if err != nil {
   382  		return err
   383  	}
   384  	n, err = pfbz2.Write(eb[:eblen])
   385  	if err != nil {
   386  		pfbz2.Close()
   387  		return err
   388  	}
   389  	if n != eblen {
   390  		pfbz2.Close()
   391  		return io.ErrShortWrite
   392  	}
   393  	err = pfbz2.Close()
   394  	if err != nil {
   395  		return err
   396  	}
   397  
   398  	// Seek to the beginning, write the header, and close the file
   399  	_, err = patch.Seek(0, 0)
   400  	if err != nil {
   401  		return err
   402  	}
   403  	err = binary.Write(patch, signMagLittleEndian{}, &hdr)
   404  	if err != nil {
   405  		return err
   406  	}
   407  	return nil
   408  }