package delta

import (
	"bytes"
	"container/list"
	"encoding/binary"
	"fmt"
	"index/suffixarray"
	"io"
)

// minCopy is the minimum number of bytes that is worth emitting as a
// copy instruction. Shorter matches are inserted literally instead.
const minCopy = 3

// maxInsert is the largest payload of a single insert instruction: the
// instruction byte holds the payload length and its high bit must stay
// clear (a set high bit marks a copy instruction).
const maxInsert = 127

// maxCopyLength is the largest length a single copy instruction can
// encode, since at most three length bytes are written.
const maxCopyLength = 0xffffff

// We use a simple interface to make our calculate function easily
// testable and debuggable.
type instruction interface {
	// Write the instruction to w
	write(w io.Writer) error

	// Used by the test suite
	equals(i2 instruction) bool
}

// insert is the insert instruction: add its bytes literally to the stream.
type insert []byte

// write encodes the insert as one or more chunks, each a length byte
// (1..maxInsert) followed by that many literal bytes. An empty insert
// writes nothing.
func (i insert) write(w io.Writer) error {
	for remaining := []byte(i); len(remaining) > 0; {
		n := len(remaining)
		if n > maxInsert {
			n = maxInsert
		}
		if _, err := w.Write([]byte{byte(n)}); err != nil {
			return err
		}
		if _, err := w.Write(remaining[:n]); err != nil {
			return err
		}
		remaining = remaining[n:]
	}
	return nil
}

// equals reports whether i2 is an insert carrying the same bytes.
func (i insert) equals(i2 instruction) bool {
	other, ok := i2.(insert)
	return ok && bytes.Equal(i, other)
}

// copyinst is the copy instruction: copy length bytes starting at offset
// in the source.
type copyinst struct {
	offset, length uint32
}

// equals reports whether i2 is a copyinst with the same offset and length.
func (c copyinst) equals(i2 instruction) bool {
	other, ok := i2.(copyinst)
	return ok && other.offset == c.offset && other.length == c.length
}

// write encodes the copy instruction to w. A length larger than
// maxCopyLength does not fit in one instruction, so it is split into
// consecutive copy instructions covering adjacent source ranges.
func (c copyinst) write(w io.Writer) error {
	offset, length := c.offset, c.length
	for length > maxCopyLength {
		if err := writeCopy(w, offset, maxCopyLength); err != nil {
			return err
		}
		offset += maxCopyLength
		length -= maxCopyLength
	}
	return writeCopy(w, offset, length)
}

// writeCopy writes a single copy instruction. The instruction byte has its
// high bit set; bits 0-3 flag which of the four little-endian offset bytes
// follow and bits 4-6 flag which of the three length bytes follow. Zero
// bytes are omitted. A length of exactly 0x10000 is a special case that is
// encoded with no length bytes at all.
func writeCopy(w io.Writer, offset, length uint32) error {
	// Worst case: 1 instruction byte + 4 offset bytes + 3 length bytes.
	buf := make([]byte, 1, 8)
	buf[0] = 0x80
	for i := uint(0); i < 4; i++ {
		if b := byte(offset >> (8 * i)); b != 0 {
			buf[0] |= 0x01 << i
			buf = append(buf, b)
		}
	}
	if length != 0x10000 {
		for i := uint(0); i < 3; i++ {
			if b := byte(length >> (8 * i)); b != 0 {
				buf[0] |= 0x10 << i
				buf = append(buf, b)
			}
		}
	}
	n, err := w.Write(buf)
	if err != nil {
		return err
	}
	if n != len(buf) {
		return fmt.Errorf("Could not write entire instruction")
	}
	return nil
}

// calculate is the meat of the algorithm. It walks dst from left to right
// and returns the list of instructions (copyinst and insert values) that
// rebuild dst. index is used for every lookup; src itself is not read.
//
// If maxsz is positive and the estimated encoded size has grown past it
// before the next instruction is produced, an error is returned instead.
func calculate(index *suffixarray.Index, src, dst []byte, maxsz int) (*list.List, error) {
	instructions := list.New()
	estsz := 0
	for remaining := dst; len(remaining) > 0; {
		// Check the budget before doing any lookups for this step.
		if maxsz > 0 && estsz > maxsz {
			return nil, fmt.Errorf("Max size exceeded")
		}

		if offset, length := longestPrefix(index, remaining); length > 0 {
			// 9 is a conservative estimate; an encoded copy takes at
			// most 8 bytes.
			estsz += 9
			instructions.PushBack(copyinst{uint32(offset), uint32(length)})
			remaining = remaining[length:]
			continue
		}

		// Nothing copyable starts here. Insert literally up to the next
		// position where a copy can start, or everything that is left if
		// there is no such position. The search is relative to remaining,
		// not to dst, so the offset can be used to slice remaining.
		n := len(remaining)
		if n > minCopy {
			if next := nextPrefixStart(index, remaining); next >= 0 {
				n = next
			}
		}
		estsz += n + 1
		instructions.PushBack(insert(remaining[:n]))
		remaining = remaining[n:]
	}
	return instructions, nil
}

// longestPrefix returns the offset in the indexed source and the length of
// the longest prefix of dst that occurs in the source. Only prefixes of at
// least minCopy bytes count; if there is none the returned length is -1.
func longestPrefix(src *suffixarray.Index, dst []byte) (offset, length int) {
	if len(dst) < minCopy {
		return 0, -1
	}

	// If not even the shortest acceptable prefix occurs, don't bother
	// searching any further.
	shortest := src.Lookup(dst[:minCopy], 1)
	if len(shortest) == 0 {
		return 0, -1
	}
	offset = shortest[0]

	// If the entire dst exists somewhere in src, use that.
	if whole := src.Lookup(dst, 1); len(whole) > 0 {
		return whole[0], len(dst)
	}

	// A prefix of a matching prefix also matches, so binary search for the
	// longest one. Invariant: dst[:lo] occurs in the source (at offset) and
	// dst[:hi] does not.
	lo, hi := minCopy, len(dst)
	for hi-lo > 1 {
		mid := lo + (hi-lo)/2
		if found := src.Lookup(dst[:mid], 1); len(found) > 0 {
			offset, lo = found[0], mid
		} else {
			hi = mid
		}
	}
	return offset, lo
}

// nextPrefixStart returns the smallest index i >= 1 such that the minCopy
// bytes of dst starting at i occur in the indexed source, or -1 if there
// is no such index. Windows shorter than minCopy are not considered,
// because they could never become a copy instruction.
func nextPrefixStart(src *suffixarray.Index, dst []byte) (offset int) {
	for i := 1; i+minCopy <= len(dst); i++ {
		if found := src.Lookup(dst[i:i+minCopy], 1); len(found) > 0 {
			return i
		}
	}
	return -1
}

// CalculateWithIndex calculates how to generate dst using src as the base
// of the deltas and writes the result to w: the length of src and the
// length of dst as varints, followed by the encoded instructions.
//
// index is used for all substring lookups and is expected to be a suffix
// array built over src (as Calculate does). maxsz is passed on to the
// instruction calculation; a positive value makes the call fail with an
// error, before anything is written, once the estimated size exceeds it.
func CalculateWithIndex(index *suffixarray.Index, w io.Writer, src, dst []byte, maxsz int) error {
	instructions, err := calculate(index, src, dst, maxsz)
	if err != nil {
		return err
	}

	// Write src and dst length header
	if err := writeVarInt(w, len(src)); err != nil {
		return err
	}
	if err := writeVarInt(w, len(dst)); err != nil {
		return err
	}

	// Write the instructions themselves
	for e := instructions.Front(); e != nil; e = e.Next() {
		if err := e.Value.(instruction).write(w); err != nil {
			return err
		}
	}
	return nil
}

// Calculate calculates how to generate dst using src as the base of the
// deltas and writes the result to w. It builds a fresh suffix array over
// src; use CalculateWithIndex to reuse an existing index.
func Calculate(w io.Writer, src, dst []byte, maxsz int) error {
	return CalculateWithIndex(suffixarray.New(src), w, src, dst, maxsz)
}

// writeVarInt writes val to w using the unsigned varint encoding of
// encoding/binary.
func writeVarInt(w io.Writer, val int) error {
	var buf [binary.MaxVarintLen64]byte
	n := binary.PutUvarint(buf[:], uint64(val))
	_, err := w.Write(buf[:n])
	return err
}