// Source: github.com/outcaste-io/ristretto@v0.2.3/z/buffer.go

/*
 * Copyright 2020 Dgraph Labs, Inc. and Contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package z

import (
	"encoding/binary"
	"fmt"
	"io/ioutil"
	"os"
	"sort"
	"sync/atomic"

	"github.com/pkg/errors"
)

const (
	// defaultCapacity is the smallest capacity, in bytes, that the buffer constructors will
	// allocate; smaller requests are rounded up to it.
	defaultCapacity = 64
	// defaultTag is the tag NewBuffer uses when the caller passes an empty one.
	defaultTag = "buffer"
)

// Buffer is the equivalent of bytes.Buffer without the ability to read. It is NOT thread-safe.
//
// In UseCalloc mode, z.Calloc is used to allocate memory, which depending upon how the code is
// compiled could use jemalloc for allocations.
//
// In UseMmap mode, Buffer uses file mmap to allocate memory. This allows us to store big data
// structures without using physical memory.
//
// MaxSize can be set to limit the memory usage.
44 type Buffer struct { 45 padding uint64 // number of starting bytes used for padding 46 offset uint64 // used length of the buffer 47 buf []byte // backing slice for the buffer 48 bufType BufferType // type of the underlying buffer 49 curSz int // capacity of the buffer 50 maxSz int // causes a panic if the buffer grows beyond this size 51 mmapFile *MmapFile // optional mmap backing for the buffer 52 autoMmapAfter int // Calloc falls back to an mmaped tmpfile after crossing this size 53 autoMmapDir string // directory for autoMmap to create a tempfile in 54 persistent bool // when enabled, Release will not delete the underlying mmap file 55 tag string // used for jemalloc stats 56 } 57 58 func NewBuffer(capacity int, tag string) *Buffer { 59 if capacity < defaultCapacity { 60 capacity = defaultCapacity 61 } 62 if tag == "" { 63 tag = defaultTag 64 } 65 return &Buffer{ 66 buf: Calloc(capacity, tag), 67 bufType: UseCalloc, 68 curSz: capacity, 69 offset: 8, 70 padding: 8, 71 tag: tag, 72 } 73 } 74 75 // It is the caller's responsibility to set offset after this, because Buffer 76 // doesn't remember what it was. 
77 func NewBufferPersistent(path string, capacity int) (*Buffer, error) { 78 file, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0666) 79 if err != nil { 80 return nil, err 81 } 82 buffer, err := newBufferFile(file, capacity) 83 if err != nil { 84 return nil, err 85 } 86 buffer.persistent = true 87 return buffer, nil 88 } 89 90 func NewBufferTmp(dir string, capacity int) (*Buffer, error) { 91 if dir == "" { 92 dir = tmpDir 93 } 94 file, err := ioutil.TempFile(dir, "buffer") 95 if err != nil { 96 return nil, err 97 } 98 return newBufferFile(file, capacity) 99 } 100 101 func newBufferFile(file *os.File, capacity int) (*Buffer, error) { 102 if capacity < defaultCapacity { 103 capacity = defaultCapacity 104 } 105 mmapFile, err := OpenMmapFileUsing(file, capacity, true) 106 if err != nil && err != NewFile { 107 return nil, err 108 } 109 buf := &Buffer{ 110 buf: mmapFile.Data, 111 bufType: UseMmap, 112 curSz: len(mmapFile.Data), 113 mmapFile: mmapFile, 114 offset: 8, 115 padding: 8, 116 } 117 return buf, nil 118 } 119 120 func NewBufferSlice(slice []byte) *Buffer { 121 return &Buffer{ 122 offset: uint64(len(slice)), 123 buf: slice, 124 bufType: UseInvalid, 125 } 126 } 127 128 func (b *Buffer) WithAutoMmap(threshold int, path string) *Buffer { 129 if b.bufType != UseCalloc { 130 panic("can only autoMmap with UseCalloc") 131 } 132 b.autoMmapAfter = threshold 133 if path == "" { 134 b.autoMmapDir = tmpDir 135 } else { 136 b.autoMmapDir = path 137 } 138 return b 139 } 140 141 func (b *Buffer) WithMaxSize(size int) *Buffer { 142 b.maxSz = size 143 return b 144 } 145 146 func (b *Buffer) IsEmpty() bool { 147 return int(b.offset) == b.StartOffset() 148 } 149 150 // LenWithPadding would return the number of bytes written to the buffer so far 151 // plus the padding at the start of the buffer. 
152 func (b *Buffer) LenWithPadding() int { 153 return int(atomic.LoadUint64(&b.offset)) 154 } 155 156 // LenNoPadding would return the number of bytes written to the buffer so far 157 // (without the padding). 158 func (b *Buffer) LenNoPadding() int { 159 return int(atomic.LoadUint64(&b.offset) - b.padding) 160 } 161 162 // Bytes would return all the written bytes as a slice. 163 func (b *Buffer) Bytes() []byte { 164 off := atomic.LoadUint64(&b.offset) 165 return b.buf[b.padding:off] 166 } 167 168 // Grow would grow the buffer to have at least n more bytes. In case the buffer is at capacity, it 169 // would reallocate twice the size of current capacity + n, to ensure n bytes can be written to the 170 // buffer without further allocation. In UseMmap mode, this might result in underlying file 171 // expansion. 172 func (b *Buffer) Grow(n int) { 173 if b.buf == nil { 174 panic("z.Buffer needs to be initialized before using") 175 } 176 if b.maxSz > 0 && int(b.offset)+n > b.maxSz { 177 err := fmt.Errorf( 178 "z.Buffer max size exceeded: %d offset: %d grow: %d", b.maxSz, b.offset, n) 179 panic(err) 180 } 181 if int(b.offset)+n < b.curSz { 182 return 183 } 184 185 // Calculate new capacity. 186 growBy := b.curSz + n 187 // Don't allocate more than 1GB at a time. 188 if growBy > 1<<30 { 189 growBy = 1 << 30 190 } 191 // Allocate at least n, even if it exceeds the 1GB limit above. 192 if n > growBy { 193 growBy = n 194 } 195 b.curSz += growBy 196 197 switch b.bufType { 198 case UseCalloc: 199 // If autoMmap gets triggered, copy the slice over to an mmaped file. 
200 if b.autoMmapAfter > 0 && b.curSz > b.autoMmapAfter { 201 b.bufType = UseMmap 202 file, err := ioutil.TempFile(b.autoMmapDir, "") 203 if err != nil { 204 panic(err) 205 } 206 mmapFile, err := OpenMmapFileUsing(file, b.curSz, true) 207 if err != nil && err != NewFile { 208 panic(err) 209 } 210 assert(int(b.offset) == copy(mmapFile.Data, b.buf[:b.offset])) 211 Free(b.buf) 212 b.mmapFile = mmapFile 213 b.buf = mmapFile.Data 214 break 215 } 216 217 // Else, reallocate the slice. 218 newBuf := Calloc(b.curSz, b.tag) 219 assert(int(b.offset) == copy(newBuf, b.buf[:b.offset])) 220 Free(b.buf) 221 b.buf = newBuf 222 223 case UseMmap: 224 // Truncate and remap the underlying file. 225 if err := b.mmapFile.Truncate(int64(b.curSz)); err != nil { 226 err = errors.Wrapf(err, 227 "while trying to truncate file: %s to size: %d", b.mmapFile.Fd.Name(), b.curSz) 228 panic(err) 229 } 230 b.buf = b.mmapFile.Data 231 232 default: 233 panic("can only use Grow on UseCalloc and UseMmap buffers") 234 } 235 } 236 237 // Allocate is a way to get a slice of size n back from the buffer. This slice can be directly 238 // written to. Warning: Allocate is not thread-safe. The byte slice returned MUST be used before 239 // further calls to Buffer. 240 func (b *Buffer) Allocate(n int) []byte { 241 b.Grow(n) 242 off := b.offset 243 b.offset += uint64(n) 244 return b.buf[off:int(b.offset)] 245 } 246 247 // AllocateOffset works the same way as allocate, but instead of returning a byte slice, it returns 248 // the offset of the allocation. 249 func (b *Buffer) AllocateOffset(n int) int { 250 b.Grow(n) 251 b.offset += uint64(n) 252 return int(b.offset) - n 253 } 254 255 func (b *Buffer) writeLen(sz int) { 256 buf := b.Allocate(4) 257 binary.BigEndian.PutUint32(buf, uint32(sz)) 258 } 259 260 // SliceAllocate would encode the size provided into the buffer, followed by a call to Allocate, 261 // hence returning the slice of size sz. 
// This can be used to allocate a lot of small buffers into
// this big buffer.
// Note that SliceAllocate should NOT be mixed with normal calls to Write.
func (b *Buffer) SliceAllocate(sz int) []byte {
	// Reserve room for the 4-byte length prefix and the payload in one go.
	b.Grow(4 + sz)
	b.writeLen(sz)
	return b.Allocate(sz)
}

// StartOffset returns the offset of the first byte after the padding.
func (b *Buffer) StartOffset() int {
	return int(b.padding)
}

// WriteSlice appends slice to the buffer as a length-prefixed entry, using SliceAllocate.
func (b *Buffer) WriteSlice(slice []byte) {
	dst := b.SliceAllocate(len(slice))
	assert(len(slice) == copy(dst, slice))
}

// SliceIterate walks the length-prefixed entries from StartOffset onwards and calls f on each
// non-empty one. Zero-length entries are skipped. Iteration stops at the first error returned by
// f, and that error is returned. An empty buffer yields nil without calling f.
func (b *Buffer) SliceIterate(f func(slice []byte) error) error {
	if b.IsEmpty() {
		return nil
	}
	slice, next := []byte{}, b.StartOffset()
	// Slice reports -1 as the next offset once the last entry has been read.
	for next >= 0 {
		slice, next = b.Slice(next)
		if len(slice) == 0 {
			continue
		}
		if err := f(slice); err != nil {
			return err
		}
	}
	return nil
}

// Backing modes of a Buffer.
const (
	// UseCalloc buffers get their memory from Calloc.
	UseCalloc BufferType = iota
	// UseMmap buffers are backed by an mmap file.
	UseMmap
	// UseInvalid is set by NewBufferSlice; Grow panics on such buffers.
	UseInvalid
)

// BufferType identifies how the memory of a Buffer is backed.
type BufferType int

// String returns the name of the buffer type. Any value other than UseCalloc and UseMmap is
// reported as "UseInvalid".
func (t BufferType) String() string {
	switch t {
	case UseCalloc:
		return "UseCalloc"
	case UseMmap:
		return "UseMmap"
	default:
		return "UseInvalid"
	}
}

// LessFunc reports whether entry a sorts before entry b.
type LessFunc func(a, b []byte) bool

// sortHelper carries the state shared by the steps of SortSliceBetween.
type sortHelper struct {
	offsets []int    // chunk boundaries in b.buf: start of every 1024th entry, then the end offset
	b       *Buffer  // buffer being sorted in place
	tmp     *Buffer  // scratch buffer used by sortSmall and merge
	less    LessFunc // ordering supplied by the caller
	small   []int    // reusable list of entry offsets within the chunk being sorted
}

// sortSmall sorts, in place, the length-prefixed entries of s.b that lie in [start, end).
func (s *sortHelper) sortSmall(start, end int) {
	s.tmp.Reset()
	s.small = s.small[:0]
	// Collect the offset of every entry in the range.
	next := start
	for next >= 0 && next < end {
		s.small = append(s.small, next)
		_, next = s.b.Slice(next)
	}

	// We are sorting the slices pointed to by s.small offsets, but only moving the offsets around.
	sort.Slice(s.small, func(i, j int) bool {
		left, _ := s.b.Slice(s.small[i])
		right, _ := s.b.Slice(s.small[j])
		return s.less(left, right)
	})
	// Now we iterate over the s.small offsets and copy over the slices. The result is now in order.
	// Each entry is written to tmp together with its 4-byte length prefix.
	for _, off := range s.small {
		s.tmp.Write(rawSlice(s.b.buf[off:]))
	}
	// The sorted copy must cover the range exactly before it replaces the original bytes.
	assert(end-start == copy(s.b.buf[start:end], s.tmp.Bytes()))
}

// assert calls fatalf with an "Assertion failure" error when b is false.
func assert(b bool) {
	if !b {
		fatalf("%+v", errors.Errorf("Assertion failure"))
	}
}

// check calls fatalf with err when err is not nil.
func check(err error) {
	if err != nil {
		fatalf("%+v", err)
	}
}

// check2 is check for calls that return a value along with the error; the value is discarded.
func check2(_ interface{}, err error) {
	check(err)
}

// merge merges two runs of length-prefixed entries, left and right, into s.b.buf[start:end],
// ordering them with s.less. It returns without touching the buffer when either run is empty.
func (s *sortHelper) merge(left, right []byte, start, end int) {
	if len(left) == 0 || len(right) == 0 {
		return
	}
	// Work from a copy of left held in tmp: the merged output is written into s.b.buf from start
	// onwards, and sort passes in a left run that begins at that same offset.
	s.tmp.Reset()
	check2(s.tmp.Write(left))
	left = s.tmp.Bytes()

	// ls and rs hold the current head entry (prefix included) of each run.
	var ls, rs []byte

	// copyLeft emits the head of left and advances both left and the write position.
	copyLeft := func() {
		assert(len(ls) == copy(s.b.buf[start:], ls))
		left = left[len(ls):]
		start += len(ls)
	}
	// copyRight emits the head of right and advances both right and the write position.
	copyRight := func() {
		assert(len(rs) == copy(s.b.buf[start:], rs))
		right = right[len(rs):]
		start += len(rs)
	}

	for start < end {
		// Once one run is used up, the rest of the other fills the remaining output exactly.
		if len(left) == 0 {
			assert(len(right) == copy(s.b.buf[start:end], right))
			return
		}
		if len(right) == 0 {
			assert(len(left) == copy(s.b.buf[start:end], left))
			return
		}
		ls = rawSlice(left)
		rs = rawSlice(right)

		// We skip the first 4 bytes in the rawSlice, because that stores the length.
		if s.less(ls[4:], rs[4:]) {
			copyLeft()
		} else {
			copyRight()
		}
	}
}

// sort merge-sorts the chunks delimited by s.offsets[lo] and s.offsets[hi] and returns the
// sorted region of the buffer. lo and hi are indices into s.offsets, not buffer offsets.
func (s *sortHelper) sort(lo, hi int) []byte {
	assert(lo <= hi)

	mid := lo + (hi-lo)/2
	loff, hoff := s.offsets[lo], s.offsets[hi]
	if lo == mid {
		// No need to sort, just return the buffer.
		return s.b.buf[loff:hoff]
	}

	// lo, mid would sort from [offset[lo], offset[mid]) .
	left := s.sort(lo, mid)
	// Typically we'd use mid+1, but here mid represents an offset in the buffer. Each offset
	// marks the start of a chunk of up to 1024 entries. So, if we do mid+1, we'd skip over
	// those entries.
	right := s.sort(mid, hi)

	s.merge(left, right, loff, hoff)
	return s.b.buf[loff:hoff]
}

// SortSlice is like SortSliceBetween but sorting over the entire buffer.
func (b *Buffer) SortSlice(less func(left, right []byte) bool) {
	b.SortSliceBetween(b.StartOffset(), int(b.offset), less)
}

// SortSliceBetween sorts, in place, the length-prefixed entries stored between offsets start and
// end, using less for the ordering. It does nothing when start >= end and panics when start is 0.
// The entries are split into chunks of up to 1024; each chunk is sorted by sortSmall and the
// chunks are then combined by sort. A scratch buffer is allocated for the duration of the call.
func (b *Buffer) SortSliceBetween(start, end int, less LessFunc) {
	if start >= end {
		return
	}
	if start == 0 {
		panic("start can never be zero")
	}

	// Record the offset of every 1024th entry; these are the chunk boundaries.
	var offsets []int
	next, count := start, 0
	for next >= 0 && next < end {
		if count%1024 == 0 {
			offsets = append(offsets, next)
		}
		_, next = b.Slice(next)
		count++
	}
	assert(len(offsets) > 0)
	// Close the last chunk with the end offset.
	if offsets[len(offsets)-1] != end {
		offsets = append(offsets, end)
	}

	// Scratch space: half of the range being sorted plus 10%. The halving is an integer division
	// that happens before the conversion to float64.
	szTmp := int(float64((end-start)/2) * 1.1)
	s := &sortHelper{
		offsets: offsets,
		b:       b,
		less:    less,
		small:   make([]int, 0, 1024),
		tmp:     NewBuffer(szTmp, b.tag),
	}
	defer s.tmp.Release()

	// Sort every chunk on its own, then merge the sorted chunks.
	left := offsets[0]
	for _, off := range offsets[1:] {
		s.sortSmall(left, off)
		left = off
	}
	s.sort(0, len(offsets)-1)
}

// rawSlice returns the entry at the start of buf together with its 4-byte big-endian length
// prefix.
func rawSlice(buf []byte) []byte {
	sz := binary.BigEndian.Uint32(buf)
	return buf[:4+int(sz)]
}

// Slice would return the slice written at offset, along with the offset of the entry that
// follows it. The next offset is -1 when there are no further entries. When offset is at or past
// the used length of the buffer, Slice returns nil and -1.
func (b *Buffer) Slice(offset int) ([]byte, int) {
	if offset >= int(b.offset) {
		return nil, -1
	}

	// An entry is a 4-byte big-endian length followed by that many bytes of data.
	sz := binary.BigEndian.Uint32(b.buf[offset:])
	start := offset + 4
	next := start + int(sz)
	res := b.buf[start:next]
	if next >= int(b.offset) {
		next = -1
	}
	return res, next
}

// SliceOffsets is an expensive function. Use sparingly.
488 func (b *Buffer) SliceOffsets() []int { 489 next := b.StartOffset() 490 var offsets []int 491 for next >= 0 { 492 offsets = append(offsets, next) 493 _, next = b.Slice(next) 494 } 495 return offsets 496 } 497 498 func (b *Buffer) Data(offset int) []byte { 499 if offset > b.curSz { 500 panic("offset beyond current size") 501 } 502 return b.buf[offset:b.curSz] 503 } 504 505 // Write would write p bytes to the buffer. 506 func (b *Buffer) Write(p []byte) (n int, err error) { 507 n = len(p) 508 b.Grow(n) 509 assert(n == copy(b.buf[b.offset:], p)) 510 b.offset += uint64(n) 511 return n, nil 512 } 513 514 // Reset would reset the buffer to be reused. 515 func (b *Buffer) Reset() { 516 b.offset = uint64(b.StartOffset()) 517 } 518 519 // Release would free up the memory allocated by the buffer. Once the usage of buffer is done, it is 520 // important to call Release, otherwise a memory leak can happen. 521 func (b *Buffer) Release() error { 522 if b == nil { 523 return nil 524 } 525 switch b.bufType { 526 case UseCalloc: 527 Free(b.buf) 528 case UseMmap: 529 if b.mmapFile == nil { 530 return nil 531 } 532 path := b.mmapFile.Fd.Name() 533 if err := b.mmapFile.Close(-1); err != nil { 534 return errors.Wrapf(err, "while closing file: %s", path) 535 } 536 if !b.persistent { 537 if err := os.Remove(path); err != nil { 538 return errors.Wrapf(err, "while deleting file %s", path) 539 } 540 } 541 } 542 return nil 543 }