/*
 * Copyright 2020 Dgraph Labs, Inc. and Contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package z

import (
	"encoding/binary"
	"fmt"
	"log"
	"os"
	"sort"
	"sync/atomic"

	"github.com/pkg/errors"
)

const (
	// defaultCapacity is the minimum backing capacity any Buffer is created with.
	defaultCapacity = 64
	// defaultTag is the jemalloc stats tag used when the caller supplies none.
	defaultTag = "buffer"
)

// Buffer is equivalent of bytes.Buffer without the ability to read. It is NOT thread-safe.
//
// In UseCalloc mode, z.Calloc is used to allocate memory, which depending upon how the code is
// compiled could use jemalloc for allocations.
//
// In UseMmap mode, Buffer uses file mmap to allocate memory. This allows us to store big data
// structures without using physical memory.
//
// MaxSize can be set to limit the memory usage.
44 type Buffer struct { 45 padding uint64 // number of starting bytes used for padding 46 offset uint64 // used length of the buffer 47 buf []byte // backing slice for the buffer 48 bufType BufferType // type of the underlying buffer 49 curSz int // capacity of the buffer 50 maxSz int // causes a panic if the buffer grows beyond this size 51 mmapFile *MmapFile // optional mmap backing for the buffer 52 autoMmapAfter int // Calloc falls back to an mmaped tmpfile after crossing this size 53 autoMmapDir string // directory for autoMmap to create a tempfile in 54 persistent bool // when enabled, Release will not delete the underlying mmap file 55 tag string // used for jemalloc stats 56 } 57 58 func NewBuffer(capacity int, tag string) *Buffer { 59 if capacity < defaultCapacity { 60 capacity = defaultCapacity 61 } 62 if tag == "" { 63 tag = defaultTag 64 } 65 return &Buffer{ 66 buf: Calloc(capacity, tag), 67 bufType: UseCalloc, 68 curSz: capacity, 69 offset: 8, 70 padding: 8, 71 tag: tag, 72 } 73 } 74 75 // It is the caller's responsibility to set offset after this, because Buffer 76 // doesn't remember what it was. 
77 func NewBufferPersistent(path string, capacity int) (*Buffer, error) { 78 file, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0666) 79 if err != nil { 80 return nil, err 81 } 82 buffer, err := newBufferFile(file, capacity) 83 if err != nil { 84 return nil, err 85 } 86 buffer.persistent = true 87 return buffer, nil 88 } 89 90 func NewBufferTmp(dir string, capacity int) (*Buffer, error) { 91 if dir == "" { 92 dir = tmpDir 93 } 94 file, err := os.CreateTemp(dir, "buffer") 95 if err != nil { 96 return nil, err 97 } 98 return newBufferFile(file, capacity) 99 } 100 101 func newBufferFile(file *os.File, capacity int) (*Buffer, error) { 102 if capacity < defaultCapacity { 103 capacity = defaultCapacity 104 } 105 mmapFile, err := OpenMmapFileUsing(file, capacity, true) 106 if err != nil && err != NewFile { 107 return nil, err 108 } 109 buf := &Buffer{ 110 buf: mmapFile.Data, 111 bufType: UseMmap, 112 curSz: len(mmapFile.Data), 113 mmapFile: mmapFile, 114 offset: 8, 115 padding: 8, 116 } 117 return buf, nil 118 } 119 120 func NewBufferSlice(slice []byte) *Buffer { 121 return &Buffer{ 122 offset: uint64(len(slice)), 123 buf: slice, 124 bufType: UseInvalid, 125 } 126 } 127 128 func (b *Buffer) WithAutoMmap(threshold int, path string) *Buffer { 129 if b.bufType != UseCalloc { 130 panic("can only autoMmap with UseCalloc") 131 } 132 b.autoMmapAfter = threshold 133 if path == "" { 134 b.autoMmapDir = tmpDir 135 } else { 136 b.autoMmapDir = path 137 } 138 return b 139 } 140 141 func (b *Buffer) WithMaxSize(size int) *Buffer { 142 b.maxSz = size 143 return b 144 } 145 146 func (b *Buffer) IsEmpty() bool { 147 return int(b.offset) == b.StartOffset() 148 } 149 150 // LenWithPadding would return the number of bytes written to the buffer so far 151 // plus the padding at the start of the buffer. 
func (b *Buffer) LenWithPadding() int {
	return int(atomic.LoadUint64(&b.offset))
}

// LenNoPadding would return the number of bytes written to the buffer so far
// (without the padding).
func (b *Buffer) LenNoPadding() int {
	return int(atomic.LoadUint64(&b.offset) - b.padding)
}

// Bytes would return all the written bytes as a slice.
// The returned slice aliases the internal buffer; it is invalidated by Grow.
func (b *Buffer) Bytes() []byte {
	off := atomic.LoadUint64(&b.offset)
	return b.buf[b.padding:off]
}

// Grow would grow the buffer to have at least n more bytes. In case the buffer is at capacity, it
// would reallocate twice the size of current capacity + n, to ensure n bytes can be written to the
// buffer without further allocation. In UseMmap mode, this might result in underlying file
// expansion.
//
// Panics if the buffer is uninitialized, if maxSz would be exceeded, or for
// UseInvalid (slice-backed) buffers, which cannot grow.
func (b *Buffer) Grow(n int) {
	if b.buf == nil {
		panic("z.Buffer needs to be initialized before using")
	}
	// Enforce the optional hard cap before doing any work.
	if b.maxSz > 0 && int(b.offset)+n > b.maxSz {
		err := fmt.Errorf(
			"z.Buffer max size exceeded: %d offset: %d grow: %d", b.maxSz, b.offset, n)
		panic(err)
	}
	// Fast path: the current capacity already accommodates n more bytes.
	if int(b.offset)+n < b.curSz {
		return
	}

	// Calculate new capacity: roughly double (current capacity + n).
	growBy := b.curSz + n
	// Don't allocate more than 1GB at a time.
	if growBy > 1<<30 {
		growBy = 1 << 30
	}
	// Allocate at least n, even if it exceeds the 1GB limit above.
	if n > growBy {
		growBy = n
	}
	b.curSz += growBy

	switch b.bufType {
	case UseCalloc:
		// If autoMmap gets triggered, copy the slice over to an mmaped file.
		if b.autoMmapAfter > 0 && b.curSz > b.autoMmapAfter {
			b.bufType = UseMmap
			file, err := os.CreateTemp(b.autoMmapDir, "")
			if err != nil {
				panic(err)
			}
			mmapFile, err := OpenMmapFileUsing(file, b.curSz, true)
			// NewFile is expected here: the tempfile is freshly created.
			if err != nil && err != NewFile {
				panic(err)
			}
			// Copy the already-written prefix, then free the Calloc slice.
			assert(int(b.offset) == copy(mmapFile.Data, b.buf[:b.offset]))
			Free(b.buf)
			b.mmapFile = mmapFile
			b.buf = mmapFile.Data
			break
		}

		// Else, reallocate the slice.
		newBuf := Calloc(b.curSz, b.tag)
		assert(int(b.offset) == copy(newBuf, b.buf[:b.offset]))
		Free(b.buf)
		b.buf = newBuf

	case UseMmap:
		// Truncate and remap the underlying file.
		if err := b.mmapFile.Truncate(int64(b.curSz)); err != nil {
			err = errors.Wrapf(err,
				"while trying to truncate file: %s to size: %d", b.mmapFile.Fd.Name(), b.curSz)
			panic(err)
		}
		b.buf = b.mmapFile.Data

	default:
		panic("can only use Grow on UseCalloc and UseMmap buffers")
	}
}

// Allocate is a way to get a slice of size n back from the buffer. This slice can be directly
// written to. Warning: Allocate is not thread-safe. The byte slice returned MUST be used before
// further calls to Buffer.
func (b *Buffer) Allocate(n int) []byte {
	b.Grow(n)
	off := b.offset
	b.offset += uint64(n)
	return b.buf[off:int(b.offset)]
}

// AllocateOffset works the same way as allocate, but instead of returning a byte slice, it returns
// the offset of the allocation.
func (b *Buffer) AllocateOffset(n int) int {
	b.Grow(n)
	b.offset += uint64(n)
	return int(b.offset) - n
}

// writeLen appends sz as an 8-byte big-endian length header. The caller must
// have already reserved room via Grow.
func (b *Buffer) writeLen(sz int) {
	buf := b.Allocate(8)
	binary.BigEndian.PutUint64(buf, uint64(sz))
}

// SliceAllocate would encode the size provided into the buffer, followed by a call to Allocate,
// hence returning the slice of size sz.
// This can be used to allocate a lot of small buffers into
// this big buffer.
// Note that SliceAllocate should NOT be mixed with normal calls to Write.
func (b *Buffer) SliceAllocate(sz int) []byte {
	// Reserve room for the 8-byte length header plus the payload in one Grow.
	b.Grow(8 + sz)
	b.writeLen(sz)
	return b.Allocate(sz)
}

// StartOffset returns the offset of the first user byte (i.e. the padding size).
func (b *Buffer) StartOffset() int {
	return int(b.padding)
}

// WriteSlice writes slice as a length-prefixed record.
func (b *Buffer) WriteSlice(slice []byte) {
	dst := b.SliceAllocate(len(slice))
	assert(len(slice) == copy(dst, slice))
}

// SliceIterate calls f on every length-prefixed slice in the buffer, in write
// order, stopping at the first error. Zero-length slices are skipped.
func (b *Buffer) SliceIterate(f func(slice []byte) error) error {
	if b.IsEmpty() {
		return nil
	}

	next := b.StartOffset()
	var slice []byte
	for next >= 0 {
		slice, next = b.Slice(next)
		if len(slice) == 0 {
			continue
		}
		if err := f(slice); err != nil {
			return err
		}
	}

	return nil
}

const (
	UseCalloc BufferType = iota
	UseMmap
	UseInvalid
)

// BufferType identifies how a Buffer's backing memory was allocated.
type BufferType int

func (t BufferType) String() string {
	switch t {
	case UseCalloc:
		return "UseCalloc"
	case UseMmap:
		return "UseMmap"
	default:
		return "UseInvalid"
	}
}

// LessFunc compares two length-prefixed payloads (headers already stripped).
type LessFunc func(a, b []byte) bool

// sortHelper holds the state for the in-place merge sort performed by
// SortSliceBetween.
type sortHelper struct {
	offsets []int    // block boundaries (buffer offsets), one per ~1024 records
	b       *Buffer  // the buffer being sorted
	tmp     *Buffer  // scratch space for sortSmall and merge
	less    LessFunc // comparison over record payloads
	small   []int    // reusable scratch of record offsets within one block
}

// sortSmall sorts the records in b.buf[start:end] in place. It collects the
// record offsets, sorts the offsets, then rewrites the region via tmp.
func (s *sortHelper) sortSmall(start, end int) {
	s.tmp.Reset()
	s.small = s.small[:0]
	next := start
	for next >= 0 && next < end {
		s.small = append(s.small, next)
		_, next = s.b.Slice(next)
	}

	// We are sorting the slices pointed to by s.small offsets, but only moving the offsets around.
	sort.Slice(s.small, func(i, j int) bool {
		left, _ := s.b.Slice(s.small[i])
		right, _ := s.b.Slice(s.small[j])
		return s.less(left, right)
	})
	// Now we iterate over the s.small offsets and copy over the slices. The result is now in order.
	for _, off := range s.small {
		_, _ = s.tmp.Write(rawSlice(s.b.buf[off:]))
	}
	assert(end-start == copy(s.b.buf[start:end], s.tmp.Bytes()))
}

// assert aborts the process if b is false. Used for internal invariants only.
func assert(b bool) {
	if !b {
		log.Fatalf("%+v", errors.Errorf("Assertion failure"))
	}
}
func check(err error) {
	if err != nil {
		log.Fatalf("%+v", err)
	}
}
func check2(_ interface{}, err error) {
	check(err)
}

// merge combines two already-sorted adjacent runs (left, right) back into
// b.buf[start:end]. left is first copied into tmp because the merge overwrites
// the region it came from.
func (s *sortHelper) merge(left, right []byte, start, end int) {
	if len(left) == 0 || len(right) == 0 {
		return
	}
	s.tmp.Reset()
	check2(s.tmp.Write(left))
	left = s.tmp.Bytes()

	var ls, rs []byte

	copyLeft := func() {
		assert(len(ls) == copy(s.b.buf[start:], ls))
		left = left[len(ls):]
		start += len(ls)
	}
	copyRight := func() {
		assert(len(rs) == copy(s.b.buf[start:], rs))
		right = right[len(rs):]
		start += len(rs)
	}

	for start < end {
		// Once either side is exhausted, the remainder of the other side is
		// already in sorted order and can be copied wholesale.
		if len(left) == 0 {
			assert(len(right) == copy(s.b.buf[start:end], right))
			return
		}
		if len(right) == 0 {
			assert(len(left) == copy(s.b.buf[start:end], left))
			return
		}
		ls = rawSlice(left)
		rs = rawSlice(right)

		// We skip the first 8 bytes in the rawSlice, because that stores the length.
		if s.less(ls[8:], rs[8:]) {
			copyLeft()
		} else {
			copyRight()
		}
	}
}

// sort recursively merge-sorts the blocks offsets[lo..hi] and returns the
// sorted region of the buffer.
func (s *sortHelper) sort(lo, hi int) []byte {
	assert(lo <= hi)

	mid := lo + (hi-lo)/2
	loff, hoff := s.offsets[lo], s.offsets[hi]
	if lo == mid {
		// Single block: already sorted by sortSmall, just return the buffer.
		return s.b.buf[loff:hoff]
	}

	// lo, mid would sort from [offset[lo], offset[mid]) .
	left := s.sort(lo, mid)
	// Typically we'd use mid+1, but here mid represents an offset in the buffer. Each offset
	// covers a block of up to 1024 entries. So, if we do mid+1, we'd skip over those entries.
	right := s.sort(mid, hi)

	s.merge(left, right, loff, hoff)
	return s.b.buf[loff:hoff]
}

// SortSlice is like SortSliceBetween but sorting over the entire buffer.
func (b *Buffer) SortSlice(less func(left, right []byte) bool) {
	b.SortSliceBetween(b.StartOffset(), int(b.offset), less)
}

// SortSliceBetween sorts the length-prefixed records in [start, end) in place
// using less. Records are first sorted in blocks of 1024 (sortSmall), then the
// blocks are merged bottom-up (sort/merge) using a temporary buffer.
func (b *Buffer) SortSliceBetween(start, end int, less LessFunc) {
	if start >= end {
		return
	}
	// start == 0 would point at the padding, never at a valid record.
	if start == 0 {
		panic("start can never be zero")
	}

	// Record a block boundary every 1024 records, plus the final end offset.
	var offsets []int
	next, count := start, 0
	for next >= 0 && next < end {
		if count%1024 == 0 {
			offsets = append(offsets, next)
		}
		_, next = b.Slice(next)
		count++
	}
	assert(len(offsets) > 0)
	if offsets[len(offsets)-1] != end {
		offsets = append(offsets, end)
	}

	// Scratch needs to hold the larger half of a merge: ~half the range + 10%.
	szTmp := int(float64((end-start)/2) * 1.1)
	s := &sortHelper{
		offsets: offsets,
		b:       b,
		less:    less,
		small:   make([]int, 0, 1024),
		tmp:     NewBuffer(szTmp, b.tag),
	}
	defer func() { _ = s.tmp.Release() }()

	left := offsets[0]
	for _, off := range offsets[1:] {
		s.sortSmall(left, off)
		left = off
	}
	s.sort(0, len(offsets)-1)
}

// rawSlice returns the full record at the start of buf, including its 8-byte
// length header.
func rawSlice(buf []byte) []byte {
	sz := binary.BigEndian.Uint64(buf)
	return buf[:8+int(sz)]
}

// Slice would return the slice written at offset.
// It also returns the offset of the next record, or -1 when offset was the last.
func (b *Buffer) Slice(offset int) ([]byte, int) {
	if offset >= int(b.offset) {
		return nil, -1
	}

	sz := binary.BigEndian.Uint64(b.buf[offset:])
	start := offset + 8
	next := start + int(sz)
	res := b.buf[start:next]
	if next >= int(b.offset) {
		next = -1
	}
	return res, next
}

// SliceOffsets is an expensive function. Use sparingly.
491 func (b *Buffer) SliceOffsets() []int { 492 next := b.StartOffset() 493 var offsets []int 494 for next >= 0 { 495 offsets = append(offsets, next) 496 _, next = b.Slice(next) 497 } 498 return offsets 499 } 500 501 func (b *Buffer) Data(offset int) []byte { 502 if offset > b.curSz { 503 panic("offset beyond current size") 504 } 505 return b.buf[offset:b.curSz] 506 } 507 508 // Write would write p bytes to the buffer. 509 func (b *Buffer) Write(p []byte) (n int, err error) { 510 n = len(p) 511 b.Grow(n) 512 assert(n == copy(b.buf[b.offset:], p)) 513 b.offset += uint64(n) 514 return n, nil 515 } 516 517 // Reset would reset the buffer to be reused. 518 func (b *Buffer) Reset() { 519 b.offset = uint64(b.StartOffset()) 520 } 521 522 // Release would free up the memory allocated by the buffer. Once the usage of buffer is done, it is 523 // important to call Release, otherwise a memory leak can happen. 524 func (b *Buffer) Release() error { 525 if b == nil { 526 return nil 527 } 528 switch b.bufType { 529 case UseCalloc: 530 Free(b.buf) 531 case UseMmap: 532 if b.mmapFile == nil { 533 return nil 534 } 535 path := b.mmapFile.Fd.Name() 536 if err := b.mmapFile.Close(-1); err != nil { 537 return errors.Wrapf(err, "while closing file: %s", path) 538 } 539 if !b.persistent { 540 if err := os.Remove(path); err != nil { 541 return errors.Wrapf(err, "while deleting file %s", path) 542 } 543 } 544 } 545 return nil 546 }