github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/store/ram.go (about) 1 package store 2 3 import ( 4 "errors" 5 "fmt" 6 "github.com/balzaczyy/golucene/core/util" 7 "hash" 8 "hash/crc32" 9 "math" 10 "os" 11 "sync" 12 "sync/atomic" 13 "unsafe" 14 ) 15 16 // store/RAMDirectory.java 17 18 /* 19 A memory-resident Directory implementation. Locking implementation 20 is by default the SingleInstanceLockFactory but can be changed with 21 SetLockFactory(). 22 23 Warning: This class is not intended to work with huge indexes. 24 Everything beyond several hundred megabytes will waste resources (GC 25 cycles), becaues it uses an internal buffer size of 1024 bytes, 26 producing millions of byte[1024] arrays. This class is optimized for 27 small memory-resident indexes. It also has bad concurrency on 28 multithreaded environments. 29 30 It is recommended to materialze large indexes on disk and use 31 MMapDirectory, which is a high-performance directory implementation 32 working diretly on the file system cache of the operating system, so 33 copying dat to Java heap space is not useful. 34 */ 35 type RAMDirectory struct { 36 *DirectoryImpl 37 *BaseDirectory 38 39 sizeInBytes int64 // synchronized 40 41 fileMap map[string]*RAMFile // synchronized 42 fileMapLock *sync.RWMutex 43 } 44 45 func NewRAMDirectory() *RAMDirectory { 46 ans := &RAMDirectory{ 47 fileMap: make(map[string]*RAMFile), 48 fileMapLock: &sync.RWMutex{}, 49 } 50 ans.DirectoryImpl = NewDirectoryImpl(ans) 51 ans.BaseDirectory = NewBaseDirectory(ans) 52 ans.SetLockFactory(newSingleInstanceLockFactory()) 53 return ans 54 } 55 56 func (d *RAMDirectory) LockID() string { 57 return fmt.Sprintf("lucene-%v", util.ItoHex(int64(uintptr(unsafe.Pointer(&d))))) 58 } 59 60 func (rd *RAMDirectory) ListAll() (names []string, err error) { 61 rd.EnsureOpen() 62 rd.fileMapLock.RLock() 63 defer rd.fileMapLock.RUnlock() 64 names = make([]string, 0, len(rd.fileMap)) 65 for name, _ := range rd.fileMap { 66 names = append(names, name) 67 } 68 return names, nil 69 } 70 71 // Returns true iff the named file exists in this directory 72 func (rd *RAMDirectory) FileExists(name string) bool { 73 rd.EnsureOpen() 74 rd.fileMapLock.RLock() 75 defer rd.fileMapLock.RUnlock() 76 _, ok := rd.fileMap[name] 77 return ok 78 } 79 80 // Returns the length in bytes of a file in the directory. 81 func (rd *RAMDirectory) FileLength(name string) (length int64, err error) { 82 rd.EnsureOpen() 83 rd.fileMapLock.RLock() 84 defer rd.fileMapLock.RUnlock() 85 if file, ok := rd.fileMap[name]; ok { 86 return file.Length(), nil 87 } 88 return 0, os.ErrNotExist 89 } 90 91 /* 92 Return total size in bytes of all files in this directory. This is 93 currently quantized to BUFFER_SIZE. 94 */ 95 func (rd *RAMDirectory) RamBytesUsed() int64 { 96 rd.EnsureOpen() 97 return atomic.LoadInt64(&rd.sizeInBytes) 98 } 99 100 /* Removes an existing file in the directory */ 101 func (rd *RAMDirectory) DeleteFile(name string) error { 102 rd.EnsureOpen() 103 rd.fileMapLock.RLock() 104 defer rd.fileMapLock.RUnlock() 105 if file, ok := rd.fileMap[name]; ok { 106 file.directory = nil 107 atomic.AddInt64(&rd.sizeInBytes, -file.sizeInBytes) 108 return nil 109 } 110 return errors.New(name) 111 } 112 113 // Creates a new, empty file in the directory with the given name. 114 // Returns a stream writing this file: 115 func (rd *RAMDirectory) CreateOutput(name string, context IOContext) (out IndexOutput, err error) { 116 rd.EnsureOpen() 117 file := rd.newRAMFile() 118 rd.fileMapLock.Lock() 119 defer rd.fileMapLock.Unlock() 120 if existing, ok := rd.fileMap[name]; ok { 121 atomic.AddInt64(&rd.sizeInBytes, -existing.sizeInBytes) 122 existing.directory = nil 123 } 124 rd.fileMap[name] = file 125 return NewRAMOutputStream(file, true), nil 126 } 127 128 // Returns a new RAMFile for storing data. This method can be 129 // overridden to return different RAMFile impls, that e.g. override 130 // RAMFile.newBuffer(int). 131 func (rd *RAMDirectory) newRAMFile() *RAMFile { 132 return NewRAMFile(rd) 133 } 134 135 func (rd *RAMDirectory) Sync(names []string) error { 136 return nil 137 } 138 139 // Returns a stream reading an existing file. 140 func (rd *RAMDirectory) OpenInput(name string, context IOContext) (in IndexInput, err error) { 141 rd.EnsureOpen() 142 if file, ok := rd.fileMap[name]; ok { 143 return newRAMInputStream(name, file) 144 } 145 return nil, errors.New(name) 146 } 147 148 // Closes the store to future operations, releasing associated memroy. 149 func (rd *RAMDirectory) Close() error { 150 rd.IsOpen = false 151 rd.fileMapLock.Lock() 152 defer rd.fileMapLock.Unlock() 153 rd.fileMap = make(map[string]*RAMFile) 154 return nil 155 } 156 157 /* test-only */ 158 func (rd *RAMDirectory) GetRAMFile(name string) *RAMFile { 159 rd.fileMapLock.Lock() 160 defer rd.fileMapLock.Unlock() 161 return rd.fileMap[name] 162 } 163 164 /* test-only */ 165 func (d *RAMDirectory) PutRAMFile(name string, file *RAMFile) { 166 d.fileMapLock.Lock() 167 defer d.fileMapLock.Unlock() 168 d.fileMap[name] = file 169 } 170 171 /* test-only */ 172 func (rd *RAMDirectory) ChangeSize(diff int64) { 173 atomic.AddInt64(&rd.sizeInBytes, diff) 174 } 175 176 func (rd *RAMDirectory) String() string { 177 return fmt.Sprintf("RAMDirectory@%v", rd.DirectoryImpl.String()) 178 } 179 180 // store/RAMFile.java 181 182 // Represents a file in RAM as a list of []byte buffers. 183 type RAMFile struct { 184 sync.Locker 185 buffers [][]byte 186 length int64 187 directory *RAMDirectory 188 sizeInBytes int64 189 newBuffer func(size int) []byte 190 } 191 192 func NewRAMFileBuffer() *RAMFile { 193 return &RAMFile{ 194 Locker: &sync.Mutex{}, 195 newBuffer: newBuffer, 196 } 197 } 198 199 func NewRAMFile(directory *RAMDirectory) *RAMFile { 200 return &RAMFile{ 201 Locker: &sync.Mutex{}, 202 directory: directory, 203 newBuffer: newBuffer, 204 } 205 } 206 207 func (rf *RAMFile) Length() int64 { 208 rf.Lock() 209 defer rf.Unlock() 210 return rf.length 211 } 212 213 func (rf *RAMFile) SetLength(length int64) { 214 rf.Lock() // synchronized 215 defer rf.Unlock() 216 rf.length = length 217 } 218 219 func (rf *RAMFile) addBuffer(size int) []byte { 220 buffer := rf.newBuffer(size) 221 rf.Lock() // synchronized 222 defer rf.Unlock() 223 rf.buffers = append(rf.buffers, buffer) 224 rf.sizeInBytes += int64(size) 225 226 if rf.directory != nil { 227 atomic.AddInt64(&rf.directory.sizeInBytes, int64(size)) 228 } 229 return buffer 230 } 231 232 func (rf *RAMFile) Buffer(index int) []byte { 233 rf.Lock() 234 defer rf.Unlock() 235 return rf.buffers[index] 236 } 237 238 func (rf *RAMFile) numBuffers() int { 239 rf.Lock() 240 defer rf.Unlock() 241 return len(rf.buffers) 242 } 243 244 // Expert: allocate a new buffer. 245 // Subclasses can allocate differently 246 func newBuffer(size int) []byte { 247 return make([]byte, size) 248 } 249 250 func (rf *RAMFile) RamBytesUsed() int64 { 251 rf.Lock() 252 defer rf.Unlock() 253 return rf.sizeInBytes 254 } 255 256 // store/SingleInstanceLockFactory.java 257 258 /* 259 Implements LockFactory for a single in-process instance, meaning all 260 locking will take place through this one instance. Only use this 261 LockFactory when you are certain all IndexReaders and IndexWriters 262 for a given index are running against a single shared in-process 263 Directory instance. This is currently the default locking for 264 RAMDirectory. 265 */ 266 type SingleInstanceLockFactory struct { 267 *LockFactoryImpl 268 locksLock sync.Locker 269 locks map[string]bool 270 } 271 272 func newSingleInstanceLockFactory() *SingleInstanceLockFactory { 273 return &SingleInstanceLockFactory{ 274 LockFactoryImpl: &LockFactoryImpl{}, 275 locksLock: &sync.Mutex{}, 276 locks: make(map[string]bool), 277 } 278 } 279 280 func (fac *SingleInstanceLockFactory) Make(name string) Lock { 281 // We do not use the LockPrefix at all, becaues the private map 282 // instance effectively scopes the locking to this single Directory 283 // instance. 284 return newSingleInstanceLock(fac.locks, fac.locksLock, name) 285 } 286 287 func (fac *SingleInstanceLockFactory) Clear(name string) error { 288 fac.locksLock.Lock() // synchronized 289 defer fac.locksLock.Unlock() 290 if _, ok := fac.locks[name]; ok { 291 delete(fac.locks, name) 292 } 293 return nil 294 } 295 296 func (fac *SingleInstanceLockFactory) String() string { 297 return fmt.Sprintf("SingleInstanceLockFactory@%v", fac.locks) 298 } 299 300 type SingleInstanceLock struct { 301 *LockImpl 302 name string 303 locksLock sync.Locker 304 locks map[string]bool 305 } 306 307 func newSingleInstanceLock(locks map[string]bool, locksLock sync.Locker, name string) *SingleInstanceLock { 308 ans := &SingleInstanceLock{ 309 name: name, 310 locksLock: locksLock, 311 locks: locks, 312 } 313 ans.LockImpl = NewLockImpl(ans) 314 return ans 315 } 316 317 func (lock *SingleInstanceLock) Obtain() (ok bool, err error) { 318 lock.locksLock.Lock() // synchronized 319 defer lock.locksLock.Unlock() 320 lock.locks[lock.name] = true 321 return true, nil 322 } 323 324 func (lock *SingleInstanceLock) Close() error { 325 lock.locksLock.Lock() // synchronized 326 defer lock.locksLock.Unlock() 327 delete(lock.locks, lock.name) 328 return nil 329 } 330 331 func (lock *SingleInstanceLock) IsLocked() bool { 332 lock.locksLock.Lock() // synchronized 333 defer lock.locksLock.Unlock() 334 _, ok := lock.locks[lock.name] 335 return ok 336 } 337 338 func (lock *SingleInstanceLock) String() string { 339 return fmt.Sprintf("SingleInstanceLock: %v", lock.name) 340 } 341 342 // store/RAMInputStream.java 343 344 // A memory-resident IndexInput implementation. 345 type RAMInputStream struct { 346 *IndexInputImpl 347 348 file *RAMFile 349 length int64 350 351 currentBuffer []byte 352 currentBufferIndex int 353 354 bufferPosition int 355 bufferStart int64 356 bufferLength int 357 } 358 359 func newRAMInputStream(name string, f *RAMFile) (in *RAMInputStream, err error) { 360 if !(f.length/BUFFER_SIZE < math.MaxInt32) { 361 return nil, errors.New(fmt.Sprintf("RAMInputStream too large length=%v: %v", f.length, name)) 362 } 363 364 in = &RAMInputStream{ 365 file: f, 366 length: int64(f.length), 367 currentBufferIndex: -1, 368 } 369 in.IndexInputImpl = NewIndexInputImpl(fmt.Sprintf("RAMInputStream(name=%v)", name), in) 370 return in, nil 371 } 372 373 func (in *RAMInputStream) Close() error { 374 return nil 375 } 376 377 func (in *RAMInputStream) Length() int64 { 378 return in.length 379 } 380 381 func (in *RAMInputStream) ReadByte() (byte, error) { 382 if in.bufferPosition >= in.bufferLength { 383 in.currentBufferIndex++ 384 err := in.switchCurrentBuffer(true) 385 if err != nil { 386 return 0, err 387 } 388 } 389 in.bufferPosition++ 390 return in.currentBuffer[in.bufferPosition-1], nil 391 } 392 393 func (in *RAMInputStream) ReadBytes(buf []byte) error { 394 var offset = 0 395 for limit := len(buf); limit > 0; { 396 if in.bufferPosition >= in.bufferLength { 397 in.currentBufferIndex++ 398 err := in.switchCurrentBuffer(true) 399 if err != nil { 400 return err 401 } 402 } 403 404 bytesToCopy := in.bufferLength - in.bufferPosition 405 if limit < bytesToCopy { 406 bytesToCopy = limit 407 } 408 copy(buf[offset:], in.currentBuffer[in.bufferPosition:in.bufferPosition+bytesToCopy]) 409 offset += bytesToCopy 410 limit -= bytesToCopy 411 in.bufferPosition += bytesToCopy 412 } 413 return nil 414 } 415 416 func (in *RAMInputStream) switchCurrentBuffer(enforceEOF bool) error { 417 in.bufferStart = int64(BUFFER_SIZE * in.currentBufferIndex) 418 if in.bufferStart > in.length || in.currentBufferIndex >= in.file.numBuffers() { 419 // end of file reached, no more buffer left 420 if enforceEOF { 421 return errors.New(fmt.Sprintf("read past EOF: %v", in)) 422 } 423 // Force EOF if a read takes place at this position 424 in.currentBufferIndex-- 425 in.bufferPosition = BUFFER_SIZE 426 } else { 427 in.currentBuffer = in.file.Buffer(in.currentBufferIndex) 428 in.bufferPosition = 0 429 bufLen := in.length - in.bufferStart 430 if BUFFER_SIZE < bufLen { 431 bufLen = BUFFER_SIZE 432 } 433 in.bufferLength = int(bufLen) 434 } 435 return nil 436 } 437 438 func (in *RAMInputStream) FilePointer() int64 { 439 if in.currentBufferIndex < 0 { 440 return 0 441 } 442 return in.bufferStart + int64(in.bufferPosition) 443 } 444 445 func (in *RAMInputStream) Seek(pos int64) error { 446 if in.currentBuffer == nil || pos < in.bufferStart || pos >= in.bufferStart+BUFFER_SIZE { 447 in.currentBufferIndex = int(pos / BUFFER_SIZE) 448 err := in.switchCurrentBuffer(false) 449 if err != nil { 450 return err 451 } 452 } 453 in.bufferPosition = int(pos % BUFFER_SIZE) 454 return nil 455 } 456 457 func (in *RAMInputStream) Slice(desc string, offset, length int64) (IndexInput, error) { 458 panic("not implemented yet") 459 } 460 461 func (in *RAMInputStream) Clone() IndexInput { 462 panic("not implemented yet") 463 } 464 465 func (in *RAMInputStream) String() string { 466 return fmt.Sprintf("%v;%v@[0-%v]", in.IndexInputImpl.String(), in.FilePointer(), in.length) 467 } 468 469 // store/RamOutputStream.java 470 471 /* 472 A memory-resident IndexOutput implementation 473 */ 474 type RAMOutputStream struct { 475 *IndexOutputImpl 476 477 file *RAMFile 478 479 currentBuffer []byte 480 currentBufferIndex int 481 482 bufferPosition int 483 bufferStart int64 484 bufferLength int 485 486 crc hash.Hash32 487 } 488 489 /* Construct an empty output buffer. */ 490 func NewRAMOutputStreamBuffer() *RAMOutputStream { 491 return NewRAMOutputStream(NewRAMFileBuffer(), false) 492 } 493 494 func NewRAMOutputStream(f *RAMFile, checksum bool) *RAMOutputStream { 495 // make sure that we switch to the first needed buffer lazily 496 out := &RAMOutputStream{file: f, currentBufferIndex: -1} 497 out.IndexOutputImpl = NewIndexOutput(out) 498 if checksum { 499 out.crc = newBufferedChecksum(crc32.NewIEEE()) 500 } 501 return out 502 } 503 504 /* Copy the current contents of this buffer to the named output. */ 505 func (out *RAMOutputStream) WriteTo(output util.DataOutput) error { 506 err := out.Flush() 507 if err != nil { 508 return err 509 } 510 end := out.file.length 511 pos := int64(0) 512 buffer := 0 513 for pos < end { 514 length := BUFFER_SIZE 515 nextPos := pos + int64(length) 516 if nextPos > end { // at the last buffer 517 length = int(end - pos) 518 } 519 err = output.WriteBytes(out.file.Buffer(buffer)[:length]) 520 if err != nil { 521 return err 522 } 523 buffer++ 524 pos = nextPos 525 } 526 return nil 527 } 528 529 /* Copy the current contents of this buffer to output byte slice */ 530 func (out *RAMOutputStream) WriteToBytes(bytes []byte) error { 531 err := out.Flush() 532 if err != nil { 533 return err 534 } 535 end := out.file.length 536 pos := int64(0) 537 buffer := 0 538 bytesUpto := 0 539 for pos < end { 540 length := BUFFER_SIZE 541 nextPos := pos + int64(length) 542 if nextPos > end { 543 length = int(end - pos) 544 } 545 copy(bytes[bytesUpto:], out.file.Buffer(buffer)[:length]) 546 buffer++ 547 bytesUpto += length 548 pos = nextPos 549 } 550 return nil 551 } 552 553 /* Resets this to an empty file. */ 554 func (out *RAMOutputStream) Reset() { 555 out.currentBuffer = nil 556 out.currentBufferIndex = -1 557 out.bufferPosition = 0 558 out.bufferStart = 0 559 out.bufferLength = 0 560 out.file.SetLength(0) 561 if out.crc != nil { 562 out.crc.Reset() 563 } 564 } 565 566 func (out *RAMOutputStream) Close() error { 567 return out.Flush() 568 } 569 570 // func (out *RAMOutputStream) Length() (int64, error) { 571 // return out.file.length, nil 572 // } 573 574 func (out *RAMOutputStream) WriteByte(b byte) error { 575 if out.bufferPosition == out.bufferLength { 576 out.currentBufferIndex++ 577 out.switchCurrentBuffer() 578 } 579 if out.crc != nil { 580 out.crc.Write([]byte{b}) 581 } 582 out.currentBuffer[out.bufferPosition] = b 583 out.bufferPosition++ 584 return nil 585 } 586 587 func (out *RAMOutputStream) WriteBytes(buf []byte) error { 588 assert(buf != nil) 589 if out.crc != nil { 590 out.crc.Write(buf) 591 } 592 var offset = 0 593 for limit := len(buf); limit > 0; { 594 if out.bufferPosition == out.bufferLength { 595 out.currentBufferIndex++ 596 out.switchCurrentBuffer() 597 } 598 599 bytesToCopy := len(out.currentBuffer) - out.bufferPosition 600 if limit < bytesToCopy { 601 bytesToCopy = limit 602 } 603 copy(out.currentBuffer[out.bufferPosition:], buf[offset:offset+bytesToCopy]) 604 offset += bytesToCopy 605 limit -= bytesToCopy 606 out.bufferPosition += bytesToCopy 607 } 608 return nil 609 } 610 611 func (out *RAMOutputStream) switchCurrentBuffer() { 612 if out.currentBufferIndex == out.file.numBuffers() { 613 out.currentBuffer = out.file.addBuffer(BUFFER_SIZE) 614 } else { 615 out.currentBuffer = out.file.Buffer(out.currentBufferIndex) 616 } 617 out.bufferPosition = 0 618 out.bufferStart = BUFFER_SIZE * int64(out.currentBufferIndex) 619 out.bufferLength = len(out.currentBuffer) 620 } 621 622 func (out *RAMOutputStream) setFileLength() { 623 if pointer := out.bufferStart + int64(out.bufferPosition); pointer > int64(out.file.length) { 624 out.file.SetLength(pointer) 625 } 626 } 627 628 func (out *RAMOutputStream) Flush() error { 629 out.setFileLength() 630 return nil 631 } 632 633 func (out *RAMOutputStream) FilePointer() int64 { 634 if out.currentBufferIndex < 0 { 635 return 0 636 } 637 return out.bufferStart + int64(out.bufferPosition) 638 } 639 640 func (out *RAMOutputStream) Checksum() int64 { 641 assert2(out.crc != nil, "internal RAMOutputStream created with checksum disabled") 642 return int64(out.crc.Sum32()) 643 }