github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/index/threadPool.go (about) 1 package index 2 3 import ( 4 "container/list" 5 "fmt" 6 "sync" 7 ) 8 9 // index/DocumentsWriterPerThreadPool.java 10 11 /* 12 ThreadState references and guards a DocumentsWriterPerThread instance 13 that is used during indexing to build a in-memory index segment. 14 ThreadState also holds all flush related per-thread data controlled 15 by DocumentsWriterFlushControl. 16 17 A ThreadState, its methods and members should only accessed by one 18 goroutine a time. users must acquire the lock via lock() and release 19 the lock in a finally block via unlock() before accesing the state. 20 */ 21 type ThreadState struct { 22 id int // used by pool 23 dwpt *DocumentsWriterPerThread 24 // TODO this should really be part of DocumentsWriterFlushControl 25 // write access guarded by DocumentsWriterFlushControl 26 flushPending bool // volatile 27 // TODO this should really be part of DocumentsWriterFlushControl 28 // write access guarded by DocumentsWriterFlushControl 29 bytesUsed int64 30 isActive bool 31 } 32 33 func newThreadState(id int) *ThreadState { 34 return &ThreadState{id: id, isActive: true} 35 } 36 37 func (ts *ThreadState) deactivate() { 38 ts.isActive = false 39 ts.reset() 40 } 41 42 func (ts *ThreadState) reset() { 43 ts.dwpt = nil 44 ts.bytesUsed = 0 45 ts.flushPending = false 46 } 47 48 /* 49 DocumentsWriterPerThreadPool controls ThreadState instances and their 50 goroutine assignment during indexing. Each TheadState holds a 51 reference to a DocumentsWriterPerThread that is once a ThreadState is 52 obtained from the pool exclusively used for indexing a single 53 document by the obtaining thread. Each indexing thread must obtain 54 such a ThreadState to make progress. Depending on the DocumentsWriterPerThreadPool 55 implementation ThreadState assingments might differ from document to 56 document. 57 58 Once a DocumentWriterPerThread is selected for flush the thread pool 59 is reusing the flushing DocumentsWriterPerthread's ThreadState with a 60 new DocumentsWriterPerThread instance. 61 62 GoRoutine is different from Java's thread. So intead of thread 63 affinity, I will use channels and concurrent running goroutines to 64 hold individual DocumentsWriterPerThread instances and states. 65 */ 66 type DocumentsWriterPerThreadPool struct { 67 sync.Locker 68 threadStates []*ThreadState 69 listeners []*list.List 70 freeList *list.List 71 lockedList *list.List 72 hasMoreStates *sync.Cond 73 } 74 75 func NewDocumentsWriterPerThreadPool(maxNumThreadStates int) *DocumentsWriterPerThreadPool { 76 assert2(maxNumThreadStates >= 1, fmt.Sprintf("maxNumThreadStates must be >= 1 but was: %v", maxNumThreadStates)) 77 return &DocumentsWriterPerThreadPool{ 78 Locker: &sync.Mutex{}, 79 threadStates: make([]*ThreadState, 0, maxNumThreadStates), 80 listeners: make([]*list.List, maxNumThreadStates), 81 freeList: list.New(), 82 lockedList: list.New(), 83 hasMoreStates: sync.NewCond(&sync.Mutex{}), 84 } 85 } 86 87 func (tp *DocumentsWriterPerThreadPool) numActiveThreadState() int { 88 return len(tp.threadStates) 89 } 90 91 func (tp *DocumentsWriterPerThreadPool) reset(threadState *ThreadState, closed bool) *DocumentsWriterPerThread { 92 dwpt := threadState.dwpt 93 if !closed { 94 threadState.reset() 95 } else { 96 threadState.deactivate() 97 } 98 return dwpt 99 } 100 101 /* 102 It's unfortunately that Go doesn't support 'Thread Affinity'. Default 103 strategy is LIFO. 104 */ 105 func (tp *DocumentsWriterPerThreadPool) lockAny() (res *ThreadState) { 106 for res == nil { 107 if res = tp.findNextAvailableThreadState(); res == nil { 108 // ThreadState is already locked before return by this method: 109 if res = tp.newThreadState(); res == nil { 110 // Wait until a thread state freez up: 111 func() { 112 tp.hasMoreStates.L.Lock() 113 defer tp.hasMoreStates.L.Unlock() 114 115 tp.hasMoreStates.Wait() 116 }() 117 } 118 } 119 } 120 return 121 } 122 123 func (tp *DocumentsWriterPerThreadPool) lock(id int, wait bool) *ThreadState { 124 tp.Lock() 125 defer tp.Unlock() 126 127 for e := tp.freeList.Front(); e != nil; e = e.Next() { 128 if tid := e.Value.(int); tid == id { 129 tp.freeList.Remove(e) 130 tp.lockedList.PushBack(id) 131 return tp.threadStates[tid] 132 } 133 } 134 135 if !wait { 136 return nil 137 } 138 waitingList := tp.listeners[id] 139 if waitingList == nil { 140 waitingList = list.New() 141 tp.listeners[id] = waitingList 142 } 143 ch := make(chan *ThreadState) 144 waitingList.PushBack(ch) 145 return <-ch // block until reserved thread state is released 146 } 147 148 func (tp *DocumentsWriterPerThreadPool) findNextAvailableThreadState() *ThreadState { 149 tp.Lock() 150 defer tp.Unlock() 151 152 if tp.freeList.Len() > 0 { 153 // Important that we are LIFO here! This way if number of 154 // concurrent indexing threads was once high, but has now 155 // reduced, we only use a limited number of thread states: 156 e := tp.freeList.Back() 157 id := e.Value.(int) 158 tp.freeList.Remove(e) 159 160 if tp.threadStates[id].dwpt == nil { 161 // This thread-state is not initialized, e.g. it was just 162 // flushed. See if we can instead find another free thread 163 // state that already has docs indexed. This way if incoming 164 // thread concurrentcy has decreased, we don't leave docs 165 // indefinitely buffered, tying up RAM. This will instead get 166 // thread states flushed, freein up RAM for larger segment 167 // flushes: 168 for e = tp.freeList.Front(); e != nil; e = e.Next() { 169 if id2 := e.Value.(int); tp.threadStates[id2].dwpt != nil { 170 // Use this one instead, and swap it with the 171 // un-initialized one: 172 tp.freeList.PushFront(id) 173 id = id2 174 tp.freeList.Remove(e) 175 break 176 } 177 } 178 } 179 return tp.threadStates[id] 180 } 181 return nil 182 } 183 184 /* 185 Returns a new ThreadState iff any new state is available otherwise 186 nil. 187 188 NOTE: the returned ThreadState is already locked iff non-nil. 189 */ 190 func (tp *DocumentsWriterPerThreadPool) newThreadState() (ts *ThreadState) { 191 tp.Lock() 192 defer tp.Unlock() 193 194 // Create a new empty thread state if possible 195 if len(tp.threadStates) < cap(tp.threadStates) { 196 ts = newThreadState(len(tp.threadStates)) 197 tp.threadStates = append(tp.threadStates, ts) 198 tp.lockedList.PushBack(ts.id) 199 } 200 return 201 } 202 203 func (tp *DocumentsWriterPerThreadPool) foreach(f func(state *ThreadState)) { 204 for i, limit := 0, len(tp.threadStates); i < limit; i++ { 205 ts := tp.lock(i, true) 206 assert(ts != nil) 207 f(ts) 208 tp.release(ts) 209 } 210 } 211 212 func (tp *DocumentsWriterPerThreadPool) find(f func(state *ThreadState) interface{}) interface{} { 213 for i, limit := 0, len(tp.threadStates); i < limit; i++ { 214 if ts := tp.lock(i, false); ts != nil { 215 res := f(ts) 216 tp.release(ts) 217 if res != nil { 218 return res 219 } 220 } 221 } 222 return nil 223 } 224 225 /* 226 Release the ThreadState back to the pool. Equals to 227 ThreadState.Unlock() in Lucene Java. 228 */ 229 func (tp *DocumentsWriterPerThreadPool) release(ts *ThreadState) { 230 tp.Lock() 231 defer tp.Unlock() 232 233 if waitingList := tp.listeners[ts.id]; waitingList != nil && waitingList.Len() > 0 { 234 // this thread state is reserved 235 e := waitingList.Front() 236 waitingList.Remove(e) 237 // re-allocate to external handler 238 e.Value.(chan *ThreadState) <- ts 239 return 240 } 241 242 // push the thread state back to 243 tp.freeList.PushBack(ts.id) 244 tp.hasMoreStates.Signal() 245 }