github.com/dolthub/go-mysql-server@v0.18.0/sql/plan/cached_results.go (about) 1 // Copyright 2021 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package plan 16 17 import ( 18 "errors" 19 "io" 20 "sync" 21 "sync/atomic" 22 23 "github.com/dolthub/go-mysql-server/sql" 24 ) 25 26 // CachedResultsGlobalCache manages the caches created by CachedResults nodes. 27 var CachedResultsGlobalCache = NewCachedResultsManager() 28 29 var ErrEmptyCachedResult = errors.New("CachedResult contains no rows") 30 var ErrRowIterDisposed = errors.New("attempted to call RowIter() on a disposed Node") 31 32 // NewCachedResults returns a cached results plan Node, which will use a 33 // RowCache to cache results generated by Child.RowIter() and return those 34 // results for future calls to RowIter. This node is only safe to use if the 35 // Child is deterministic and is not dependent on the |row| parameter in the 36 // call to RowIter. 37 func NewCachedResults(n sql.Node) *CachedResults { 38 return &CachedResults{ 39 UnaryNode: UnaryNode{n}, 40 Id: CachedResultsGlobalCache.allocateUniqueId(), 41 } 42 } 43 44 // CachedResults tees the child node iterator into an in-memory cache 45 // for faster subsequent retrieval. This is usually combined with a 46 // HashLookup, whose RowIter defers to a CachedResult child to populate 47 // rows in memory on a first iteration. The second RowIter moves the 48 // rows from the memory cache to a hash map attached to HashLookup, 49 // disposing the CachedResult afterwards. 50 // 51 // In the special case where we fill a CachedResult on pass one, but pass 52 // two never happens, we have to take care not to orphan the cache. 53 // 54 // When we exhaust the source, but the cache is empty, subsequent calls 55 // to RowIter return an ErrEmptyCachedResult error for short-circuiting 56 // join trees. 57 // 58 // When the memory manager cannot accommodate expanding the cache, we 59 // fall back to a passthrough iterator. 60 type CachedResults struct { 61 UnaryNode 62 Id uint64 63 Mutex sync.Mutex 64 //NoCache is set when the memory manager is unable to build 65 // a cache, so we fallback to a passthrough RowIter 66 NoCache bool 67 // Finalized is set when we exhaust the child iter, and subsequent 68 // RowIters will read from the cache rather than the child 69 Finalized bool 70 // Disposed is set after this CachedResults is invalidated 71 Disposed bool 72 } 73 74 var _ sql.Node = (*CachedResults)(nil) 75 var _ sql.CollationCoercible = (*CachedResults)(nil) 76 77 func (n *CachedResults) Dispose() { 78 n.Disposed = true 79 CachedResultsGlobalCache.disposeCachedResultsById(n.Id) 80 } 81 82 func (n *CachedResults) String() string { 83 pr := sql.NewTreePrinter() 84 _ = pr.WriteNode("CachedResults") 85 _ = pr.WriteChildren(n.UnaryNode.Child.String()) 86 return pr.String() 87 } 88 89 func (n *CachedResults) DebugString() string { 90 pr := sql.NewTreePrinter() 91 _ = pr.WriteNode("CachedResults") 92 _ = pr.WriteChildren(sql.DebugString(n.UnaryNode.Child)) 93 return pr.String() 94 } 95 96 func (n *CachedResults) WithChildren(children ...sql.Node) (sql.Node, error) { 97 if len(children) != 1 { 98 return nil, sql.ErrInvalidChildrenNumber.New(n, len(children), 1) 99 } 100 nn := *n 101 nn.UnaryNode.Child = children[0] 102 return &nn, nil 103 } 104 105 // CheckPrivileges implements the interface sql.Node. 106 func (n *CachedResults) CheckPrivileges(ctx *sql.Context, opChecker sql.PrivilegedOperationChecker) bool { 107 return n.Child.CheckPrivileges(ctx, opChecker) 108 } 109 110 // CollationCoercibility implements the interface sql.CollationCoercible. 111 func (n *CachedResults) CollationCoercibility(ctx *sql.Context) (collation sql.CollationID, coercibility byte) { 112 return sql.GetCoercibility(ctx, n.Child) 113 } 114 115 func (n *CachedResults) GetCachedResults() []sql.Row { 116 return CachedResultsGlobalCache.getCachedResultsById(n.Id) 117 } 118 119 func (n *CachedResults) IsReadOnly() bool { 120 return n.Child.IsReadOnly() 121 } 122 123 var EmptyIter = &emptyCacheIter{} 124 125 func IsEmptyIter(i sql.RowIter) bool { 126 return i == EmptyIter 127 } 128 129 type emptyCacheIter struct{} 130 131 var _ sql.RowIter = (*emptyCacheIter)(nil) 132 133 func (i *emptyCacheIter) Next(ctx *sql.Context) (sql.Row, error) { return nil, io.EOF } 134 135 func (i *emptyCacheIter) Close(ctx *sql.Context) error { return nil } 136 137 // cachedResultsManager manages the saved results collected by CachedResults nodes. It is necessary to do this outside 138 // of the CachedResult node instances themselves, since executing a query plan can make transient transforms that are 139 // not persisted back and can cause cache memory leaks. 140 type cachedResultsManager struct { 141 // cachedResultsCaches tracks caches used by CachedResults globally so that even if a CachedResult 142 // object is copied as part of a transform, its cache can be properly disposed. This is necessary because 143 // when evaluating subquery expressions, the query plan is transformed into a new copy that has prependRow nodes 144 // in it, but that modified version of the query plan is transient, so the caches need to be held somewhere 145 // where they can be properly disposed after the lifetime of an individual subquery expression. 146 cachedResultsCaches map[uint64]*cacheDisposeTuple 147 148 // mutex protects the cachedResultsCaches map from concurrent access. 149 mutex sync.Mutex 150 151 // cachedResultsUniqueIdCounter stores a counter that should only be incremented atomically and is used 152 // as a unique ID when a new CachedResults object is created for a node. 153 cachedResultsUniqueIdCounter uint64 154 } 155 156 // cacheDisposeTuple is a container for a cache and the related function to dispose it. 157 type cacheDisposeTuple struct { 158 cache sql.RowsCache 159 dispose sql.DisposeFunc 160 } 161 162 func NewCachedResultsManager() *cachedResultsManager { 163 return &cachedResultsManager{ 164 cachedResultsCaches: make(map[uint64]*cacheDisposeTuple), 165 } 166 } 167 168 func (crm *cachedResultsManager) allocateUniqueId() uint64 { 169 return atomic.AddUint64(&(crm.cachedResultsUniqueIdCounter), 1) 170 } 171 172 func (crm *cachedResultsManager) getCachedResultsById(id uint64) []sql.Row { 173 crm.mutex.Lock() 174 defer crm.mutex.Unlock() 175 176 if results, ok := crm.cachedResultsCaches[id]; ok { 177 return results.cache.Get() 178 } else { 179 return nil 180 } 181 } 182 183 func (crm *cachedResultsManager) AddNewCache(id uint64, cache sql.RowsCache, dispose sql.DisposeFunc) bool { 184 crm.mutex.Lock() 185 defer crm.mutex.Unlock() 186 187 if _, ok := crm.cachedResultsCaches[id]; ok { 188 return false 189 } 190 191 crm.cachedResultsCaches[id] = &cacheDisposeTuple{cache: cache, dispose: dispose} 192 return true 193 } 194 195 func (crm *cachedResultsManager) disposeCachedResultsById(id uint64) { 196 crm.mutex.Lock() 197 defer crm.mutex.Unlock() 198 199 if results, ok := crm.cachedResultsCaches[id]; ok { 200 results.dispose() 201 delete(crm.cachedResultsCaches, id) 202 } 203 }