github.com/dolthub/go-mysql-server@v0.18.0/sql/plan/cached_results.go (about)

     1  // Copyright 2021 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package plan
    16  
    17  import (
    18  	"errors"
    19  	"io"
    20  	"sync"
    21  	"sync/atomic"
    22  
    23  	"github.com/dolthub/go-mysql-server/sql"
    24  )
    25  
    26  // CachedResultsGlobalCache manages the caches created by CachedResults nodes.
    27  var CachedResultsGlobalCache = NewCachedResultsManager()
    28  
    29  var ErrEmptyCachedResult = errors.New("CachedResult contains no rows")
    30  var ErrRowIterDisposed = errors.New("attempted to call RowIter() on a disposed Node")
    31  
    32  // NewCachedResults returns a cached results plan Node, which will use a
    33  // RowCache to cache results generated by Child.RowIter() and return those
    34  // results for future calls to RowIter. This node is only safe to use if the
    35  // Child is deterministic and is not dependent on the |row| parameter in the
    36  // call to RowIter.
    37  func NewCachedResults(n sql.Node) *CachedResults {
    38  	return &CachedResults{
    39  		UnaryNode: UnaryNode{n},
    40  		Id:        CachedResultsGlobalCache.allocateUniqueId(),
    41  	}
    42  }
    43  
    44  // CachedResults tees the child node iterator into an in-memory cache
    45  // for faster subsequent retrieval. This is usually combined with a
    46  // HashLookup, whose RowIter defers to a CachedResult child to populate
    47  // rows in memory on a first iteration. The second RowIter moves the
    48  // rows from the memory cache to a hash map attached to HashLookup,
    49  // disposing the CachedResult afterwards.
    50  //
    51  // In the special case where we fill a CachedResult on pass one, but pass
    52  // two never happens, we have to take care not to orphan the cache.
    53  //
    54  // When we exhaust the source, but the cache is empty, subsequent calls
    55  // to RowIter return an ErrEmptyCachedResult error for short-circuiting
    56  // join trees.
    57  //
    58  // When the memory manager cannot accommodate expanding the cache, we
    59  // fall back to a passthrough iterator.
    60  type CachedResults struct {
    61  	UnaryNode
    62  	Id    uint64
    63  	Mutex sync.Mutex
    64  	//NoCache is set when the memory manager is unable to build
    65  	// a cache, so we fallback to a passthrough RowIter
    66  	NoCache bool
    67  	// Finalized is set when we exhaust the child iter, and subsequent
    68  	// RowIters will read from the cache rather than the child
    69  	Finalized bool
    70  	// Disposed is set after this CachedResults is invalidated
    71  	Disposed bool
    72  }
    73  
    74  var _ sql.Node = (*CachedResults)(nil)
    75  var _ sql.CollationCoercible = (*CachedResults)(nil)
    76  
    77  func (n *CachedResults) Dispose() {
    78  	n.Disposed = true
    79  	CachedResultsGlobalCache.disposeCachedResultsById(n.Id)
    80  }
    81  
    82  func (n *CachedResults) String() string {
    83  	pr := sql.NewTreePrinter()
    84  	_ = pr.WriteNode("CachedResults")
    85  	_ = pr.WriteChildren(n.UnaryNode.Child.String())
    86  	return pr.String()
    87  }
    88  
    89  func (n *CachedResults) DebugString() string {
    90  	pr := sql.NewTreePrinter()
    91  	_ = pr.WriteNode("CachedResults")
    92  	_ = pr.WriteChildren(sql.DebugString(n.UnaryNode.Child))
    93  	return pr.String()
    94  }
    95  
    96  func (n *CachedResults) WithChildren(children ...sql.Node) (sql.Node, error) {
    97  	if len(children) != 1 {
    98  		return nil, sql.ErrInvalidChildrenNumber.New(n, len(children), 1)
    99  	}
   100  	nn := *n
   101  	nn.UnaryNode.Child = children[0]
   102  	return &nn, nil
   103  }
   104  
   105  // CheckPrivileges implements the interface sql.Node.
   106  func (n *CachedResults) CheckPrivileges(ctx *sql.Context, opChecker sql.PrivilegedOperationChecker) bool {
   107  	return n.Child.CheckPrivileges(ctx, opChecker)
   108  }
   109  
   110  // CollationCoercibility implements the interface sql.CollationCoercible.
   111  func (n *CachedResults) CollationCoercibility(ctx *sql.Context) (collation sql.CollationID, coercibility byte) {
   112  	return sql.GetCoercibility(ctx, n.Child)
   113  }
   114  
   115  func (n *CachedResults) GetCachedResults() []sql.Row {
   116  	return CachedResultsGlobalCache.getCachedResultsById(n.Id)
   117  }
   118  
   119  func (n *CachedResults) IsReadOnly() bool {
   120  	return n.Child.IsReadOnly()
   121  }
   122  
   123  var EmptyIter = &emptyCacheIter{}
   124  
   125  func IsEmptyIter(i sql.RowIter) bool {
   126  	return i == EmptyIter
   127  }
   128  
   129  type emptyCacheIter struct{}
   130  
   131  var _ sql.RowIter = (*emptyCacheIter)(nil)
   132  
   133  func (i *emptyCacheIter) Next(ctx *sql.Context) (sql.Row, error) { return nil, io.EOF }
   134  
   135  func (i *emptyCacheIter) Close(ctx *sql.Context) error { return nil }
   136  
   137  // cachedResultsManager manages the saved results collected by CachedResults nodes. It is necessary to do this outside
   138  // of the CachedResult node instances themselves, since executing a query plan can make transient transforms that are
   139  // not persisted back and can cause cache memory leaks.
   140  type cachedResultsManager struct {
   141  	// cachedResultsCaches tracks caches used by CachedResults globally so that even if a CachedResult
   142  	// object is copied as part of a transform, its cache can be properly disposed. This is necessary because
   143  	// when evaluating subquery expressions, the query plan is transformed into a new copy that has prependRow nodes
   144  	// in it, but that modified version of the query plan is transient, so the caches need to be held somewhere
   145  	// where they can be properly disposed after the lifetime of an individual subquery expression.
   146  	cachedResultsCaches map[uint64]*cacheDisposeTuple
   147  
   148  	// mutex protects the cachedResultsCaches map from concurrent access.
   149  	mutex sync.Mutex
   150  
   151  	// cachedResultsUniqueIdCounter stores a counter that should only be incremented atomically and is used
   152  	// as a unique ID when a new CachedResults object is created for a node.
   153  	cachedResultsUniqueIdCounter uint64
   154  }
   155  
   156  // cacheDisposeTuple is a container for a cache and the related function to dispose it.
   157  type cacheDisposeTuple struct {
   158  	cache   sql.RowsCache
   159  	dispose sql.DisposeFunc
   160  }
   161  
   162  func NewCachedResultsManager() *cachedResultsManager {
   163  	return &cachedResultsManager{
   164  		cachedResultsCaches: make(map[uint64]*cacheDisposeTuple),
   165  	}
   166  }
   167  
   168  func (crm *cachedResultsManager) allocateUniqueId() uint64 {
   169  	return atomic.AddUint64(&(crm.cachedResultsUniqueIdCounter), 1)
   170  }
   171  
   172  func (crm *cachedResultsManager) getCachedResultsById(id uint64) []sql.Row {
   173  	crm.mutex.Lock()
   174  	defer crm.mutex.Unlock()
   175  
   176  	if results, ok := crm.cachedResultsCaches[id]; ok {
   177  		return results.cache.Get()
   178  	} else {
   179  		return nil
   180  	}
   181  }
   182  
   183  func (crm *cachedResultsManager) AddNewCache(id uint64, cache sql.RowsCache, dispose sql.DisposeFunc) bool {
   184  	crm.mutex.Lock()
   185  	defer crm.mutex.Unlock()
   186  
   187  	if _, ok := crm.cachedResultsCaches[id]; ok {
   188  		return false
   189  	}
   190  
   191  	crm.cachedResultsCaches[id] = &cacheDisposeTuple{cache: cache, dispose: dispose}
   192  	return true
   193  }
   194  
   195  func (crm *cachedResultsManager) disposeCachedResultsById(id uint64) {
   196  	crm.mutex.Lock()
   197  	defer crm.mutex.Unlock()
   198  
   199  	if results, ok := crm.cachedResultsCaches[id]; ok {
   200  		results.dispose()
   201  		delete(crm.cachedResultsCaches, id)
   202  	}
   203  }