vitess.io/vitess@v0.16.2/go/vt/srvtopo/query.go (about)

     1  /*
     2  Copyright 2021 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package srvtopo
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"sync"
    23  	"time"
    24  
    25  	"vitess.io/vitess/go/stats"
    26  	"vitess.io/vitess/go/vt/log"
    27  )
    28  
// queryEntry is the per-key cache slot used by resilientQuery. It holds the
// most recent value and error obtained for key, together with the timestamps
// used to decide whether the value is still usable and whether it is time to
// query again.
type queryEntry struct {
	// immutable values
	key fmt.Stringer

	// the mutex protects any access to this structure (read or write)
	mutex sync.Mutex

	// refreshingChan is used to synchronize requests and avoid hammering
	// the topo server. It is non-nil while a refresh goroutine is in
	// flight; when that refresh finishes the channel is closed (waking up
	// every request blocked on it) and the field is reset to nil.
	refreshingChan chan struct{}

	// insertionTime is when value was last stored by a successful query.
	// It is reset to the zero time when a failed refresh finds the cached
	// value expired.
	insertionTime time.Time
	// lastQueryTime is when a refresh was last started, or when the last
	// successful refresh completed. It is compared against the refresh
	// interval to decide whether to query again.
	lastQueryTime time.Time
	// value is the result of the last successful query, or nil if there is
	// none or it has been dropped as expired.
	value         any
	// lastError is the error returned by the most recently completed
	// refresh (nil if it succeeded).
	lastError     error
}
    45  
// resilientQuery caches the results of query per key, so that callers can
// keep being served from the cache while the value is refreshed in the
// background or while the underlying query is failing.
type resilientQuery struct {
	// query fetches a fresh value for the given entry. It is invoked from a
	// background goroutine, without the entry mutex held.
	query func(ctx context.Context, entry *queryEntry) (any, error)

	// counts is incremented once per lookup, and again when a refresh
	// fails or when a cached value is kept despite a failed refresh.
	counts               *stats.CountersWithSingleLabel
	// cacheRefreshInterval is the minimum time between two refresh attempts
	// for the same entry.
	cacheRefreshInterval time.Duration
	// cacheTTL is how long a successfully fetched value is considered
	// valid. Callers that accept stale results may be served a value up to
	// cacheTTL + 2*cacheRefreshInterval old.
	cacheTTL             time.Duration

	// mutex protects entries. Each entry has its own mutex for its fields.
	mutex   sync.Mutex
	// entries maps the string form of a key to its cache slot.
	entries map[string]*queryEntry
}
    56  
    57  func (q *resilientQuery) getCurrentValue(ctx context.Context, wkey fmt.Stringer, staleOK bool) (any, error) {
    58  	q.counts.Add(queryCategory, 1)
    59  
    60  	// find the entry in the cache, add it if not there
    61  	key := wkey.String()
    62  	q.mutex.Lock()
    63  	entry, ok := q.entries[key]
    64  	if !ok {
    65  		entry = &queryEntry{
    66  			key: wkey,
    67  		}
    68  		q.entries[key] = entry
    69  	}
    70  	q.mutex.Unlock()
    71  
    72  	// Lock the entry, and do everything holding the lock except
    73  	// querying the underlying topo server.
    74  	//
    75  	// This means that even if the topo server is very slow, two concurrent
    76  	// requests will only issue one underlying query.
    77  	entry.mutex.Lock()
    78  	defer entry.mutex.Unlock()
    79  
    80  	cacheValid := entry.value != nil && (time.Since(entry.insertionTime) < q.cacheTTL)
    81  	if !cacheValid && staleOK {
    82  		// Only allow stale results for a bounded period
    83  		cacheValid = entry.value != nil && (time.Since(entry.insertionTime) < (q.cacheTTL + 2*q.cacheRefreshInterval))
    84  	}
    85  	shouldRefresh := time.Since(entry.lastQueryTime) > q.cacheRefreshInterval
    86  
    87  	// If it is not time to check again, then return either the cached
    88  	// value or the cached error but don't ask topo again.
    89  	if !shouldRefresh {
    90  		if cacheValid {
    91  			return entry.value, nil
    92  		}
    93  		return nil, entry.lastError
    94  	}
    95  
    96  	// Refresh the state in a background goroutine if no refresh is already
    97  	// in progress. This way queries are not blocked while the cache is still
    98  	// valid but past the refresh time, and avoids calling out to the topo
    99  	// service while the lock is held.
   100  	if entry.refreshingChan == nil {
   101  		entry.refreshingChan = make(chan struct{})
   102  		entry.lastQueryTime = time.Now()
   103  
   104  		go func() {
   105  			defer func() {
   106  				if err := recover(); err != nil {
   107  					log.Errorf("ResilientQuery uncaught panic, cell :%v, err :%v)", key, err)
   108  				}
   109  			}()
   110  
   111  			newCtx, cancel := context.WithTimeout(ctx, srvTopoTimeout)
   112  			defer cancel()
   113  
   114  			result, err := q.query(newCtx, entry)
   115  
   116  			entry.mutex.Lock()
   117  			defer func() {
   118  				close(entry.refreshingChan)
   119  				entry.refreshingChan = nil
   120  				entry.mutex.Unlock()
   121  			}()
   122  
   123  			if err == nil {
   124  				// save the value we got and the current time in the cache
   125  				entry.insertionTime = time.Now()
   126  				// Avoid a tiny race if TTL == refresh time (the default)
   127  				entry.lastQueryTime = entry.insertionTime
   128  				entry.value = result
   129  			} else {
   130  				q.counts.Add(errorCategory, 1)
   131  				if entry.insertionTime.IsZero() {
   132  					log.Errorf("ResilientQuery(%v, %v) failed: %v (no cached value, caching and returning error)", ctx, wkey, err)
   133  				} else if newCtx.Err() == context.DeadlineExceeded {
   134  					log.Errorf("ResilientQuery(%v, %v) failed: %v (request timeout), (keeping cached value: %v)", ctx, wkey, err, entry.value)
   135  				} else if entry.value != nil && time.Since(entry.insertionTime) < q.cacheTTL {
   136  					q.counts.Add(cachedCategory, 1)
   137  					log.Warningf("ResilientQuery(%v, %v) failed: %v (keeping cached value: %v)", ctx, wkey, err, entry.value)
   138  				} else {
   139  					log.Errorf("ResilientQuery(%v, %v) failed: %v (cached value expired)", ctx, wkey, err)
   140  					entry.insertionTime = time.Time{}
   141  					entry.value = nil
   142  				}
   143  			}
   144  
   145  			entry.lastError = err
   146  		}()
   147  	}
   148  
   149  	// If the cached entry is still valid then use it, otherwise wait
   150  	// for the refresh attempt to complete to get a more up to date
   151  	// response.
   152  	//
   153  	// In the event that the topo service is slow or unresponsive either
   154  	// on the initial fetch or if the cache TTL expires, then several
   155  	// requests could be blocked on refreshingChan waiting for the response
   156  	// to come back.
   157  	if cacheValid {
   158  		return entry.value, nil
   159  	}
   160  
   161  	refreshingChan := entry.refreshingChan
   162  	entry.mutex.Unlock()
   163  	select {
   164  	case <-refreshingChan:
   165  	case <-ctx.Done():
   166  		entry.mutex.Lock()
   167  		return nil, ctx.Err()
   168  	}
   169  	entry.mutex.Lock()
   170  
   171  	if entry.value != nil {
   172  		return entry.value, nil
   173  	}
   174  
   175  	return nil, entry.lastError
   176  }