vitess.io/vitess@v0.16.2/go/vt/srvtopo/query.go (about) 1 /* 2 Copyright 2021 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package srvtopo 18 19 import ( 20 "context" 21 "fmt" 22 "sync" 23 "time" 24 25 "vitess.io/vitess/go/stats" 26 "vitess.io/vitess/go/vt/log" 27 ) 28 29 type queryEntry struct { 30 // immutable values 31 key fmt.Stringer 32 33 // the mutex protects any access to this structure (read or write) 34 mutex sync.Mutex 35 36 // refreshingChan is used to synchronize requests and avoid hammering 37 // the topo server 38 refreshingChan chan struct{} 39 40 insertionTime time.Time 41 lastQueryTime time.Time 42 value any 43 lastError error 44 } 45 46 type resilientQuery struct { 47 query func(ctx context.Context, entry *queryEntry) (any, error) 48 49 counts *stats.CountersWithSingleLabel 50 cacheRefreshInterval time.Duration 51 cacheTTL time.Duration 52 53 mutex sync.Mutex 54 entries map[string]*queryEntry 55 } 56 57 func (q *resilientQuery) getCurrentValue(ctx context.Context, wkey fmt.Stringer, staleOK bool) (any, error) { 58 q.counts.Add(queryCategory, 1) 59 60 // find the entry in the cache, add it if not there 61 key := wkey.String() 62 q.mutex.Lock() 63 entry, ok := q.entries[key] 64 if !ok { 65 entry = &queryEntry{ 66 key: wkey, 67 } 68 q.entries[key] = entry 69 } 70 q.mutex.Unlock() 71 72 // Lock the entry, and do everything holding the lock except 73 // querying the underlying topo server. 74 // 75 // This means that even if the topo server is very slow, two concurrent 76 // requests will only issue one underlying query. 77 entry.mutex.Lock() 78 defer entry.mutex.Unlock() 79 80 cacheValid := entry.value != nil && (time.Since(entry.insertionTime) < q.cacheTTL) 81 if !cacheValid && staleOK { 82 // Only allow stale results for a bounded period 83 cacheValid = entry.value != nil && (time.Since(entry.insertionTime) < (q.cacheTTL + 2*q.cacheRefreshInterval)) 84 } 85 shouldRefresh := time.Since(entry.lastQueryTime) > q.cacheRefreshInterval 86 87 // If it is not time to check again, then return either the cached 88 // value or the cached error but don't ask topo again. 89 if !shouldRefresh { 90 if cacheValid { 91 return entry.value, nil 92 } 93 return nil, entry.lastError 94 } 95 96 // Refresh the state in a background goroutine if no refresh is already 97 // in progress. This way queries are not blocked while the cache is still 98 // valid but past the refresh time, and avoids calling out to the topo 99 // service while the lock is held. 100 if entry.refreshingChan == nil { 101 entry.refreshingChan = make(chan struct{}) 102 entry.lastQueryTime = time.Now() 103 104 go func() { 105 defer func() { 106 if err := recover(); err != nil { 107 log.Errorf("ResilientQuery uncaught panic, cell :%v, err :%v)", key, err) 108 } 109 }() 110 111 newCtx, cancel := context.WithTimeout(ctx, srvTopoTimeout) 112 defer cancel() 113 114 result, err := q.query(newCtx, entry) 115 116 entry.mutex.Lock() 117 defer func() { 118 close(entry.refreshingChan) 119 entry.refreshingChan = nil 120 entry.mutex.Unlock() 121 }() 122 123 if err == nil { 124 // save the value we got and the current time in the cache 125 entry.insertionTime = time.Now() 126 // Avoid a tiny race if TTL == refresh time (the default) 127 entry.lastQueryTime = entry.insertionTime 128 entry.value = result 129 } else { 130 q.counts.Add(errorCategory, 1) 131 if entry.insertionTime.IsZero() { 132 log.Errorf("ResilientQuery(%v, %v) failed: %v (no cached value, caching and returning error)", ctx, wkey, err) 133 } else if newCtx.Err() == context.DeadlineExceeded { 134 log.Errorf("ResilientQuery(%v, %v) failed: %v (request timeout), (keeping cached value: %v)", ctx, wkey, err, entry.value) 135 } else if entry.value != nil && time.Since(entry.insertionTime) < q.cacheTTL { 136 q.counts.Add(cachedCategory, 1) 137 log.Warningf("ResilientQuery(%v, %v) failed: %v (keeping cached value: %v)", ctx, wkey, err, entry.value) 138 } else { 139 log.Errorf("ResilientQuery(%v, %v) failed: %v (cached value expired)", ctx, wkey, err) 140 entry.insertionTime = time.Time{} 141 entry.value = nil 142 } 143 } 144 145 entry.lastError = err 146 }() 147 } 148 149 // If the cached entry is still valid then use it, otherwise wait 150 // for the refresh attempt to complete to get a more up to date 151 // response. 152 // 153 // In the event that the topo service is slow or unresponsive either 154 // on the initial fetch or if the cache TTL expires, then several 155 // requests could be blocked on refreshingChan waiting for the response 156 // to come back. 157 if cacheValid { 158 return entry.value, nil 159 } 160 161 refreshingChan := entry.refreshingChan 162 entry.mutex.Unlock() 163 select { 164 case <-refreshingChan: 165 case <-ctx.Done(): 166 entry.mutex.Lock() 167 return nil, ctx.Err() 168 } 169 entry.mutex.Lock() 170 171 if entry.value != nil { 172 return entry.value, nil 173 } 174 175 return nil, entry.lastError 176 }