github.com/rohankumardubey/aresdb@v0.0.2-0.20190517170215-e54e3ca06b9c/query/device_manager.go (about) 1 // Copyright (c) 2017-2018 Uber Technologies, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package query 16 17 import ( 18 "sync" 19 20 "math" 21 "strconv" 22 "time" 23 24 "github.com/uber/aresdb/common" 25 "github.com/uber/aresdb/memutils" 26 "github.com/uber/aresdb/utils" 27 ) 28 29 const ( 30 mb2bytes = 1 << 20 31 defaultDeviceUtilization = 1 32 defaultTimeout = 10 33 ) 34 35 // DeviceInfo stores memory information per device 36 type DeviceInfo struct { 37 // device id 38 DeviceID int `json:"deviceID"` 39 // number of queries being served by device 40 QueryCount int `json:"queryCount"` 41 // device capacity. 42 TotalMemory int `json:"totalMemory"` 43 // device available capacity. 44 TotalAvailableMemory int `json:"totalAvailableMemory"` 45 // total free memory 46 FreeMemory int `json:"totalFreeMemory"` 47 // query to memory map 48 QueryMemoryUsageMap map[*AQLQuery]int `json:"-"` 49 } 50 51 // DeviceManager has the following functionalities: 52 // 1. Keep track of number of queries being served by this device and memory usage info 53 // 2. Estimate the memory requirement for a given query and determine if a device has enough memory to process a query 54 // 3. Assign queries to chosen device according to routing strategy specified 55 type DeviceManager struct { 56 // lock to sync ops. 57 *sync.RWMutex `json:"-"` 58 // device to DeviceInfo map 59 DeviceInfos []*DeviceInfo `json:"deviceInfos"` 60 // default DeviceChoosingTimeout for finding a device 61 Timeout int `json:"timeout"` 62 // Max available memory, this can be used to early determined whether a query can be satisfied or not. 63 MaxAvailableMemory int `json:"maxAvailableMemory"` 64 deviceAvailable *sync.Cond 65 // device choose strategy 66 strategy deviceChooseStrategy 67 } 68 69 // NewDeviceManager is used to init a DeviceManager. 70 func NewDeviceManager(cfg common.QueryConfig) *DeviceManager { 71 deviceMemoryUtilization := cfg.DeviceMemoryUtilization 72 if deviceMemoryUtilization <= 0 || deviceMemoryUtilization > 1 { 73 utils.GetLogger().With("deviceMemoryUtilization", deviceMemoryUtilization). 74 Error("Invalid deviceMemoryUtilization config, setting to default") 75 deviceMemoryUtilization = defaultDeviceUtilization 76 } 77 78 timeout := cfg.DeviceChoosingTimeout 79 if timeout <= 0 { 80 utils.GetLogger().With("timeout", timeout). 81 Error("Invalid timeout config, setting to default") 82 timeout = defaultTimeout 83 } 84 85 // retrieve device counts 86 deviceCount := memutils.GetDeviceCount() 87 utils.GetLogger().With( 88 "utilization", deviceMemoryUtilization, 89 "timeout", timeout).Info("Initialized device manager") 90 91 deviceInfos := make([]*DeviceInfo, deviceCount) 92 maxAvailableMem := 0 93 for device := 0; device < deviceCount; device++ { 94 deviceInfos[device] = getDeviceInfo(device, deviceMemoryUtilization) 95 if deviceInfos[device].TotalAvailableMemory >= maxAvailableMem { 96 maxAvailableMem = deviceInfos[device].TotalAvailableMemory 97 } 98 } 99 100 deviceManager := &DeviceManager{ 101 RWMutex: &sync.RWMutex{}, 102 DeviceInfos: deviceInfos, 103 MaxAvailableMemory: maxAvailableMem, 104 Timeout: timeout, 105 } 106 107 deviceManager.strategy = leastQueryCountAndMemoryStrategy{ 108 deviceManager: deviceManager, 109 } 110 111 deviceManager.deviceAvailable = sync.NewCond(deviceManager) 112 113 // Bootstrap device. 114 utils.GetLogger().Info("Bootstrapping device") 115 bootstrapDevice() 116 utils.GetLogger().Info("Finish bootstrapping device") 117 return deviceManager 118 } 119 120 // getDeviceInfo returns the DeviceInfo struct for a given deviceID. 121 func getDeviceInfo(device int, deviceMemoryUtilization float32) *DeviceInfo { 122 totalGlobalMem := memutils.GetDeviceGlobalMemoryInMB(device) * mb2bytes 123 totalAvailableMem := int(float32(totalGlobalMem) * deviceMemoryUtilization) 124 125 deviceInfo := DeviceInfo{ 126 DeviceID: device, 127 QueryCount: 0, 128 TotalMemory: totalGlobalMem, 129 TotalAvailableMemory: totalAvailableMem, 130 FreeMemory: totalAvailableMem, 131 QueryMemoryUsageMap: make(map[*AQLQuery]int, 0), 132 } 133 utils.GetLogger().Infof("DeviceInfo[%d]=%+v\n", device, deviceInfo) 134 return &deviceInfo 135 } 136 137 // FindDevice finds a device to run a given query. If a device is not found, it will wait until 138 // the DeviceChoosingTimeout seconds elapse. 139 func (d *DeviceManager) FindDevice(query *AQLQuery, requiredMem int, preferredDevice int, timeout int) int { 140 if requiredMem > d.MaxAvailableMemory { 141 utils.GetQueryLogger().With( 142 "query", query, 143 "requiredMem", requiredMem, 144 "preferredDevice", preferredDevice, 145 "maxAvailableMem", d.MaxAvailableMemory, 146 ).Warn("exceeds max memory") 147 return -1 148 } 149 150 // no DeviceChoosingTimeout passed by request, using default DeviceChoosingTimeout. 151 if timeout <= 0 { 152 timeout = d.Timeout 153 } 154 155 timeoutDuration := time.Duration(timeout) * time.Second 156 157 start := utils.Now() 158 d.Lock() 159 device := -1 160 for { 161 if utils.Now().Sub(start) >= timeoutDuration { 162 utils.GetQueryLogger().With( 163 "query", query, 164 "requiredMem", requiredMem, 165 "preferredDevice", preferredDevice, 166 "timeout", timeout, 167 ).Error("DeviceChoosingTimeout when choosing the device for the query") 168 break 169 } 170 171 device = d.findDevice(query, requiredMem, preferredDevice) 172 if device >= 0 { 173 break 174 } 175 d.deviceAvailable.Wait() 176 } 177 d.Unlock() 178 utils.GetRootReporter().GetTimer(utils.QueryWaitForMemoryDuration).Record(utils.Now().Sub(start)) 179 return device 180 } 181 182 // findDevice finds a device to run a given query according to certain strategy.If no such device can't 183 // be found, return -1. Caller needs to hold the write lock. 184 func (d *DeviceManager) findDevice(query *AQLQuery, requiredMem int, preferredDevice int) int { 185 utils.GetQueryLogger().With( 186 "query", query, 187 "requiredMem", requiredMem, 188 "preferredDevice", preferredDevice, 189 ).Debug("trying to find device for query") 190 candidateDevice := -1 191 192 // try to choose preferredDevice if it meets requirements. 193 if preferredDevice >= 0 && preferredDevice < len(d.DeviceInfos) && 194 d.DeviceInfos[preferredDevice].FreeMemory >= requiredMem { 195 candidateDevice = preferredDevice 196 } 197 198 // choose candidateDevice if preferredDevice does not meet requirements 199 if candidateDevice < 0 { 200 candidateDevice = d.strategy.chooseDevice(requiredMem) 201 } 202 203 if candidateDevice < 0 { 204 return candidateDevice 205 } 206 207 // reserve memory for this query. 208 deviceInfo := d.DeviceInfos[candidateDevice] 209 deviceInfo.QueryCount++ 210 deviceInfo.QueryMemoryUsageMap[query] = requiredMem 211 deviceInfo.FreeMemory -= requiredMem 212 deviceInfo.reportMemoryUsage() 213 214 utils.GetLogger().Debugf("Assign device '%d' for query", candidateDevice) 215 utils.GetLogger().Debugf("DeviceInfo=%+v", deviceInfo) 216 return candidateDevice 217 } 218 219 // ReleaseReservedMemory adjust total free global memory for a given device after a query is complete 220 func (d *DeviceManager) ReleaseReservedMemory(device int, query *AQLQuery) { 221 // Don't even need the lock, 222 if device < 0 || device >= len(d.DeviceInfos) { 223 return 224 } 225 226 d.Lock() 227 defer d.Unlock() 228 deviceInfo := d.DeviceInfos[device] 229 usage, ok := deviceInfo.QueryMemoryUsageMap[query] 230 if ok { 231 utils.GetLogger().Debugf("Freed %d bytes memory on device %d", usage, device) 232 deviceInfo.FreeMemory += usage 233 deviceInfo.reportMemoryUsage() 234 delete(deviceInfo.QueryMemoryUsageMap, query) 235 deviceInfo.QueryCount-- 236 d.deviceAvailable.Broadcast() 237 } 238 } 239 240 // reportMemoryUsage reports the memory usage of specified device. Caller needs to hold the lock. 241 func (deviceInfo *DeviceInfo) reportMemoryUsage() { 242 utils.GetRootReporter().GetChildGauge(map[string]string{ 243 "device": strconv.Itoa(deviceInfo.DeviceID), 244 }, utils.EstimatedDeviceMemory).Update( 245 float64(deviceInfo.TotalAvailableMemory - deviceInfo.FreeMemory)) 246 } 247 248 // deviceChooseStrategy defines the interface to choose an available device for 249 // specific query. 250 type deviceChooseStrategy interface { 251 chooseDevice(requiredMem int) int 252 } 253 254 // leastAvailableMemoryStrategy is to pick up device with least query count and 255 // least memory that's larger than required memory of the query. 256 type leastQueryCountAndMemoryStrategy struct { 257 deviceManager *DeviceManager 258 } 259 260 // chooseDevice finds a device to run a given query according to certain strategy 261 // If no such device, return -1. 262 func (s leastQueryCountAndMemoryStrategy) chooseDevice(requiredMem int) int { 263 candidateDevice := -1 264 leastMemory := int(math.MaxInt64) 265 leastQueryCount := int(math.MaxInt32) 266 for device, deviceInfo := range s.deviceManager.DeviceInfos { 267 if deviceInfo.FreeMemory >= requiredMem && (deviceInfo.QueryCount < leastQueryCount || 268 (deviceInfo.QueryCount == leastQueryCount && deviceInfo.FreeMemory <= leastMemory)) { 269 candidateDevice = device 270 leastQueryCount = deviceInfo.QueryCount 271 leastMemory = deviceInfo.FreeMemory 272 } 273 } 274 return candidateDevice 275 }