github.com/rohankumardubey/aresdb@v0.0.2-0.20190517170215-e54e3ca06b9c/query/device_manager.go (about)

     1  //  Copyright (c) 2017-2018 Uber Technologies, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package query
    16  
    17  import (
    18  	"sync"
    19  
    20  	"math"
    21  	"strconv"
    22  	"time"
    23  
    24  	"github.com/uber/aresdb/common"
    25  	"github.com/uber/aresdb/memutils"
    26  	"github.com/uber/aresdb/utils"
    27  )
    28  
    29  const (
    30  	mb2bytes                 = 1 << 20
    31  	defaultDeviceUtilization = 1
    32  	defaultTimeout           = 10
    33  )
    34  
    35  // DeviceInfo stores memory information per device
    36  type DeviceInfo struct {
    37  	// device id
    38  	DeviceID int `json:"deviceID"`
    39  	// number of queries being served by device
    40  	QueryCount int `json:"queryCount"`
    41  	// device capacity.
    42  	TotalMemory int `json:"totalMemory"`
    43  	// device available capacity.
    44  	TotalAvailableMemory int `json:"totalAvailableMemory"`
    45  	// total free memory
    46  	FreeMemory int `json:"totalFreeMemory"`
    47  	// query to memory map
    48  	QueryMemoryUsageMap map[*AQLQuery]int `json:"-"`
    49  }
    50  
    51  // DeviceManager has the following functionalities:
    52  // 1. Keep track of number of queries being served by this device and memory usage info
    53  // 2. Estimate the memory requirement for a given query and determine if a device has enough memory to process a query
    54  // 3. Assign queries to chosen device according to routing strategy specified
    55  type DeviceManager struct {
    56  	// lock to sync ops.
    57  	*sync.RWMutex `json:"-"`
    58  	// device to DeviceInfo map
    59  	DeviceInfos []*DeviceInfo `json:"deviceInfos"`
    60  	// default DeviceChoosingTimeout for finding a device
    61  	Timeout int `json:"timeout"`
    62  	// Max available memory, this can be used to early determined whether a query can be satisfied or not.
    63  	MaxAvailableMemory int `json:"maxAvailableMemory"`
    64  	deviceAvailable    *sync.Cond
    65  	// device choose strategy
    66  	strategy deviceChooseStrategy
    67  }
    68  
    69  // NewDeviceManager is used to init a DeviceManager.
    70  func NewDeviceManager(cfg common.QueryConfig) *DeviceManager {
    71  	deviceMemoryUtilization := cfg.DeviceMemoryUtilization
    72  	if deviceMemoryUtilization <= 0 || deviceMemoryUtilization > 1 {
    73  		utils.GetLogger().With("deviceMemoryUtilization", deviceMemoryUtilization).
    74  			Error("Invalid deviceMemoryUtilization config, setting to default")
    75  		deviceMemoryUtilization = defaultDeviceUtilization
    76  	}
    77  
    78  	timeout := cfg.DeviceChoosingTimeout
    79  	if timeout <= 0 {
    80  		utils.GetLogger().With("timeout", timeout).
    81  			Error("Invalid timeout config, setting to default")
    82  		timeout = defaultTimeout
    83  	}
    84  
    85  	// retrieve device counts
    86  	deviceCount := memutils.GetDeviceCount()
    87  	utils.GetLogger().With(
    88  		"utilization", deviceMemoryUtilization,
    89  		"timeout", timeout).Info("Initialized device manager")
    90  
    91  	deviceInfos := make([]*DeviceInfo, deviceCount)
    92  	maxAvailableMem := 0
    93  	for device := 0; device < deviceCount; device++ {
    94  		deviceInfos[device] = getDeviceInfo(device, deviceMemoryUtilization)
    95  		if deviceInfos[device].TotalAvailableMemory >= maxAvailableMem {
    96  			maxAvailableMem = deviceInfos[device].TotalAvailableMemory
    97  		}
    98  	}
    99  
   100  	deviceManager := &DeviceManager{
   101  		RWMutex:            &sync.RWMutex{},
   102  		DeviceInfos:        deviceInfos,
   103  		MaxAvailableMemory: maxAvailableMem,
   104  		Timeout:            timeout,
   105  	}
   106  
   107  	deviceManager.strategy = leastQueryCountAndMemoryStrategy{
   108  		deviceManager: deviceManager,
   109  	}
   110  
   111  	deviceManager.deviceAvailable = sync.NewCond(deviceManager)
   112  
   113  	// Bootstrap device.
   114  	utils.GetLogger().Info("Bootstrapping device")
   115  	bootstrapDevice()
   116  	utils.GetLogger().Info("Finish bootstrapping device")
   117  	return deviceManager
   118  }
   119  
   120  // getDeviceInfo returns the DeviceInfo struct for a given deviceID.
   121  func getDeviceInfo(device int, deviceMemoryUtilization float32) *DeviceInfo {
   122  	totalGlobalMem := memutils.GetDeviceGlobalMemoryInMB(device) * mb2bytes
   123  	totalAvailableMem := int(float32(totalGlobalMem) * deviceMemoryUtilization)
   124  
   125  	deviceInfo := DeviceInfo{
   126  		DeviceID:             device,
   127  		QueryCount:           0,
   128  		TotalMemory:          totalGlobalMem,
   129  		TotalAvailableMemory: totalAvailableMem,
   130  		FreeMemory:           totalAvailableMem,
   131  		QueryMemoryUsageMap:  make(map[*AQLQuery]int, 0),
   132  	}
   133  	utils.GetLogger().Infof("DeviceInfo[%d]=%+v\n", device, deviceInfo)
   134  	return &deviceInfo
   135  }
   136  
   137  // FindDevice finds a device to run a given query. If a device is not found, it will wait until
   138  // the DeviceChoosingTimeout seconds elapse.
   139  func (d *DeviceManager) FindDevice(query *AQLQuery, requiredMem int, preferredDevice int, timeout int) int {
   140  	if requiredMem > d.MaxAvailableMemory {
   141  		utils.GetQueryLogger().With(
   142  			"query", query,
   143  			"requiredMem", requiredMem,
   144  			"preferredDevice", preferredDevice,
   145  			"maxAvailableMem", d.MaxAvailableMemory,
   146  		).Warn("exceeds max memory")
   147  		return -1
   148  	}
   149  
   150  	// no DeviceChoosingTimeout passed by request, using default DeviceChoosingTimeout.
   151  	if timeout <= 0 {
   152  		timeout = d.Timeout
   153  	}
   154  
   155  	timeoutDuration := time.Duration(timeout) * time.Second
   156  
   157  	start := utils.Now()
   158  	d.Lock()
   159  	device := -1
   160  	for {
   161  		if utils.Now().Sub(start) >= timeoutDuration {
   162  			utils.GetQueryLogger().With(
   163  				"query", query,
   164  				"requiredMem", requiredMem,
   165  				"preferredDevice", preferredDevice,
   166  				"timeout", timeout,
   167  			).Error("DeviceChoosingTimeout when choosing the device for the query")
   168  			break
   169  		}
   170  
   171  		device = d.findDevice(query, requiredMem, preferredDevice)
   172  		if device >= 0 {
   173  			break
   174  		}
   175  		d.deviceAvailable.Wait()
   176  	}
   177  	d.Unlock()
   178  	utils.GetRootReporter().GetTimer(utils.QueryWaitForMemoryDuration).Record(utils.Now().Sub(start))
   179  	return device
   180  }
   181  
   182  // findDevice finds a device to run a given query according to certain strategy.If no such device can't
   183  // be found, return -1. Caller needs to hold the write lock.
   184  func (d *DeviceManager) findDevice(query *AQLQuery, requiredMem int, preferredDevice int) int {
   185  	utils.GetQueryLogger().With(
   186  		"query", query,
   187  		"requiredMem", requiredMem,
   188  		"preferredDevice", preferredDevice,
   189  	).Debug("trying to find device for query")
   190  	candidateDevice := -1
   191  
   192  	// try to choose preferredDevice if it meets requirements.
   193  	if preferredDevice >= 0 && preferredDevice < len(d.DeviceInfos) &&
   194  		d.DeviceInfos[preferredDevice].FreeMemory >= requiredMem {
   195  		candidateDevice = preferredDevice
   196  	}
   197  
   198  	// choose candidateDevice if preferredDevice does not meet requirements
   199  	if candidateDevice < 0 {
   200  		candidateDevice = d.strategy.chooseDevice(requiredMem)
   201  	}
   202  
   203  	if candidateDevice < 0 {
   204  		return candidateDevice
   205  	}
   206  
   207  	// reserve memory for this query.
   208  	deviceInfo := d.DeviceInfos[candidateDevice]
   209  	deviceInfo.QueryCount++
   210  	deviceInfo.QueryMemoryUsageMap[query] = requiredMem
   211  	deviceInfo.FreeMemory -= requiredMem
   212  	deviceInfo.reportMemoryUsage()
   213  
   214  	utils.GetLogger().Debugf("Assign device '%d' for query", candidateDevice)
   215  	utils.GetLogger().Debugf("DeviceInfo=%+v", deviceInfo)
   216  	return candidateDevice
   217  }
   218  
   219  // ReleaseReservedMemory adjust total free global memory for a given device after a query is complete
   220  func (d *DeviceManager) ReleaseReservedMemory(device int, query *AQLQuery) {
   221  	// Don't even need the lock,
   222  	if device < 0 || device >= len(d.DeviceInfos) {
   223  		return
   224  	}
   225  
   226  	d.Lock()
   227  	defer d.Unlock()
   228  	deviceInfo := d.DeviceInfos[device]
   229  	usage, ok := deviceInfo.QueryMemoryUsageMap[query]
   230  	if ok {
   231  		utils.GetLogger().Debugf("Freed %d bytes memory on device %d", usage, device)
   232  		deviceInfo.FreeMemory += usage
   233  		deviceInfo.reportMemoryUsage()
   234  		delete(deviceInfo.QueryMemoryUsageMap, query)
   235  		deviceInfo.QueryCount--
   236  		d.deviceAvailable.Broadcast()
   237  	}
   238  }
   239  
   240  // reportMemoryUsage reports the memory usage of specified device. Caller needs to hold the lock.
   241  func (deviceInfo *DeviceInfo) reportMemoryUsage() {
   242  	utils.GetRootReporter().GetChildGauge(map[string]string{
   243  		"device": strconv.Itoa(deviceInfo.DeviceID),
   244  	}, utils.EstimatedDeviceMemory).Update(
   245  		float64(deviceInfo.TotalAvailableMemory - deviceInfo.FreeMemory))
   246  }
   247  
   248  // deviceChooseStrategy defines the interface to choose an available device for
   249  // specific query.
   250  type deviceChooseStrategy interface {
   251  	chooseDevice(requiredMem int) int
   252  }
   253  
   254  // leastAvailableMemoryStrategy is to pick up device with least query count and
   255  // least memory that's larger than required memory of the query.
   256  type leastQueryCountAndMemoryStrategy struct {
   257  	deviceManager *DeviceManager
   258  }
   259  
   260  // chooseDevice finds a device to run a given query according to certain strategy
   261  // If no such device, return -1.
   262  func (s leastQueryCountAndMemoryStrategy) chooseDevice(requiredMem int) int {
   263  	candidateDevice := -1
   264  	leastMemory := int(math.MaxInt64)
   265  	leastQueryCount := int(math.MaxInt32)
   266  	for device, deviceInfo := range s.deviceManager.DeviceInfos {
   267  		if deviceInfo.FreeMemory >= requiredMem && (deviceInfo.QueryCount < leastQueryCount ||
   268  			(deviceInfo.QueryCount == leastQueryCount && deviceInfo.FreeMemory <= leastMemory)) {
   269  			candidateDevice = device
   270  			leastQueryCount = deviceInfo.QueryCount
   271  			leastMemory = deviceInfo.FreeMemory
   272  		}
   273  	}
   274  	return candidateDevice
   275  }