github.com/iron-io/functions@v0.0.0-20180820112432-d59d7d1c40b2/api/runner/runner.go (about) 1 package runner 2 3 import ( 4 "bufio" 5 "context" 6 "errors" 7 "fmt" 8 "io/ioutil" 9 "os" 10 "runtime" 11 "strconv" 12 "strings" 13 "sync" 14 "time" 15 16 "github.com/Sirupsen/logrus" 17 "github.com/iron-io/functions/api/runner/task" 18 "github.com/iron-io/runner/common" 19 "github.com/iron-io/runner/drivers" 20 driverscommon "github.com/iron-io/runner/drivers" 21 "github.com/iron-io/runner/drivers/docker" 22 "github.com/iron-io/runner/drivers/mock" 23 ) 24 25 type Runner struct { 26 driver drivers.Driver 27 taskQueue chan *containerTask 28 mlog MetricLogger 29 flog FuncLogger 30 availableMem int64 31 usedMem int64 32 usedMemMutex sync.RWMutex 33 34 stats 35 } 36 37 var ( 38 ErrTimeOutNoMemory = errors.New("Task timed out. No available memory.") 39 ErrFullQueue = errors.New("The runner queue is full") 40 41 WaitMemoryTimeout = 10 * time.Second 42 ) 43 44 func New(ctx context.Context, flog FuncLogger, mlog MetricLogger) (*Runner, error) { 45 // TODO: Is this really required for the container drivers? Can we remove it? 46 env := common.NewEnvironment(func(e *common.Environment) {}) 47 48 // TODO: Create a drivers.New(runnerConfig) in Titan 49 driver, err := selectDriver("docker", env, &driverscommon.Config{}) 50 if err != nil { 51 return nil, err 52 } 53 54 r := &Runner{ 55 driver: driver, 56 taskQueue: make(chan *containerTask, 100), 57 flog: flog, 58 mlog: mlog, 59 availableMem: getAvailableMemory(), 60 usedMem: 0, 61 } 62 63 go r.queueHandler(ctx) 64 65 return r, nil 66 } 67 68 // This routine checks for available memory; 69 // If there's memory then send signal to the task to proceed. 70 // If there's not available memory to run the task it waits 71 // If the task waits for more than X seconds it timeouts 72 func (r *Runner) queueHandler(ctx context.Context) { 73 consumeQueue: 74 for { 75 select { 76 case task := <-r.taskQueue: 77 r.handleTask(task) 78 case <-ctx.Done(): 79 break consumeQueue 80 } 81 } 82 83 // consume remainders 84 for len(r.taskQueue) > 0 { 85 r.handleTask(<-r.taskQueue) 86 } 87 } 88 89 func (r *Runner) handleTask(task *containerTask) { 90 waitStart := time.Now() 91 92 var waitTime time.Duration 93 var timedOut bool 94 95 // Loop waiting for available memory 96 for !r.checkRequiredMem(task.cfg.Memory) { 97 waitTime = time.Since(waitStart) 98 if waitTime > WaitMemoryTimeout { 99 timedOut = true 100 break 101 } 102 time.Sleep(time.Microsecond) 103 } 104 105 metricBaseName := fmt.Sprintf("run.%s.", task.cfg.AppName) 106 r.mlog.LogTime(task.ctx, metricBaseName+"wait_time", waitTime) 107 r.mlog.LogTime(task.ctx, "run.wait_time", waitTime) 108 109 if timedOut { 110 // Send to a signal to this task saying it cannot run 111 r.mlog.LogCount(task.ctx, metricBaseName+"timeout", 1) 112 task.canRun <- false 113 return 114 } 115 116 // Send a signal to this task saying it can run 117 task.canRun <- true 118 } 119 120 func (r *Runner) hasAsyncAvailableMemory() bool { 121 r.usedMemMutex.RLock() 122 defer r.usedMemMutex.RUnlock() 123 // reserve at least half of the memory for sync 124 return (r.availableMem/2)-r.usedMem > 0 125 } 126 127 func (r *Runner) checkRequiredMem(req uint64) bool { 128 r.usedMemMutex.RLock() 129 defer r.usedMemMutex.RUnlock() 130 return (r.availableMem-r.usedMem)/int64(req)*1024*1024 > 0 131 } 132 133 func (r *Runner) addUsedMem(used int64) { 134 r.usedMemMutex.Lock() 135 r.usedMem = r.usedMem + used*1024*1024 136 if r.usedMem < 0 { 137 r.usedMem = 0 138 } 139 r.usedMemMutex.Unlock() 140 } 141 142 func (r *Runner) checkMemAndUse(req uint64) bool { 143 r.usedMemMutex.Lock() 144 defer r.usedMemMutex.Unlock() 145 146 used := int64(req) * 1024 * 1024 147 148 if (r.availableMem-r.usedMem)/used < 0 { 149 return false 150 } 151 152 r.usedMem += used 153 154 return true 155 } 156 157 func (r *Runner) Run(ctx context.Context, cfg *task.Config) (drivers.RunResult, error) { 158 var err error 159 160 if cfg.Memory == 0 { 161 cfg.Memory = 128 162 } 163 164 cfg.Stderr = r.flog.Writer(ctx, cfg.AppName, cfg.Path, cfg.Image, cfg.ID) 165 if cfg.Stdout == nil { 166 cfg.Stdout = cfg.Stderr 167 } 168 169 ctask := &containerTask{ 170 ctx: ctx, 171 cfg: cfg, 172 canRun: make(chan bool), 173 } 174 175 metricBaseName := fmt.Sprintf("run.%s.", cfg.AppName) 176 r.mlog.LogCount(ctx, metricBaseName+"requests", 1) 177 178 // Check if has enough available memory 179 // If available, use it 180 if !r.checkMemAndUse(cfg.Memory) { 181 // If not, try add task to the queue 182 select { 183 case r.taskQueue <- ctask: 184 default: 185 // If queue is full, return error 186 r.mlog.LogCount(ctx, "queue.full", 1) 187 return nil, ErrFullQueue 188 } 189 190 // If task was added to the queue, wait for permission 191 if ok := <-ctask.canRun; !ok { 192 // This task timed out, not available memory 193 return nil, ErrTimeOutNoMemory 194 } 195 } else { 196 r.mlog.LogTime(ctx, metricBaseName+"waittime", 0) 197 } 198 defer r.addUsedMem(-1 * int64(cfg.Memory)) 199 200 cookie, err := r.driver.Prepare(ctx, ctask) 201 if err != nil { 202 return nil, err 203 } 204 defer cookie.Close() 205 206 metricStart := time.Now() 207 208 result, err := cookie.Run(ctx) 209 if err != nil { 210 return nil, err 211 } 212 213 if result.Status() == "success" { 214 r.mlog.LogCount(ctx, metricBaseName+"succeeded", 1) 215 } else { 216 r.mlog.LogCount(ctx, metricBaseName+"error", 1) 217 } 218 219 metricElapsed := time.Since(metricStart) 220 r.mlog.LogTime(ctx, metricBaseName+"time", metricElapsed) 221 r.mlog.LogTime(ctx, "run.exec_time", metricElapsed) 222 223 return result, nil 224 } 225 226 func (r Runner) EnsureImageExists(ctx context.Context, cfg *task.Config) error { 227 ctask := &containerTask{ 228 cfg: cfg, 229 } 230 231 auth, err := ctask.DockerAuth() 232 if err != nil { 233 return err 234 } 235 236 _, err = docker.CheckRegistry(ctask.Image(), auth) 237 return err 238 } 239 240 func selectDriver(driver string, env *common.Environment, conf *driverscommon.Config) (drivers.Driver, error) { 241 switch driver { 242 case "docker": 243 docker := docker.NewDocker(env, *conf) 244 return docker, nil 245 case "mock": 246 return mock.New(), nil 247 } 248 return nil, fmt.Errorf("driver %v not found", driver) 249 } 250 251 func getAvailableMemory() int64 { 252 const tooBig = 322122547200 // #300GB or 0, biggest aws instance is 244GB 253 254 var availableMemory uint64 = tooBig 255 if runtime.GOOS == "linux" { 256 availableMemory, err := checkCgroup() 257 if err != nil { 258 logrus.WithError(err).Error("Error checking for cgroup memory limits, falling back to host memory available..") 259 } 260 if availableMemory > tooBig || availableMemory == 0 { 261 // Then -m flag probably wasn't set, so use max available on system 262 availableMemory, err = checkProc() 263 if err != errCantReadMemInfo && 264 (availableMemory > tooBig || availableMemory == 0) { 265 logrus.WithError(err).Fatal("Cannot get the proper information to. You must specify the maximum available memory by passing the -m command with docker run when starting the runner via docker, eg: `docker run -m 2G ...`") 266 } 267 } 268 } else { 269 // This still lets 10-20 functions execute concurrently assuming a 2GB machine. 270 availableMemory = 2 * 1024 * 1024 * 1024 271 } 272 273 return int64(availableMemory) 274 } 275 276 func checkCgroup() (uint64, error) { 277 f, err := os.Open("/sys/fs/cgroup/memory/memory.limit_in_bytes") 278 if err != nil { 279 return 0, err 280 } 281 defer f.Close() 282 b, err := ioutil.ReadAll(f) 283 limBytes := string(b) 284 limBytes = strings.TrimSpace(limBytes) 285 if err != nil { 286 return 0, err 287 } 288 return strconv.ParseUint(limBytes, 10, 64) 289 } 290 291 var errCantReadMemInfo = errors.New("Didn't find MemAvailable in /proc/meminfo, kernel is probably < 3.14") 292 293 func checkProc() (uint64, error) { 294 f, err := os.Open("/proc/meminfo") 295 if err != nil { 296 return 0, err 297 } 298 defer f.Close() 299 300 scanner := bufio.NewScanner(f) 301 for scanner.Scan() { 302 b := scanner.Text() 303 if !strings.HasPrefix(b, "MemAvailable") { 304 continue 305 } 306 307 // expect form: 308 // MemAvailable: 1234567890 kB 309 tri := strings.Fields(b) 310 if len(tri) != 3 { 311 return 0, fmt.Errorf("MemAvailable line has unexpected format: %v", b) 312 } 313 314 c, err := strconv.ParseUint(tri[1], 10, 64) 315 if err != nil { 316 return 0, fmt.Errorf("Could not parse MemAvailable: %v", b) 317 } 318 switch tri[2] { // convert units to bytes 319 case "kB": 320 c *= 1024 321 case "MB": 322 c *= 1024 * 1024 323 default: 324 return 0, fmt.Errorf("Unexpected units for MemAvailable in /proc/meminfo, need kB or MB, got: %v", tri[2]) 325 } 326 return c, nil 327 } 328 329 return 0, errCantReadMemInfo 330 }