// Source: github.com/billybanfield/evergreen@v0.0.0-20170525200750-eeee692790f7/model/scheduler_stats.go

package model

import (
	"time"

	"github.com/evergreen-ci/evergreen"
	"github.com/evergreen-ci/evergreen/db"
	"github.com/evergreen-ci/evergreen/model/distro"
	"github.com/evergreen-ci/evergreen/model/host"
	"github.com/evergreen-ci/evergreen/model/task"
	"github.com/evergreen-ci/evergreen/util"
	"github.com/pkg/errors"
	"gopkg.in/mgo.v2/bson"
)

// ResourceInfo contains the meta data about a given resource.
// This includes the id of the resource, the overall start and finish time and any
// extra data that should be included about the resource.
type ResourceInfo struct {
	// Id identifies the resource (a host id or a task id in this file).
	Id string `json:"id"`
	// Start is when the resource began (host creation time / task start time).
	Start time.Time `json:"start"`
	// End is when the resource finished (host termination time / task finish time).
	End time.Time `json:"end"`
	// Data is optional extra information; CreateTaskBuckets stores the task's host id here,
	// CreateHostBuckets leaves it unset.
	Data interface{} `json:"data"`
}

// Bucket stores the total amount of time in a given bucket and a list of the resources that are in that bucket.
type Bucket struct {
	// TotalTime is the sum of every duration added to the bucket.
	TotalTime time.Duration `json:"total_time"`
	// Resources lists every resource that contributed time to the bucket.
	Resources []ResourceInfo `json:"resources"`
}

// AvgBucket is one element in the results of a list of buckets that are created from the agg query.
type AvgBucket struct {
	// Id is the bucket index; it is read from the "_id" field of the aggregation result.
	Id int `bson:"_id" json:"index" csv:"index"`
	// AverageTime is the average scheduled -> start time, read from the "a" field of the aggregation result.
	AverageTime time.Duration `bson:"a" json:"avg" csv:"avg_time"`
	// NumberTasks is the number of tasks in the bucket, read from the "n" field of the aggregation result.
	NumberTasks int `bson:"n" json:"number_tasks" csv:"number_tasks"`
	// Start and End carry no bson tag; they are filled in by convertBucketsToNanoseconds.
	Start time.Time `json:"start_time" csv:"start_time"`
	End   time.Time `json:"end_time" csv:"end_time"`
}

// FrameBounds is a set of information about the inputs of buckets:
// the start and end of the time frame, the size of each bucket and how many buckets there are.
type FrameBounds struct {
	StartTime     time.Time
	EndTime       time.Time
	BucketSize    time.Duration
	NumberBuckets int
}

// HostUtilizationBucket represents an aggregate view of the hosts and tasks Bucket for a given time frame.
50 type HostUtilizationBucket struct { 51 StaticHost time.Duration `json:"static_host" csv:"static_host"` 52 DynamicHost time.Duration `json:"dynamic_host" csv:"dynamic_host"` 53 Task time.Duration `json:"task" csv:"task"` 54 StartTime time.Time `json:"start_time" csv:"start_time"` 55 EndTime time.Time `json:"end_time" csv:"end_time"` 56 } 57 58 type AvgBuckets []AvgBucket 59 60 // dependencyPath represents the path of tasks that can 61 // occur by taking one from each layer of the dependencies 62 // TotalTime is the sum of all task's time taken to run that are in Tasks. 63 type dependencyPath struct { 64 TaskId string 65 TotalTime time.Duration 66 Tasks []string 67 } 68 69 func addBucketTime(duration time.Duration, resource ResourceInfo, bucket Bucket) Bucket { 70 return Bucket{ 71 TotalTime: bucket.TotalTime + duration, 72 Resources: append(bucket.Resources, resource), 73 } 74 } 75 76 // CalculateBounds takes in a daysBack and granularity and returns the 77 // start time, end time, bucket size, and number of buckets 78 func CalculateBounds(daysBack, granularity int) FrameBounds { 79 endTime := time.Now() 80 totalTime := 24 * time.Hour * time.Duration(daysBack) 81 startTime := endTime.Add(-1 * totalTime) 82 83 bucketSize := time.Duration(granularity) * time.Second 84 85 numberBuckets := (time.Duration(daysBack) * time.Hour * 24) / (time.Duration(granularity) * time.Second) 86 87 return FrameBounds{ 88 StartTime: startTime, 89 EndTime: endTime, 90 BucketSize: bucketSize, 91 NumberBuckets: int(numberBuckets), 92 } 93 } 94 95 // bucketResource buckets amounts of time based on a number of buckets and the size of them. 
96 // Given a resource with a start and end time, where the end >= start, 97 // a time frame with a frameStart and frameEnd, where frameEnd >= frameStart, 98 // a bucketSize that represents the amount of time each bucket holds 99 // and a list of buckets that may already have time in them, 100 // BucketResource will split the time and add the time the corresponds to a given buck to that bucket. 101 func bucketResource(resource ResourceInfo, frameStart, frameEnd time.Time, bucketSize time.Duration, 102 currentBuckets []Bucket) ([]Bucket, error) { 103 104 start := resource.Start 105 end := resource.End 106 // double check so that there are no panics 107 if start.After(frameEnd) || start.Equal(frameEnd) { 108 return currentBuckets, errors.Errorf("invalid resource start time %v that is after the time frame %v", start, frameEnd) 109 } 110 111 if util.IsZeroTime(start) { 112 return currentBuckets, errors.New("start time is zero") 113 } 114 115 if !util.IsZeroTime(end) && (end.Before(frameStart) || end.Equal(frameStart)) { 116 return currentBuckets, errors.Errorf("invalid resource end time, %v that is before the time frame, %v", end, frameStart) 117 } 118 119 if !util.IsZeroTime(end) && end.Before(start) { 120 return currentBuckets, errors.Errorf("termination time, %v is before start time, %v and exists", end, start) 121 } 122 123 // if the times are equal then just return since nothing should be bucketed 124 if end.Equal(start) { 125 return currentBuckets, nil 126 } 127 128 // If the resource starts before the beginning of the frame, 129 // the startBucket is the first one. The startOffset is the offset 130 // of time from the beginning of the start bucket, so that is 0. 
131 startOffset := time.Duration(0) 132 startBucket := time.Duration(0) 133 if start.After(frameStart) { 134 startOffset = start.Sub(frameStart) 135 startBucket = startOffset / bucketSize 136 } 137 // If the resource ends after the end of the frame, the end bucket is the last bucket 138 // the end offset is the entirety of that bucket. 139 endBucket := time.Duration(len(currentBuckets) - 1) 140 endOffset := bucketSize * (endBucket + 1) 141 142 if !(util.IsZeroTime(end) || end.After(frameEnd) || end.Equal(frameEnd)) { 143 endOffset = end.Sub(frameStart) 144 endBucket = endOffset / bucketSize 145 } 146 147 // If the startBucket and the endBucket are the same, that means there is only one bucket. 148 // The amount that goes in that bucket is the difference in the resources start time and end time. 149 if startBucket == endBucket { 150 currentBuckets[startBucket] = addBucketTime(endOffset-startOffset, resource, currentBuckets[startBucket]) 151 return currentBuckets, nil 152 153 } else { 154 // add the difference between the startOffset and the amount of time that has passed in the start and end bucket 155 // to the start and end buckets. 
156 currentBuckets[startBucket] = addBucketTime((startBucket+1)*bucketSize-startOffset, resource, currentBuckets[startBucket]) 157 currentBuckets[endBucket] = addBucketTime(endOffset-endBucket*bucketSize, resource, currentBuckets[endBucket]) 158 } 159 for i := startBucket + 1; i < endBucket; i++ { 160 currentBuckets[i] = addBucketTime(bucketSize, resource, currentBuckets[i]) 161 } 162 return currentBuckets, nil 163 } 164 165 // CreateHostBuckets takes in a list of hosts with their creation and termination times 166 // and returns durations bucketed based on a start time, number of buckets and the size of each bucket 167 func CreateHostBuckets(hosts []host.Host, bounds FrameBounds) ([]Bucket, []error) { 168 hostBuckets := make([]Bucket, bounds.NumberBuckets) 169 var err error 170 errs := []error{} 171 for _, h := range hosts { 172 hostResource := ResourceInfo{ 173 Id: h.Id, 174 Start: h.CreationTime, 175 End: h.TerminationTime, 176 } 177 178 // static hosts 179 if h.Provider == evergreen.HostTypeStatic { 180 for i, b := range hostBuckets { 181 hostBuckets[i] = addBucketTime(bounds.BucketSize, hostResource, b) 182 } 183 continue 184 } 185 hostBuckets, err = bucketResource(hostResource, bounds.StartTime, bounds.EndTime, bounds.BucketSize, hostBuckets) 186 if err != nil { 187 errs = append(errs, errors.Wrapf(err, "error bucketing host %s", h.Id)) 188 } 189 } 190 return hostBuckets, errs 191 } 192 193 // CreateTaskBuckets takes in a list of tasks with their start and finish times 194 // and returns durations bucketed based on a start time, number of buckets and the size of each bucket 195 func CreateTaskBuckets(tasks []task.Task, oldTasks []task.Task, bounds FrameBounds) ([]Bucket, []error) { 196 taskBuckets := make([]Bucket, bounds.NumberBuckets) 197 var err error 198 errs := []error{} 199 for _, t := range tasks { 200 taskResource := ResourceInfo{ 201 Id: t.Id, 202 Start: t.StartTime, 203 End: t.FinishTime, 204 Data: t.HostId, 205 } 206 taskBuckets, err = 
bucketResource(taskResource, bounds.StartTime, bounds.EndTime, bounds.BucketSize, taskBuckets) 207 if err != nil { 208 errs = append(errs, errors.Wrapf(err, "error bucketing task %v", t.Id)) 209 } 210 } 211 212 for _, t := range oldTasks { 213 taskResource := ResourceInfo{ 214 Id: t.Id, 215 Start: t.StartTime, 216 End: t.FinishTime, 217 Data: t.HostId, 218 } 219 taskBuckets, err = bucketResource(taskResource, bounds.StartTime, bounds.EndTime, bounds.BucketSize, taskBuckets) 220 if err != nil { 221 errs = append(errs, errors.Wrapf(err, "error bucketing task %v", t.Id)) 222 } 223 } 224 return taskBuckets, errs 225 } 226 227 // CreateAllHostUtilizationBuckets aggregates each bucket by creating a time frame given the number of days back 228 // and the granularity wanted (ie. days, minutes, seconds, hours) all in seconds. It returns a list of Host utilization 229 // information for each bucket. 230 func CreateAllHostUtilizationBuckets(daysBack, granularity int) ([]HostUtilizationBucket, error) { 231 bounds := CalculateBounds(daysBack, granularity) 232 // find non-static hosts 233 dynamicHosts, err := host.Find(host.ByDynamicWithinTime(bounds.StartTime, bounds.EndTime)) 234 if err != nil { 235 return nil, err 236 } 237 // find static hosts 238 staticHosts, err := host.Find(host.AllStatic) 239 if err != nil { 240 return nil, err 241 } 242 243 dynamicBuckets, _ := CreateHostBuckets(dynamicHosts, bounds) 244 staticBuckets, _ := CreateHostBuckets(staticHosts, bounds) 245 246 tasks, err := task.Find(task.ByTimeRun(bounds.StartTime, bounds.EndTime).WithFields(task.StartTimeKey, task.FinishTimeKey, task.HostIdKey)) 247 if err != nil { 248 return nil, err 249 } 250 251 oldTasks, err := task.FindOld(task.ByTimeRun(bounds.StartTime, bounds.EndTime)) 252 if err != nil { 253 return nil, err 254 } 255 256 taskBuckets, _ := CreateTaskBuckets(tasks, oldTasks, bounds) 257 bucketData := []HostUtilizationBucket{} 258 for i, staticBucket := range staticBuckets { 259 b := 
HostUtilizationBucket{ 260 StaticHost: staticBucket.TotalTime, 261 DynamicHost: dynamicBuckets[i].TotalTime, 262 Task: taskBuckets[i].TotalTime, 263 StartTime: bounds.StartTime.Add(time.Duration(i) * bounds.BucketSize), 264 EndTime: bounds.StartTime.Add(time.Duration(i+1) * bounds.BucketSize), 265 } 266 bucketData = append(bucketData, b) 267 268 } 269 return bucketData, nil 270 } 271 272 // AverageStatistics uses an agg pipeline that creates buckets given a time frame and finds the average scheduled -> 273 // start time for that time frame. 274 // One thing to note is that the average time is in milliseconds, not nanoseconds and must be converted. 275 func AverageStatistics(distroId string, bounds FrameBounds) (AvgBuckets, error) { 276 277 // error out if the distro does not exist 278 _, err := distro.FindOne(distro.ById(distroId)) 279 if err != nil { 280 return nil, err 281 } 282 intBucketSize := util.FromNanoseconds(bounds.BucketSize) 283 buckets := AvgBuckets{} 284 pipeline := []bson.M{ 285 // find all tasks that have started within the time frame for a given distro and only valid statuses. 286 {"$match": bson.M{ 287 task.StartTimeKey: bson.M{ 288 "$gte": bounds.StartTime, 289 "$lte": bounds.EndTime, 290 }, 291 // only need tasks that have already started or those that have finished, 292 // not looking for tasks that have been scheduled but not started. 
293 task.StatusKey: bson.M{ 294 "$in": []string{evergreen.TaskStarted, 295 evergreen.TaskFailed, evergreen.TaskSucceeded}, 296 }, 297 task.DistroIdKey: distroId, 298 }}, 299 // project the difference in scheduled -> start, as well as the bucket 300 {"$project": bson.M{ 301 "diff": bson.M{ 302 "$subtract": []interface{}{"$" + task.StartTimeKey, "$" + task.ScheduledTimeKey}, 303 }, 304 "b": bson.M{ 305 "$floor": bson.M{ 306 "$divide": []interface{}{ 307 bson.M{"$subtract": []interface{}{"$" + task.StartTimeKey, bounds.StartTime}}, 308 intBucketSize}, 309 }, 310 }, 311 }}, 312 {"$group": bson.M{ 313 "_id": "$b", 314 "a": bson.M{"$avg": "$diff"}, 315 "n": bson.M{"$sum": 1}, 316 }}, 317 318 {"$sort": bson.M{ 319 "_id": 1, 320 }}, 321 } 322 323 if err := db.Aggregate(task.Collection, pipeline, &buckets); err != nil { 324 return nil, err 325 } 326 return convertBucketsToNanoseconds(buckets, bounds), nil 327 } 328 329 // convertBucketsToNanoseconds fills in 0 time buckets to the list of Average Buckets 330 // and it converts the average times to nanoseconds. 331 func convertBucketsToNanoseconds(buckets AvgBuckets, bounds FrameBounds) AvgBuckets { 332 allBuckets := AvgBuckets{} 333 for i := 0; i < bounds.NumberBuckets; i++ { 334 startTime := bounds.StartTime.Add(time.Duration(i) * bounds.BucketSize) 335 endTime := bounds.StartTime.Add(bounds.BucketSize) 336 currentBucket := AvgBucket{ 337 Id: i, 338 AverageTime: 0, 339 NumberTasks: 0, 340 Start: startTime, 341 End: endTime, 342 } 343 for j := 0; j < len(buckets); j++ { 344 if buckets[j].Id == i { 345 currentBucket.AverageTime = util.ToNanoseconds(buckets[j].AverageTime) 346 currentBucket.NumberTasks = buckets[j].NumberTasks 347 break 348 } 349 } 350 allBuckets = append(allBuckets, currentBucket) 351 } 352 return allBuckets 353 } 354 355 // CalculateActualMakespan finds the amount of time it took for the build to complete from 356 // the first task start to the last task finishing. 
357 func CalculateActualMakespan(tasks []task.Task) time.Duration { 358 // find the minimum start time and the maximum finish time and take the difference 359 if len(tasks) == 0 { 360 return time.Duration(0) 361 } 362 363 minStart := tasks[0].StartTime 364 maxFinish := tasks[0].FinishTime 365 366 for _, t := range tasks { 367 if t.StartTime.Before(minStart) { 368 minStart = t.StartTime 369 } 370 if t.FinishTime.After(maxFinish) { 371 maxFinish = t.FinishTime 372 } 373 } 374 return maxFinish.Sub(minStart) 375 } 376 377 // hasTaskId returns true if the dependency list has the task 378 func hasTaskId(taskId string, dependsOn []task.Dependency) bool { 379 for _, d := range dependsOn { 380 if d.TaskId == taskId { 381 return true 382 } 383 } 384 return false 385 } 386 387 // getMaxDependencyPath recursively traverses a task's dependencies to get the dependency path object with the maximum 388 // total time. 389 func getMaxDependencyPath(tasks []task.Task, depPath dependencyPath) dependencyPath { 390 maxDepPath := depPath 391 maxTime := time.Duration(0) 392 // find tasks that depend on the current task in the depPath 393 for _, t := range tasks { 394 if hasTaskId(depPath.TaskId, t.DependsOn) { 395 newDepPath := dependencyPath{ 396 TaskId: t.Id, 397 Tasks: append(depPath.Tasks, t.Id), 398 TotalTime: depPath.TotalTime + t.TimeTaken, 399 } 400 newDepPath = getMaxDependencyPath(tasks, newDepPath) 401 if newDepPath.TotalTime > maxTime { 402 maxTime = newDepPath.TotalTime 403 maxDepPath = newDepPath 404 } 405 } 406 } 407 return maxDepPath 408 } 409 410 // FindPredictedMakespan, given a list of tasks that have been completed, finds the optimal makespan of that build. 
411 func FindPredictedMakespan(tasks []task.Task) dependencyPath { 412 maxTime := time.Duration(0) 413 var maxDepPath dependencyPath 414 415 for _, t := range tasks { 416 if len(t.DependsOn) == 0 { 417 depPath := dependencyPath{ 418 TaskId: t.Id, 419 Tasks: []string{t.Id}, 420 TotalTime: t.TimeTaken, 421 } 422 fullDepPath := getMaxDependencyPath(tasks, depPath) 423 if fullDepPath.TotalTime > maxTime { 424 maxTime = fullDepPath.TotalTime 425 maxDepPath = fullDepPath 426 } 427 } 428 } 429 return maxDepPath 430 }