github.com/filecoin-project/bacalhau@v0.3.23-0.20230228154132-45c989550ace/pkg/jobstore/inmemory/inmemory.go (about) 1 package inmemory 2 3 import ( 4 "context" 5 "sort" 6 "time" 7 8 sync "github.com/bacalhau-project/golang-mutex-tracer" 9 "github.com/imdario/mergo" 10 "golang.org/x/exp/maps" 11 "golang.org/x/exp/slices" 12 13 "github.com/filecoin-project/bacalhau/pkg/bacerrors" 14 jobutils "github.com/filecoin-project/bacalhau/pkg/job" 15 "github.com/filecoin-project/bacalhau/pkg/jobstore" 16 "github.com/filecoin-project/bacalhau/pkg/model" 17 ) 18 19 const newJobComment = "Job created" 20 21 type JobStore struct { 22 // we keep pointers to these things because we will update them partially 23 jobs map[string]model.Job 24 states map[string]model.JobState 25 history map[string][]model.JobHistory 26 inprogress map[string]struct{} 27 mtx sync.RWMutex 28 } 29 30 func NewJobStore() *JobStore { 31 res := &JobStore{ 32 jobs: make(map[string]model.Job), 33 states: make(map[string]model.JobState), 34 history: make(map[string][]model.JobHistory), 35 inprogress: make(map[string]struct{}), 36 } 37 res.mtx.EnableTracerWithOpts(sync.Opts{ 38 Threshold: 10 * time.Millisecond, 39 Id: "InMemoryJobStore.mtx", 40 }) 41 return res 42 } 43 44 // Gets a job from the datastore. 45 // 46 // Errors: 47 // 48 // - error-job-not-found -- if the job is not found 49 func (d *JobStore) GetJob(_ context.Context, id string) (model.Job, error) { 50 d.mtx.RLock() 51 defer d.mtx.RUnlock() 52 return d.getJob(id) 53 } 54 55 func (d *JobStore) GetJobs(ctx context.Context, query jobstore.JobQuery) ([]model.Job, error) { 56 d.mtx.RLock() 57 defer d.mtx.RUnlock() 58 var result []model.Job 59 60 if query.ID != "" { 61 j, err := d.getJob(query.ID) 62 if err != nil { 63 return nil, err 64 } 65 return []model.Job{j}, nil 66 } 67 68 for _, j := range maps.Values(d.jobs) { 69 if query.Limit > 0 && len(result) == query.Limit { 70 break 71 } 72 73 if !query.ReturnAll && query.ClientID != "" && query.ClientID != j.Metadata.ClientID { 74 // Job is not for the requesting client, so ignore it. 75 continue 76 } 77 78 // If we are not using include tags, by default every job is included. 79 // If a job is specifically included, that overrides it being excluded. 80 included := len(query.IncludeTags) == 0 81 for _, tag := range j.Spec.Annotations { 82 if slices.Contains(query.IncludeTags, model.IncludedTag(tag)) { 83 included = true 84 break 85 } 86 if slices.Contains(query.ExcludeTags, model.ExcludedTag(tag)) { 87 included = false 88 break 89 } 90 } 91 92 if !included { 93 continue 94 } 95 96 result = append(result, j) 97 } 98 99 listSorter := func(i, j int) bool { 100 switch query.SortBy { 101 case "id": 102 if query.SortReverse { 103 // what does it mean to sort by ID? 104 return result[i].Metadata.ID > result[j].Metadata.ID 105 } else { 106 return result[i].Metadata.ID < result[j].Metadata.ID 107 } 108 case "created_at": 109 if query.SortReverse { 110 return result[i].Metadata.CreatedAt.UTC().Unix() > result[j].Metadata.CreatedAt.UTC().Unix() 111 } else { 112 return result[i].Metadata.CreatedAt.UTC().Unix() < result[j].Metadata.CreatedAt.UTC().Unix() 113 } 114 default: 115 return false 116 } 117 } 118 sort.Slice(result, listSorter) 119 return result, nil 120 } 121 122 func (d *JobStore) GetJobState(_ context.Context, jobID string) (model.JobState, error) { 123 d.mtx.RLock() 124 defer d.mtx.RUnlock() 125 state, ok := d.states[jobID] 126 if !ok { 127 return model.JobState{}, bacerrors.NewJobNotFound(jobID) 128 } 129 return state, nil 130 } 131 132 func (d *JobStore) GetInProgressJobs(ctx context.Context) ([]model.JobWithInfo, error) { 133 d.mtx.RLock() 134 defer d.mtx.RUnlock() 135 var result []model.JobWithInfo 136 for id := range d.inprogress { 137 result = append(result, model.JobWithInfo{ 138 Job: d.jobs[id], 139 State: d.states[id], 140 }) 141 } 142 return result, nil 143 } 144 145 func (d *JobStore) GetJobHistory(_ context.Context, jobID string) ([]model.JobHistory, error) { 146 d.mtx.RLock() 147 defer d.mtx.RUnlock() 148 history, ok := d.history[jobID] 149 if !ok { 150 return nil, jobstore.NewErrJobNotFound(jobID) 151 } 152 return history, nil 153 } 154 155 func (d *JobStore) GetJobsCount(ctx context.Context, query jobstore.JobQuery) (int, error) { 156 useQuery := query 157 useQuery.Limit = 0 158 useQuery.Offset = 0 159 jobs, err := d.GetJobs(ctx, useQuery) 160 if err != nil { 161 return 0, err 162 } 163 return len(jobs), nil 164 } 165 166 func (d *JobStore) CreateJob(_ context.Context, job model.Job) error { 167 d.mtx.Lock() 168 defer d.mtx.Unlock() 169 existingJob, ok := d.jobs[job.Metadata.ID] 170 if ok { 171 return jobstore.NewErrJobAlreadyExists(existingJob.Metadata.ID) 172 } 173 d.jobs[job.Metadata.ID] = job 174 175 // populate shard states 176 shardStates := make(map[int]model.ShardState, job.Spec.ExecutionPlan.TotalShards) 177 for i := 0; i < job.Spec.ExecutionPlan.TotalShards; i++ { 178 shardStates[i] = model.ShardState{ 179 JobID: job.Metadata.ID, 180 ShardIndex: i, 181 State: model.ShardStateInProgress, 182 Version: 1, 183 CreateTime: time.Now(), 184 UpdateTime: time.Now(), 185 } 186 } 187 188 // populate job state 189 jobState := model.JobState{ 190 JobID: job.Metadata.ID, 191 Shards: shardStates, 192 State: model.JobStateInProgress, 193 Version: 1, 194 CreateTime: time.Now(), 195 UpdateTime: time.Now(), 196 } 197 d.states[job.Metadata.ID] = jobState 198 d.inprogress[job.Metadata.ID] = struct{}{} 199 d.appendJobHistory(jobState, model.JobStateNew, newJobComment) 200 return nil 201 } 202 203 // helper method to read a single job from memory. This is used by both GetJob and GetJobs. 204 // It is important that we don't attempt to acquire a lock inside this method to avoid deadlocks since 205 // the callers are expected to be holding a lock, and golang doesn't support reentrant locks. 206 func (d *JobStore) getJob(id string) (model.Job, error) { 207 if len(id) < model.ShortIDLength { 208 return model.Job{}, bacerrors.NewJobNotFound(id) 209 } 210 211 // support for short job IDs 212 if jobutils.ShortID(id) == id { 213 // passed in a short id, need to resolve the long id first 214 for k := range d.jobs { 215 if jobutils.ShortID(k) == id { 216 id = k 217 break 218 } 219 } 220 } 221 222 j, ok := d.jobs[id] 223 if !ok { 224 returnError := bacerrors.NewJobNotFound(id) 225 return model.Job{}, returnError 226 } 227 228 return j, nil 229 } 230 231 func (d *JobStore) UpdateJobState(_ context.Context, request jobstore.UpdateJobStateRequest) error { 232 d.mtx.Lock() 233 defer d.mtx.Unlock() 234 235 // get the existing job state 236 jobState, ok := d.states[request.JobID] 237 if !ok { 238 return jobstore.NewErrJobNotFound(request.JobID) 239 } 240 241 // check the expected state 242 if err := request.Condition.Validate(jobState); err != nil { 243 return err 244 } 245 if jobState.State.IsTerminal() { 246 return jobstore.NewErrJobAlreadyTerminal(request.JobID, jobState.State, request.NewState) 247 } 248 249 // update the job state 250 previousState := jobState.State 251 jobState.State = request.NewState 252 jobState.Version++ 253 jobState.UpdateTime = time.Now() 254 d.states[request.JobID] = jobState 255 if request.NewState.IsTerminal() { 256 delete(d.inprogress, request.JobID) 257 } 258 d.appendJobHistory(jobState, previousState, request.Comment) 259 return nil 260 } 261 262 func (d *JobStore) GetShardState(_ context.Context, shardID model.ShardID) (model.ShardState, error) { 263 d.mtx.RLock() 264 defer d.mtx.RUnlock() 265 jobState, ok := d.states[shardID.JobID] 266 if !ok { 267 return model.ShardState{}, jobstore.NewErrJobNotFound(shardID.JobID) 268 } 269 shardState, ok := jobState.Shards[shardID.Index] 270 if !ok { 271 return model.ShardState{}, jobstore.NewErrShardNotFound(shardID) 272 } 273 return shardState, nil 274 } 275 276 func (d *JobStore) UpdateShardState(_ context.Context, request jobstore.UpdateShardStateRequest) error { 277 d.mtx.Lock() 278 defer d.mtx.Unlock() 279 280 // find the existing shard 281 jobState, ok := d.states[request.ShardID.JobID] 282 if !ok { 283 return jobstore.NewErrJobNotFound(request.ShardID.JobID) 284 } 285 shardState, ok := jobState.Shards[request.ShardID.Index] 286 if !ok { 287 return jobstore.NewErrShardNotFound(request.ShardID) 288 } 289 290 // check the expected state 291 if err := request.Condition.Validate(shardState); err != nil { 292 return err 293 } 294 if shardState.State.IsTerminal() { 295 return jobstore.NewErrShardAlreadyTerminal(request.ShardID, shardState.State, request.NewState) 296 } 297 298 // update the shard state 299 previousState := shardState.State 300 shardState.State = request.NewState 301 shardState.Version++ 302 shardState.UpdateTime = time.Now() 303 jobState.Shards[request.ShardID.Index] = shardState 304 d.states[request.ShardID.JobID] = jobState 305 d.appendShardHistory(shardState, previousState, request.Comment) 306 return nil 307 } 308 309 func (d *JobStore) CreateExecution(_ context.Context, execution model.ExecutionState) error { 310 d.mtx.Lock() 311 defer d.mtx.Unlock() 312 jobState, ok := d.states[execution.JobID] 313 if !ok { 314 return jobstore.NewErrJobNotFound(execution.JobID) 315 } 316 shardState, ok := jobState.Shards[execution.ShardIndex] 317 if !ok { 318 return jobstore.NewErrShardNotFound(execution.ShardID()) 319 } 320 for _, e := range shardState.Executions { 321 if e.ID() == execution.ID() { 322 return jobstore.NewErrExecutionAlreadyExists(execution.ID()) 323 } 324 } 325 if execution.CreateTime.IsZero() { 326 execution.CreateTime = time.Now() 327 } 328 if execution.UpdateTime.IsZero() { 329 execution.UpdateTime = execution.CreateTime 330 } 331 if execution.Version == 0 { 332 execution.Version = 1 333 } 334 shardState.Executions = append(shardState.Executions, execution) 335 jobState.Shards[execution.ShardIndex] = shardState 336 d.states[execution.JobID] = jobState 337 d.appendExecutionHistory(execution, model.ExecutionStateNew, "") 338 return nil 339 } 340 341 func (d *JobStore) UpdateExecution(_ context.Context, request jobstore.UpdateExecutionRequest) error { 342 d.mtx.Lock() 343 defer d.mtx.Unlock() 344 345 // find the existing execution 346 jobState, ok := d.states[request.ExecutionID.JobID] 347 if !ok { 348 return jobstore.NewErrJobNotFound(request.ExecutionID.JobID) 349 } 350 shardState, ok := jobState.Shards[request.ExecutionID.ShardIndex] 351 if !ok { 352 return jobstore.NewErrShardNotFound(request.ExecutionID.ShardID()) 353 } 354 var existingExecution model.ExecutionState 355 executionIndex := -1 356 for i, e := range shardState.Executions { 357 if e.ID() == request.ExecutionID { 358 existingExecution = e 359 executionIndex = i 360 break 361 } 362 } 363 if executionIndex == -1 { 364 return jobstore.NewErrExecutionNotFound(request.ExecutionID) 365 } 366 367 // check the expected state 368 if err := request.Condition.Validate(existingExecution); err != nil { 369 return err 370 } 371 if existingExecution.State.IsTerminal() { 372 return jobstore.NewErrExecutionAlreadyTerminal(request.ExecutionID, existingExecution.State, request.NewValues.State) 373 } 374 375 // populate default values 376 newExecution := request.NewValues 377 if newExecution.CreateTime.IsZero() { 378 newExecution.CreateTime = time.Now() 379 } 380 if newExecution.UpdateTime.IsZero() { 381 newExecution.UpdateTime = existingExecution.CreateTime 382 } 383 if newExecution.Version == 0 { 384 newExecution.Version = existingExecution.Version + 1 385 } 386 387 err := mergo.Merge(&newExecution, existingExecution) 388 if err != nil { 389 return err 390 } 391 392 // update the execution 393 previousState := existingExecution.State 394 shardState.Executions[executionIndex] = newExecution 395 jobState.Shards[newExecution.ShardIndex] = shardState 396 d.states[newExecution.JobID] = jobState 397 d.appendExecutionHistory(newExecution, previousState, request.Comment) 398 return nil 399 } 400 401 func (d *JobStore) appendJobHistory(updateJob model.JobState, previousState model.JobStateType, comment string) { 402 historyEntry := model.JobHistory{ 403 Type: model.JobHistoryTypeJobLevel, 404 JobID: updateJob.JobID, 405 PreviousState: previousState.String(), 406 NewState: updateJob.State.String(), 407 NewVersion: updateJob.Version, 408 Comment: comment, 409 Time: updateJob.UpdateTime, 410 } 411 d.history[updateJob.JobID] = append(d.history[updateJob.JobID], historyEntry) 412 } 413 414 func (d *JobStore) appendShardHistory(updatedShard model.ShardState, previousState model.ShardStateType, comment string) { 415 historyEntry := model.JobHistory{ 416 Type: model.JobHistoryTypeShardLevel, 417 JobID: updatedShard.JobID, 418 ShardIndex: updatedShard.ShardIndex, 419 PreviousState: previousState.String(), 420 NewState: updatedShard.State.String(), 421 NewVersion: updatedShard.Version, 422 Comment: comment, 423 Time: updatedShard.UpdateTime, 424 } 425 d.history[updatedShard.JobID] = append(d.history[updatedShard.JobID], historyEntry) 426 } 427 428 func (d *JobStore) appendExecutionHistory(updatedExecution model.ExecutionState, previousState model.ExecutionStateType, comment string) { 429 historyEntry := model.JobHistory{ 430 Type: model.JobHistoryTypeExecutionLevel, 431 JobID: updatedExecution.JobID, 432 ShardIndex: updatedExecution.ShardIndex, 433 NodeID: updatedExecution.NodeID, 434 ComputeReference: updatedExecution.ComputeReference, 435 PreviousState: previousState.String(), 436 NewState: updatedExecution.State.String(), 437 NewStateType: updatedExecution.State, 438 NewVersion: updatedExecution.Version, 439 Comment: comment, 440 Time: updatedExecution.UpdateTime, 441 } 442 d.history[updatedExecution.JobID] = append(d.history[updatedExecution.JobID], historyEntry) 443 } 444 445 // Static check to ensure that Transport implements Transport: 446 var _ jobstore.Store = (*JobStore)(nil)