github.com/authzed/spicedb@v1.32.1-0.20240520085336-ebda56537386/internal/graph/checkingresourcestream.go (about) 1 package graph 2 3 import ( 4 "cmp" 5 "context" 6 "slices" 7 "sync" 8 9 "go.uber.org/atomic" 10 "golang.org/x/exp/maps" 11 12 "github.com/authzed/spicedb/internal/dispatch" 13 "github.com/authzed/spicedb/internal/graph/computed" 14 "github.com/authzed/spicedb/pkg/datastore" 15 "github.com/authzed/spicedb/pkg/genutil/mapz" 16 v1 "github.com/authzed/spicedb/pkg/proto/dispatch/v1" 17 "github.com/authzed/spicedb/pkg/spiceerrors" 18 ) 19 20 // possibleResource is a resource that was returned by reachable resources and, after processing, 21 // may be returned by the lookup resources stream. 22 type possibleResource struct { 23 // reachableResult is the result for this resource from the reachable resources stream. 24 reachableResult *v1.DispatchReachableResourcesResponse 25 26 // lookupResult is the result to be published by LookupResources, if the resource is actually 27 // permissioned. Will be nil before processing and nil after processing IF the resource needed 28 // to be checked and the check showed the resource was inaccessible. 29 lookupResult *v1.DispatchLookupResourcesResponse 30 31 // orderingIndex is the index of the resource result as returned by reachable resources. Used to 32 // maintain strict publishing order of results. 33 orderingIndex uint64 34 } 35 36 // resourceQueue is a queue for managing of possibleResources through the various states of the stream (queueing, processing and publishing). 37 type resourceQueue struct { 38 ctx context.Context 39 lock sync.Mutex 40 41 // toProcess are those resources (keyed by orderingIndex) that have not yet been processed (checked). 42 toProcess map[uint64]possibleResource 43 44 // toPublish are those resources (keyed by orderingIndex) that have been processed and 45 // are ready for publishing. Note that resources whose Check calls showed NO_PERMISSION 46 // will *also* be in this map, just with lookupResult set to nil. This is done to ensure 47 // strict ordering of publishing. 48 toPublish map[uint64]possibleResource 49 50 // beingProcessed are those resources (keyed by orderingIndex) that are currently being processed. 51 beingProcessed map[uint64]possibleResource 52 } 53 54 type processingStatus int 55 56 const ( 57 publishDirectly processingStatus = iota 58 awaitingMoreResources 59 readyForProcessing 60 ) 61 62 // addPossibleResource queues a resource for processing (if a check is required) or for 63 // immediate publishing (if a check is not required). 64 func (rq *resourceQueue) addPossibleResource(pr possibleResource) processingStatus { 65 rq.lock.Lock() 66 defer rq.lock.Unlock() 67 68 if pr.lookupResult != nil { 69 rq.toPublish[pr.orderingIndex] = pr 70 return publishDirectly 71 } 72 73 rq.toProcess[pr.orderingIndex] = pr 74 if len(rq.toProcess) < int(datastore.FilterMaximumIDCount) { 75 return awaitingMoreResources 76 } 77 78 return readyForProcessing 79 } 80 81 // updateToBePublished marks a resource as ready for publishing. 82 func (rq *resourceQueue) updateToBePublished(pr possibleResource) { 83 rq.lock.Lock() 84 defer rq.lock.Unlock() 85 86 rq.toPublish[pr.orderingIndex] = pr 87 delete(rq.beingProcessed, pr.orderingIndex) 88 } 89 90 // markResourceCompleted marks that all work has been completed on the given resources. 91 func (rq *resourceQueue) markResourceCompleted(pr possibleResource) { 92 rq.lock.Lock() 93 defer rq.lock.Unlock() 94 95 delete(rq.toPublish, pr.orderingIndex) 96 } 97 98 // selectResourcesToProcess marks all toProcess resources as being processing and returns those resources 99 // for processing. 100 // 101 // If alwaysReturn is false, then resources will only be returned if they meet the chunk size, to ensure 102 // checks occur in larger batches. 103 func (rq *resourceQueue) selectResourcesToProcess(alwaysReturn bool) []possibleResource { 104 rq.lock.Lock() 105 defer rq.lock.Unlock() 106 107 toProcess := maps.Values(rq.toProcess) 108 if !alwaysReturn && len(toProcess) < int(datastore.FilterMaximumIDCount) { 109 return nil 110 } 111 112 for _, pr := range toProcess { 113 rq.beingProcessed[pr.orderingIndex] = pr 114 } 115 116 rq.toProcess = map[uint64]possibleResource{} 117 return toProcess 118 } 119 120 // resourcesToPossiblyPublish returns all resources in the toPublish state. This does *not* mark the resources 121 // as published. 122 func (rq *resourceQueue) resourcesToPossiblyPublish() []possibleResource { 123 rq.lock.Lock() 124 defer rq.lock.Unlock() 125 126 return maps.Values(rq.toPublish) 127 } 128 129 // checkingResourceStream is a Stream[*v1.DispatchLookupResourcesResponse] that consumes reachable resource 130 // responses which are published to it, checks the resource (if necessary), and then publishes the resource 131 // if reachable. This stream performs Checks for the possibly-inaccessible resources in a parallel fashion 132 // but maintains the proper publishing order to the parent stream. 133 // 134 // Resources in the stream are processed as follows: 135 // 1. A reachable resource is published to the stream via a call to the Publish method 136 // 2. The resource is placed into the resourceQueue with an index indicating its publishing order 137 // 3. A processing worker (up to concurrency limits) grabs the resources to be processed in the queue, checks 138 // those resources that need to have CheckPermission invoked, and places all resources processed in the queue 139 // into the "ready for publishing" state. 140 // 4. The *single* publishing worker grabs resources to be published and publishes them in the correct order, 141 // skipping any resources whose CheckPermission calls showed them as being inaccessible. 142 // 5. The waitForPublishing call waits for the stream to have fully processed and published all queued resources 143 // before returning. 144 type checkingResourceStream struct { 145 // ctx is the parent context for the LookupResources. 146 // NOTE: This will be disconnected from the reachableContext below. 147 ctx context.Context 148 cancel func() 149 150 // reachableContext is the context to be returned by this stream for the ReachableResources call, and is 151 // disconnected from the overall context. 152 reachableContext context.Context 153 154 // cancelReachable cancels the reachable resources request once the limit has been reached. Should only 155 // be called from the publishing goroutine, to indicate that there is absolutely no need for further 156 // reachable resources. 157 cancelReachable func() 158 159 // concurrencyLimit is the limit on the number on concurrency processing workers. 160 concurrencyLimit uint16 161 162 req ValidatedLookupResourcesRequest 163 checker dispatch.Check 164 parentStream dispatch.Stream[*v1.DispatchLookupResourcesResponse] 165 166 // sem is a chan of length `concurrencyLimit` used to ensure the task runner does 167 // not exceed the concurrencyLimit with spawned goroutines. 168 sem chan struct{} 169 170 // rq is the resourceQueue for managing the state of all resources returned by the reachable resources call. 171 rq *resourceQueue 172 173 // reachableResourcesAreAvailableForProcessing is a channel which indicates to the processing worker(s) that work is available 174 // for processing. 175 reachableResourcesAreAvailableForProcessing chan struct{} 176 177 // reachableResourcesCompleted is a channel used to indicate to each processing worker that reachable resources has 178 // been completed, and that all further processing work should be done before shutting down. 179 reachableResourcesCompleted chan struct{} 180 181 // availableForPublishing is a channel which indicates to the publishing worker that work is available 182 // for publishing. If given a false value, then the publishing worker should be terminated, as it indicates 183 // there will be no further processed resources. 184 availableForPublishing chan bool 185 186 // limits is the limit tracker for the stream. Should *only* be accessed from the publishing goroutine. 187 limits *limitTracker 188 189 // orderingIndexToBePublished is the current index to be published. Should *only* be accessed from the publishing 190 // goroutine. 191 orderingIndexToBePublished uint64 192 193 // reachableResourcesCount is the count of reachable resources received. Should *only* be accessed from queue() 194 // and waitForPublishing() (after reachable resources has completed). 195 reachableResourcesCount uint64 196 197 // lastReachableResourceCursor is the cursor from the last received reachable resource result. Should *only* be accessed from 198 // Publish() and waitForPublishing() (after reachable resources has completed). 199 lastReachableResourceCursor *v1.Cursor 200 201 // dispatchesToBeReported is the number of dispatches that were skipped from being reported due 202 // to a resource being filtered, and whose count has to be attached to the next outgoing result. 203 dispatchesToBeReported atomic.Uint32 204 205 // cachedDispatchesToBeReported is the number of cached dispatches that were skipped from being reported due 206 // to a resource being filtered, and whose count has to be attached to the next outgoing result. 207 cachedDispatchesToBeReported atomic.Uint32 208 209 errSetter sync.Once 210 err error 211 212 processingWaitGroup sync.WaitGroup 213 publishingWaitGroup sync.WaitGroup 214 } 215 216 func newCheckingResourceStream( 217 lookupContext context.Context, 218 reachableContext context.Context, 219 cancelReachable func(), 220 req ValidatedLookupResourcesRequest, 221 checker dispatch.Check, 222 parentStream dispatch.Stream[*v1.DispatchLookupResourcesResponse], 223 limits *limitTracker, 224 concurrencyLimit uint16, 225 ) *checkingResourceStream { 226 if concurrencyLimit == 0 { 227 concurrencyLimit = 1 228 } 229 230 // Since one goroutine is used for publishing, allocate one less processing goroutine. 231 processingConcurrencyLimit := concurrencyLimit - 1 232 if processingConcurrencyLimit == 0 { 233 processingConcurrencyLimit = 1 234 } 235 236 cancelCtx, cancel := context.WithCancel(lookupContext) 237 238 crs := &checkingResourceStream{ 239 ctx: cancelCtx, 240 cancel: cancel, 241 242 reachableContext: reachableContext, 243 cancelReachable: cancelReachable, 244 concurrencyLimit: concurrencyLimit, 245 246 req: req, 247 checker: checker, 248 parentStream: parentStream, 249 limits: limits, 250 251 sem: make(chan struct{}, processingConcurrencyLimit), 252 253 rq: &resourceQueue{ 254 ctx: lookupContext, 255 toProcess: map[uint64]possibleResource{}, 256 beingProcessed: map[uint64]possibleResource{}, 257 toPublish: map[uint64]possibleResource{}, 258 }, 259 reachableResourcesAreAvailableForProcessing: make(chan struct{}, concurrencyLimit), 260 reachableResourcesCompleted: make(chan struct{}, concurrencyLimit), 261 availableForPublishing: make(chan bool, concurrencyLimit), 262 263 orderingIndexToBePublished: 0, 264 reachableResourcesCount: 0, 265 266 errSetter: sync.Once{}, 267 err: nil, 268 269 processingWaitGroup: sync.WaitGroup{}, 270 publishingWaitGroup: sync.WaitGroup{}, 271 } 272 273 // Spawn the goroutine that will publish resources to the parent stream in the proper order. 274 crs.publishingWaitGroup.Add(1) 275 go crs.resourcePublisher() 276 return crs 277 } 278 279 // waitForPublishing waits for the publishing goroutine to complete its work, and returns the number 280 // of published *reachable* resources or the error that occurred during checking or publishing. 281 func (crs *checkingResourceStream) waitForPublishing() (uint64, *v1.Cursor, error) { 282 // Mark that no new items will come in from the reachable resources stream. 283 for i := 0; i < int(crs.concurrencyLimit); i++ { 284 crs.reachableResourcesCompleted <- struct{}{} 285 } 286 287 // Wait for all existing processing to complete. 288 crs.processingWaitGroup.Wait() 289 290 // Run a final processing call to ensure there are no remaining items. 291 _, err := crs.runProcess(true) 292 if err != nil { 293 return 0, nil, err 294 } 295 296 // Mark publishing as ready for final publishing. 297 select { 298 case crs.availableForPublishing <- false: 299 break 300 301 case <-crs.ctx.Done(): 302 crs.setError(crs.ctx.Err()) 303 break 304 } 305 306 // Wait for any remaining publishing to complete. 307 crs.publishingWaitGroup.Wait() 308 309 return crs.reachableResourcesCount, crs.lastReachableResourceCursor, crs.err 310 } 311 312 // resourcePublisher is the goroutine that publishes resources to the parent stream once they've been 313 // validated by the processing worker(s). 314 func (crs *checkingResourceStream) resourcePublisher() { 315 defer crs.publishingWaitGroup.Done() 316 317 for { 318 select { 319 case <-crs.ctx.Done(): 320 crs.setError(crs.ctx.Err()) 321 return 322 323 case isStillRunning := <-crs.availableForPublishing: 324 err := crs.publishResourcesIfPossible() 325 if err != nil { 326 crs.setError(err) 327 return 328 } 329 330 if isStillRunning { 331 continue 332 } 333 return 334 } 335 } 336 } 337 338 // publishResourcesIfPossible publishes the resources that have been processed, in the correct order, if any. 339 func (crs *checkingResourceStream) publishResourcesIfPossible() error { 340 for { 341 toPublish := crs.rq.resourcesToPossiblyPublish() 342 if len(toPublish) == 0 { 343 return nil 344 } 345 346 for { 347 if len(toPublish) == 0 { 348 break 349 } 350 351 // Sort to ensure they are in the publishable order. 352 slices.SortFunc(toPublish, func(a, b possibleResource) int { 353 return cmp.Compare(a.orderingIndex, b.orderingIndex) 354 }) 355 356 // Ensure that the next resource to be published is the next in the order. If not, 357 // we're still waiting on a resource to be checked. 358 current := toPublish[0] 359 if current.orderingIndex != crs.orderingIndexToBePublished { 360 return nil 361 } 362 363 toPublish = toPublish[1:] 364 crs.orderingIndexToBePublished++ 365 366 // NOTE: lookupResult will be `nil` if the Check for the resource found that the resource is 367 // not actually accessible. The entry is kept in `toPublish` to ensure proper ordering is maintained 368 // on the parent stream. 369 if current.lookupResult != nil { 370 if !crs.limits.prepareForPublishing() { 371 crs.cancelReachable() 372 return nil 373 } 374 375 err := crs.parentStream.Publish(current.lookupResult) 376 if err != nil { 377 crs.setError(err) 378 return err 379 } 380 } 381 382 crs.rq.markResourceCompleted(current) 383 } 384 } 385 } 386 387 // setError sets an error that occurred. 388 func (crs *checkingResourceStream) setError(err error) { 389 crs.errSetter.Do(func() { 390 crs.err = err 391 crs.cancel() 392 crs.cancelReachable() 393 }) 394 } 395 396 // process is a processing worker for a reachable resources result, performing checks if necessary. 397 func (crs *checkingResourceStream) process() { 398 defer crs.processingWaitGroup.Done() 399 400 for { 401 select { 402 case <-crs.ctx.Done(): 403 crs.setError(crs.ctx.Err()) 404 return 405 406 case <-crs.reachableResourcesCompleted: 407 for { 408 ok, err := crs.runProcess(true) 409 if err != nil { 410 crs.setError(err) 411 return 412 } 413 if !ok { 414 break 415 } 416 } 417 return 418 419 case <-crs.reachableResourcesAreAvailableForProcessing: 420 for { 421 ok, err := crs.runProcess(false) 422 if err != nil { 423 crs.setError(err) 424 return 425 } 426 if !ok { 427 break 428 } 429 } 430 continue 431 } 432 } 433 } 434 435 func (crs *checkingResourceStream) runProcess(alwaysProcess bool) (bool, error) { 436 // Collect any resources that need to be checked, up to the configured limit, and issue a check. 437 // If a resource does not require a check, simply place on the toPublish queue. 438 toCheck := mapz.NewMultiMap[string, possibleResource]() 439 440 toProcess := crs.rq.selectResourcesToProcess(alwaysProcess) 441 if len(toProcess) == 0 { 442 return false, nil 443 } 444 445 for _, current := range toProcess { 446 if current.reachableResult.Resource.ResultStatus == v1.ReachableResource_HAS_PERMISSION { 447 return false, spiceerrors.MustBugf("process received a resolved resource") 448 } 449 450 toCheck.Add(current.reachableResult.Resource.ResourceId, current) 451 } 452 453 // Issue the bulk check over all the resources. 454 results, checkResultMetadata, err := computed.ComputeBulkCheck( 455 crs.ctx, 456 crs.checker, 457 computed.CheckParameters{ 458 ResourceType: crs.req.ObjectRelation, 459 Subject: crs.req.Subject, 460 CaveatContext: crs.req.Context.AsMap(), 461 AtRevision: crs.req.Revision, 462 MaximumDepth: crs.req.Metadata.DepthRemaining, 463 DebugOption: computed.NoDebugging, 464 }, 465 toCheck.Keys(), 466 ) 467 if err != nil { 468 return true, err 469 } 470 471 crs.dispatchesToBeReported.Add(checkResultMetadata.DispatchCount) 472 crs.cachedDispatchesToBeReported.Add(checkResultMetadata.CachedDispatchCount) 473 474 for _, rai := range toCheck.Values() { 475 checkResult := results[rai.reachableResult.Resource.ResourceId] 476 477 var permissionship v1.ResolvedResource_Permissionship 478 var missingFields []string 479 480 switch { 481 case checkResult == nil || checkResult.Membership == v1.ResourceCheckResult_NOT_MEMBER: 482 // NOTE: we use `UNKNOWN` here to indicate that the resource was found to be inaccessible, 483 // because ResolvedResource does not have such a state. 484 permissionship = v1.ResolvedResource_UNKNOWN 485 486 case checkResult != nil && checkResult.Membership == v1.ResourceCheckResult_MEMBER: 487 permissionship = v1.ResolvedResource_HAS_PERMISSION 488 489 case checkResult != nil && checkResult.Membership == v1.ResourceCheckResult_CAVEATED_MEMBER: 490 permissionship = v1.ResolvedResource_CONDITIONALLY_HAS_PERMISSION 491 missingFields = checkResult.MissingExprFields 492 493 default: 494 return true, spiceerrors.MustBugf("unknown check result status for reachable resources") 495 } 496 497 // Set the lookupResult iff the permissionship was a valid permission. 498 var lookupResult *v1.DispatchLookupResourcesResponse 499 if permissionship != v1.ResolvedResource_UNKNOWN { 500 metadata := rai.reachableResult.Metadata 501 metadata = crs.addSkippedDispatchCountToBePublished(metadata) 502 metadata.DepthRequired = max(metadata.DepthRequired, checkResultMetadata.DepthRequired) 503 504 lookupResult = &v1.DispatchLookupResourcesResponse{ 505 ResolvedResource: &v1.ResolvedResource{ 506 ResourceId: rai.reachableResult.Resource.ResourceId, 507 Permissionship: permissionship, 508 MissingRequiredContext: missingFields, 509 }, 510 Metadata: metadata, 511 AfterResponseCursor: rai.reachableResult.AfterResponseCursor, 512 } 513 } else { 514 if rai.reachableResult.Metadata.DispatchCount > 0 { 515 crs.dispatchesToBeReported.Add(rai.reachableResult.Metadata.DispatchCount) 516 } 517 518 if rai.reachableResult.Metadata.CachedDispatchCount > 0 { 519 crs.cachedDispatchesToBeReported.Add(rai.reachableResult.Metadata.CachedDispatchCount) 520 } 521 } 522 523 rai.lookupResult = lookupResult 524 crs.rq.updateToBePublished(rai) 525 } 526 527 select { 528 case crs.availableForPublishing <- true: 529 return true, nil 530 531 case <-crs.reachableContext.Done(): 532 return false, nil 533 534 case <-crs.ctx.Done(): 535 crs.setError(crs.ctx.Err()) 536 return false, nil 537 } 538 } 539 540 // addSkippedDispatchCountToBePublished adds any dispatch counts that were skipped due to a resource being filtered, 541 // to the metadata to be published. 542 func (crs *checkingResourceStream) addSkippedDispatchCountToBePublished(metadata *v1.ResponseMeta) *v1.ResponseMeta { 543 dispatchCount := crs.dispatchesToBeReported.Swap(0) 544 cachedDispatchCount := crs.cachedDispatchesToBeReported.Swap(0) 545 cloned := metadata.CloneVT() 546 cloned.DispatchCount += dispatchCount 547 cloned.CachedDispatchCount += cachedDispatchCount 548 return cloned 549 } 550 551 // spawnIfAvailable spawns a processing working, if the concurrency limit has not been reached. 552 func (crs *checkingResourceStream) spawnIfAvailable() { 553 // To spawn a processor, write a struct{} to the sem channel. If the checker 554 // is already at the concurrency limit, then this chan write will fail, 555 // and nothing will be spawned. This also checks if the context has already 556 // been canceled, in which case nothing needs to be done. 557 select { 558 case crs.sem <- struct{}{}: 559 crs.processingWaitGroup.Add(1) 560 go crs.process() 561 562 case <-crs.reachableContext.Done(): 563 return 564 565 case <-crs.ctx.Done(): 566 crs.setError(crs.ctx.Err()) 567 return 568 569 default: 570 return 571 } 572 } 573 574 // Publish implements the Stream interface and is invoked by the ReachableResources call. 575 func (crs *checkingResourceStream) Publish(result *v1.DispatchReachableResourcesResponse) error { 576 currentResource := possibleResource{ 577 reachableResult: result, 578 lookupResult: nil, 579 orderingIndex: crs.reachableResourcesCount, 580 } 581 582 // If the resource found already has permission (i.e. a check is not required), simply set 583 // the lookup result on the resource now. 584 if result.Resource.ResultStatus == v1.ReachableResource_HAS_PERMISSION { 585 metadata := crs.addSkippedDispatchCountToBePublished(result.Metadata) 586 currentResource.lookupResult = &v1.DispatchLookupResourcesResponse{ 587 ResolvedResource: &v1.ResolvedResource{ 588 ResourceId: result.Resource.ResourceId, 589 Permissionship: v1.ResolvedResource_HAS_PERMISSION, 590 }, 591 Metadata: metadata, 592 AfterResponseCursor: result.AfterResponseCursor, 593 } 594 } 595 596 crs.reachableResourcesCount++ 597 crs.lastReachableResourceCursor = result.AfterResponseCursor 598 599 status := crs.rq.addPossibleResource(currentResource) 600 601 switch status { 602 case publishDirectly: 603 // If the resource found already has permission (i.e. a check is not required), immediately 604 // publish it, rather than going through a processing worker. This saves a step for better 605 // performance. 606 if result.Resource.ResultStatus != v1.ReachableResource_HAS_PERMISSION { 607 return spiceerrors.MustBugf("got invalid resource for publish directly") 608 } 609 610 select { 611 case crs.availableForPublishing <- true: 612 return nil 613 614 case <-crs.reachableContext.Done(): 615 return nil 616 617 case <-crs.ctx.Done(): 618 crs.setError(crs.ctx.Err()) 619 return nil 620 } 621 622 case awaitingMoreResources: 623 // If an insufficient amount of resources have been collected for Checking, we're done. 624 return nil 625 626 case readyForProcessing: 627 // Otherwise, kick off a worker to process the resources. 628 select { 629 case crs.reachableResourcesAreAvailableForProcessing <- struct{}{}: 630 crs.spawnIfAvailable() 631 return nil 632 633 case <-crs.reachableContext.Done(): 634 return nil 635 636 case <-crs.ctx.Done(): 637 crs.setError(crs.ctx.Err()) 638 return nil 639 } 640 641 default: 642 return spiceerrors.MustBugf("unknown resource add state") 643 } 644 } 645 646 // Context implements the Stream interface. 647 func (crs *checkingResourceStream) Context() context.Context { 648 // NOTE: we return the reachable context here, because this is the stream to which the reachable resources 649 // call is publishing. 650 return crs.reachableContext 651 }