github.com/lirm/aeron-go@v0.0.0-20230415210743-920325491dc4/aeron/clientconductor.go (about) 1 /* 2 Copyright 2016-2018 Stanislav Liberman 3 Copyright (C) 2022 Talos, Inc. 4 5 Licensed under the Apache License, Version 2.0 (the "License"); 6 you may not use this file except in compliance with the License. 7 You may obtain a copy of the License at 8 9 http://www.apache.org/licenses/LICENSE-2.0 10 11 Unless required by applicable law or agreed to in writing, software 12 distributed under the License is distributed on an "AS IS" BASIS, 13 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 See the License for the specific language governing permissions and 15 limitations under the License. 16 */ 17 18 package aeron 19 20 import ( 21 "errors" 22 "fmt" 23 "io" 24 "log" 25 "runtime" 26 "sync" 27 "time" 28 29 "github.com/lirm/aeron-go/aeron/atomic" 30 "github.com/lirm/aeron-go/aeron/broadcast" 31 ctr "github.com/lirm/aeron-go/aeron/counters" 32 "github.com/lirm/aeron-go/aeron/driver" 33 "github.com/lirm/aeron-go/aeron/idlestrategy" 34 "github.com/lirm/aeron-go/aeron/logbuffer" 35 "github.com/lirm/aeron-go/aeron/logging" 36 ) 37 38 var RegistrationStatus = struct { 39 AwaitingMediaDriver int 40 RegisteredMediaDriver int 41 ErroredMediaDriver int 42 }{ 43 0, 44 1, 45 2, 46 } 47 48 const ( 49 keepaliveTimeoutNS = 500 * int64(time.Millisecond) 50 resourceTimeoutNS = 1000 * int64(time.Millisecond) 51 52 // heartbeatTypeId is the type id of a heartbeat counter. 53 heartbeatTypeId = int32(11) 54 55 // registrationIdOffset is the offset in the key metadata for the registration id of the counter. 
56 heartheatRegistrationIdOffset = int32(0) 57 ) 58 59 type publicationStateDefn struct { 60 regID int64 61 origRegID int64 62 timeOfRegistration int64 63 streamID int32 64 sessionID int32 65 posLimitCounterID int32 66 channelStatusIndicatorID int32 67 errorCode int32 68 status int 69 channel string 70 errorMessage string 71 buffers *logbuffer.LogBuffers 72 publication *Publication 73 } 74 75 func (pub *publicationStateDefn) Init(channel string, regID int64, streamID int32, now int64) *publicationStateDefn { 76 pub.channel = channel 77 pub.regID = regID 78 pub.streamID = streamID 79 pub.sessionID = -1 80 pub.posLimitCounterID = -1 81 pub.timeOfRegistration = now 82 pub.status = RegistrationStatus.AwaitingMediaDriver 83 84 return pub 85 } 86 87 type subscriptionStateDefn struct { 88 regID int64 89 timeOfRegistration int64 90 streamID int32 91 errorCode int32 92 status int 93 channel string 94 errorMessage string 95 availableImageHandler AvailableImageHandler 96 unavailableImageHandler UnavailableImageHandler 97 subscription *Subscription 98 } 99 100 func (sub *subscriptionStateDefn) Init( 101 ch string, 102 regID int64, 103 sID int32, 104 now int64, 105 availableImageHandler AvailableImageHandler, 106 UnavailableImageHandler UnavailableImageHandler) *subscriptionStateDefn { 107 sub.channel = ch 108 sub.regID = regID 109 sub.streamID = sID 110 sub.timeOfRegistration = now 111 sub.status = RegistrationStatus.AwaitingMediaDriver 112 sub.availableImageHandler = availableImageHandler 113 sub.unavailableImageHandler = UnavailableImageHandler 114 115 return sub 116 } 117 118 type counterStateDefn struct { 119 timeOfRegistration int64 120 counterId int32 121 errorCode int32 122 errorMessage string 123 status int 124 counter *Counter 125 } 126 127 func (c *counterStateDefn) Init(time int64) { 128 c.timeOfRegistration = time 129 c.status = RegistrationStatus.AwaitingMediaDriver 130 } 131 132 type lingerResourse struct { 133 lastTime int64 134 resource io.Closer 135 } 136 137 
type IdAndAvailableCounterHandler struct { 138 registrationId int64 139 handler AvailableCounterHandler 140 } 141 142 func NewIdAndAvailablePair(registrationId int64, handler AvailableCounterHandler) *IdAndAvailableCounterHandler { 143 ret := new(IdAndAvailableCounterHandler) 144 ret.registrationId = registrationId 145 ret.handler = handler 146 return ret 147 } 148 149 type IdAndUnavailableCounterHandler struct { 150 registrationId int64 151 handler UnavailableCounterHandler 152 } 153 154 func NewIdAndUnavailablePair(registrationId int64, handler UnavailableCounterHandler) *IdAndUnavailableCounterHandler { 155 ret := new(IdAndUnavailableCounterHandler) 156 ret.registrationId = registrationId 157 ret.handler = handler 158 return ret 159 } 160 161 type DriverProxy interface { 162 ClientID() int64 163 TimeOfLastDriverKeepalive() int64 164 NextCorrelationID() int64 165 AddSubscription(channel string, streamID int32) (int64, error) 166 RemoveSubscription(registrationID int64) error 167 AddPublication(channel string, streamID int32) (int64, error) 168 AddExclusivePublication(channel string, streamID int32) (int64, error) 169 RemovePublication(registrationID int64) error 170 ClientClose() error 171 AddDestination(registrationID int64, channel string) (int64, error) 172 RemoveDestination(registrationID int64, channel string) (int64, error) 173 AddRcvDestination(registrationID int64, channel string) (int64, error) 174 RemoveRcvDestination(registrationID int64, channel string) (int64, error) 175 AddCounter(typeId int32, keyBuffer *atomic.Buffer, keyOffset int32, keyLength int32, 176 labelBuffer *atomic.Buffer, labelOffset int32, labelLength int32) (int64, error) 177 AddCounterByLabel(typeId int32, label string) (int64, error) 178 RemoveCounter(registrationId int64) (int64, error) 179 } 180 181 // ImageFactory allows tests to use fake Images 182 type ImageFactory func(sessionID int32, corrID int64, logFilename string, subRegId int64, sourceIdentity string, 183 
counterValuesBuffer *atomic.Buffer, subscriberPositionID int32) Image 184 185 type ClientConductor struct { 186 pubs []*publicationStateDefn 187 subs []*subscriptionStateDefn 188 counters map[int64]*counterStateDefn 189 190 driverProxy DriverProxy 191 192 counterValuesBuffer *atomic.Buffer 193 counterReader *ctr.Reader 194 195 driverListenerAdapter *driver.ListenerAdapter 196 197 adminLock sync.Mutex 198 199 pendingCloses map[int64]chan bool 200 lingeringResources chan lingerResourse 201 202 onNewPublicationHandler NewPublicationHandler 203 onNewSubscriptionHandler NewSubscriptionHandler 204 onAvailableImageHandler AvailableImageHandler 205 onUnavailableImageHandler UnavailableImageHandler 206 207 // Ordering is only important in that the 0-index element must be called first on [un]available counters. 208 availableCounterHandlers []*IdAndAvailableCounterHandler 209 unavailableCounterHandlers []*IdAndUnavailableCounterHandler 210 211 errorHandler func(error) 212 imageFactory ImageFactory 213 214 running atomic.Bool 215 conductorRunning atomic.Bool 216 driverActive atomic.Bool 217 218 timeOfLastKeepalive int64 219 timeOfLastCheckManagedResources int64 220 timeOfLastDoWork int64 221 driverTimeoutNs int64 222 interServiceTimeoutNs int64 223 publicationConnectionTimeoutNs int64 224 resourceLingerTimeoutNs int64 225 226 heartbeatTimestamp *ctr.AtomicCounter 227 } 228 229 // Init is the primary initialization method for ClientConductor 230 func (cc *ClientConductor) Init(driverProxy DriverProxy, bcast *broadcast.CopyReceiver, 231 interServiceTo, driverTo, pubConnectionTo, lingerTo time.Duration, counters *ctr.MetaDataFlyweight) *ClientConductor { 232 233 logger.Debugf("Initializing ClientConductor with: %v %v %d %d %d", driverProxy, bcast, interServiceTo, 234 driverTo, pubConnectionTo) 235 236 cc.driverProxy = driverProxy 237 cc.running.Set(true) 238 cc.driverActive.Set(true) 239 cc.driverListenerAdapter = driver.NewAdapter(cc, bcast) 240 cc.interServiceTimeoutNs = 
interServiceTo.Nanoseconds() 241 cc.driverTimeoutNs = driverTo.Nanoseconds() 242 cc.publicationConnectionTimeoutNs = pubConnectionTo.Nanoseconds() 243 cc.resourceLingerTimeoutNs = lingerTo.Nanoseconds() 244 245 cc.counterValuesBuffer = counters.ValuesBuf.Get() 246 cc.counterReader = ctr.NewReader(counters.ValuesBuf.Get(), counters.MetaDataBuf.Get()) 247 248 cc.pendingCloses = make(map[int64]chan bool) 249 cc.lingeringResources = make(chan lingerResourse, 1024) 250 cc.imageFactory = DefaultImageFactory 251 252 cc.pubs = make([]*publicationStateDefn, 0) 253 cc.subs = make([]*subscriptionStateDefn, 0) 254 cc.counters = make(map[int64]*counterStateDefn) 255 cc.availableCounterHandlers = make([]*IdAndAvailableCounterHandler, 0) 256 cc.unavailableCounterHandlers = make([]*IdAndUnavailableCounterHandler, 0) 257 return cc 258 } 259 260 // Close will terminate the Run() goroutine body and close all active publications and subscription. Run() can 261 // be restarted in a another goroutine. 262 func (cc *ClientConductor) Close() (err error) { 263 logger.Debugf("Closing ClientConductor") 264 265 now := time.Now().UnixNano() 266 267 running := cc.running.Get() 268 269 cc.closeAllResources(now) 270 if running { 271 cc.driverProxy.ClientClose() 272 } 273 274 timeoutDuration := 5 * time.Second 275 timeout := time.Now().Add(timeoutDuration) 276 for cc.conductorRunning.Get() && time.Now().Before(timeout) { 277 time.Sleep(10 * time.Millisecond) 278 } 279 if cc.conductorRunning.Get() { 280 msg := fmt.Sprintf("failed to stop conductor after %v", timeoutDuration) 281 logger.Warning(msg) 282 err = errors.New(msg) 283 } 284 285 logger.Debugf("Closed ClientConductor") 286 return err 287 } 288 289 // Start begins the main execution loop of ClientConductor on a goroutine. 290 func (cc *ClientConductor) Start(idleStrategy idlestrategy.Idler) { 291 cc.running.Set(true) 292 go cc.run(idleStrategy) 293 } 294 295 // run is the main execution loop of ClientConductor. 
296 func (cc *ClientConductor) run(idleStrategy idlestrategy.Idler) { 297 now := time.Now().UnixNano() 298 cc.timeOfLastKeepalive = now 299 cc.timeOfLastCheckManagedResources = now 300 cc.timeOfLastDoWork = now 301 302 // Stay on the same thread for performance 303 runtime.LockOSThread() 304 305 // Clean exit from this particular go routine 306 defer func() { 307 if err := recover(); err != nil { 308 errStr := fmt.Sprintf("Panic: %v", err) 309 logger.Error(errStr) 310 cc.onError(errors.New(errStr)) 311 cc.running.Set(false) 312 } 313 cc.forceCloseResources() 314 cc.conductorRunning.Set(false) 315 316 logger.Infof("ClientConductor done") 317 }() 318 319 cc.conductorRunning.Set(true) 320 for cc.running.Get() { 321 workCount, err := cc.doWork() 322 if err != nil { 323 cc.onError(err) 324 return 325 } 326 idleStrategy.Idle(workCount) 327 } 328 } 329 330 func (cc *ClientConductor) forceCloseResources() { 331 for { 332 select { 333 case r := <-cc.lingeringResources: 334 logger.Debugf("Force closing resource: %v", r) 335 res := r.resource 336 if res != nil { 337 err := res.Close() 338 if err != nil { 339 logger.Warningf("Failed to force close resource: %v", err) 340 cc.onError(err) 341 } 342 } 343 default: 344 return 345 } 346 } 347 } 348 349 func (cc *ClientConductor) doWork() (int, error) { 350 workCount := cc.driverListenerAdapter.ReceiveMessages() 351 heartbeats, err := cc.onHeartbeatCheckTimeouts() 352 return workCount + heartbeats, err 353 } 354 355 func (cc *ClientConductor) getDriverStatus() error { 356 if cc.driverActive.Get() { 357 return nil 358 } else { 359 return errors.New("driver is inactive") 360 } 361 } 362 363 // AddPublication sends the add publication command through the driver proxy 364 func (cc *ClientConductor) AddPublication(channel string, streamID int32) (int64, error) { 365 logger.Debugf("AddPublication: channel=%s, streamId=%d", channel, streamID) 366 367 if err := cc.getDriverStatus(); err != nil { 368 return 0, err 369 } 370 371 
cc.adminLock.Lock() 372 defer cc.adminLock.Unlock() 373 374 now := time.Now().UnixNano() 375 376 regID, err := cc.driverProxy.AddPublication(channel, streamID) 377 if err != nil { 378 return 0, err 379 } 380 381 pubState := new(publicationStateDefn) 382 pubState.Init(channel, regID, streamID, now) 383 384 cc.pubs = append(cc.pubs, pubState) 385 386 return regID, nil 387 } 388 389 // AddExclusivePublication sends the add publication command through the driver proxy 390 func (cc *ClientConductor) AddExclusivePublication(channel string, streamID int32) (int64, error) { 391 logger.Debugf("AddExclusivePublication: channel=%s, streamId=%d", channel, streamID) 392 393 if err := cc.getDriverStatus(); err != nil { 394 return 0, err 395 } 396 397 cc.adminLock.Lock() 398 defer cc.adminLock.Unlock() 399 400 now := time.Now().UnixNano() 401 402 regID, err := cc.driverProxy.AddExclusivePublication(channel, streamID) 403 if err != nil { 404 return 0, err 405 } 406 407 pubState := new(publicationStateDefn) 408 pubState.Init(channel, regID, streamID, now) 409 410 cc.pubs = append(cc.pubs, pubState) 411 412 return regID, nil 413 } 414 415 func (cc *ClientConductor) FindPublication(registrationID int64) (*Publication, error) { 416 417 cc.adminLock.Lock() 418 defer cc.adminLock.Unlock() 419 420 var publication *Publication 421 for _, pub := range cc.pubs { 422 if pub.regID != registrationID { 423 continue 424 } 425 if pub.publication != nil { 426 return pub.publication, nil 427 } 428 switch pub.status { 429 case RegistrationStatus.AwaitingMediaDriver: 430 return nil, timeoutExceeded(pub.timeOfRegistration, cc.driverTimeoutNs) 431 case RegistrationStatus.RegisteredMediaDriver: 432 publication = NewPublication(pub.buffers) 433 publication.conductor = cc 434 publication.channel = pub.channel 435 publication.regID = registrationID 436 publication.originalRegID = pub.origRegID 437 publication.streamID = pub.streamID 438 publication.sessionID = pub.sessionID 439 publication.pubLimit = 
NewPosition(cc.counterValuesBuffer, pub.posLimitCounterID) 440 publication.channelStatusIndicatorID = pub.channelStatusIndicatorID 441 pub.publication = publication 442 return publication, nil 443 case RegistrationStatus.ErroredMediaDriver: 444 return nil, fmt.Errorf("error on %d: %d: %s", registrationID, pub.errorCode, pub.errorMessage) 445 default: 446 return nil, errors.New("unknown registration status") 447 } 448 } 449 return nil, fmt.Errorf("registration ID %d cannot be found", registrationID) 450 } 451 452 func (cc *ClientConductor) releasePublication(regID int64) error { 453 logger.Debugf("ReleasePublication: regID=%d", regID) 454 455 if err := cc.getDriverStatus(); err != nil { 456 return err 457 } 458 459 cc.adminLock.Lock() 460 defer cc.adminLock.Unlock() 461 462 now := time.Now().UnixNano() 463 464 pubcnt := len(cc.pubs) 465 for i, pub := range cc.pubs { 466 if pub != nil && pub.regID == regID { 467 if err := cc.driverProxy.RemovePublication(regID); err != nil { 468 return err 469 } 470 471 cc.pubs[i] = cc.pubs[pubcnt-1] 472 cc.pubs[pubcnt-1] = nil 473 pubcnt-- 474 475 if pub.buffers.DecRef() == 0 { 476 cc.lingeringResources <- lingerResourse{now, pub.buffers} 477 } 478 } 479 } 480 cc.pubs = cc.pubs[:pubcnt] 481 return nil 482 } 483 484 // AddSubscription sends the add subscription command through the driver proxy 485 func (cc *ClientConductor) AddSubscription(channel string, streamID int32) (int64, error) { 486 return cc.AddSubscriptionWithHandlers(channel, streamID, 487 cc.onAvailableImageHandler, cc.onUnavailableImageHandler) 488 } 489 490 // AddSubscriptionWithHandlers sends the add subscription command through the driver proxy. It will use the specified Handlers for 491 // available/unavailable Images instead of the default handlers. 
492 func (cc *ClientConductor) AddSubscriptionWithHandlers(channel string, streamID int32, 493 onAvailableImage AvailableImageHandler, onUnavailableImage UnavailableImageHandler) (int64, error) { 494 logger.Debugf("AddSubscription: channel=%s, streamId=%d", channel, streamID) 495 496 if err := cc.getDriverStatus(); err != nil { 497 return 0, err 498 } 499 500 cc.adminLock.Lock() 501 defer cc.adminLock.Unlock() 502 503 now := time.Now().UnixNano() 504 505 regID, err := cc.driverProxy.AddSubscription(channel, streamID) 506 if err != nil { 507 return 0, err 508 } 509 510 subState := new(subscriptionStateDefn) 511 subState.Init(channel, regID, streamID, now, onAvailableImage, onUnavailableImage) 512 513 cc.subs = append(cc.subs, subState) 514 515 return regID, nil 516 } 517 518 // FindSubscription by Registration ID, which is returned by AddSubscription. Returns the Subscription or an error. 519 // A pending Subscription will return nil,nil signifying that there is neither a Subscription nor an error. 
520 func (cc *ClientConductor) FindSubscription(registrationID int64) (*Subscription, error) { 521 cc.adminLock.Lock() 522 defer cc.adminLock.Unlock() 523 524 for _, sub := range cc.subs { 525 if sub.regID != registrationID { 526 continue 527 } 528 switch sub.status { 529 case RegistrationStatus.AwaitingMediaDriver: 530 return nil, timeoutExceeded(sub.timeOfRegistration, cc.driverTimeoutNs) 531 case RegistrationStatus.RegisteredMediaDriver: 532 return sub.subscription, nil 533 case RegistrationStatus.ErroredMediaDriver: 534 return nil, fmt.Errorf("error on %d: %d: %s", registrationID, sub.errorCode, sub.errorMessage) 535 default: 536 return nil, errors.New("unknown registration status") 537 } 538 } 539 540 return nil, fmt.Errorf("registration ID %d cannot be found", registrationID) 541 } 542 543 func timeoutExceeded(timeOfRegistration int64, driverTimeoutNs int64) error { 544 if now := time.Now().UnixNano(); now > (timeOfRegistration + driverTimeoutNs) { 545 return fmt.Errorf("no response from driver. 
started: %d, now: %d, to: %d", 546 timeOfRegistration/time.Millisecond.Nanoseconds(), 547 now/time.Millisecond.Nanoseconds(), 548 driverTimeoutNs/time.Millisecond.Nanoseconds()) 549 } 550 return nil 551 } 552 553 func (cc *ClientConductor) releaseSubscription(regID int64, images []Image) error { 554 logger.Debugf("ReleaseSubscription: regID=%d", regID) 555 556 if err := cc.getDriverStatus(); err != nil { 557 return err 558 } 559 560 cc.adminLock.Lock() 561 defer cc.adminLock.Unlock() 562 563 now := time.Now().UnixNano() 564 565 subcnt := len(cc.subs) 566 for i, sub := range cc.subs { 567 if sub != nil && sub.regID == regID { 568 if logger.IsEnabledFor(logging.DEBUG) { 569 logger.Debugf("Removing subscription: %d; %v", regID, images) 570 } 571 572 if err := cc.driverProxy.RemoveSubscription(regID); err != nil { 573 return err 574 } 575 576 cc.subs[i] = cc.subs[subcnt-1] 577 cc.subs[subcnt-1] = nil 578 subcnt-- 579 var handler func(Image) 580 if sub.subscription != nil { 581 handler = sub.subscription.UnavailableImageHandler() 582 } 583 584 for i := range images { 585 image := images[i] 586 if handler != nil { 587 handler(image) 588 } 589 cc.lingeringResources <- lingerResourse{now, image} 590 } 591 } 592 } 593 cc.subs = cc.subs[:subcnt] 594 return nil 595 } 596 597 func (cc *ClientConductor) releaseCounter(counter Counter) error { 598 logger.Debugf("releaseCounter: regId=%d", counter.RegistrationId()) 599 600 if err := cc.getDriverStatus(); err != nil { 601 return err 602 } 603 604 cc.adminLock.Lock() 605 defer cc.adminLock.Unlock() 606 607 registrationId := counter.RegistrationId() 608 if _, ok := cc.counters[registrationId]; ok { 609 delete(cc.counters, registrationId) 610 _, err := cc.driverProxy.RemoveCounter(registrationId) 611 return err 612 } 613 return nil 614 } 615 616 // AddDestination sends the add destination command through the driver proxy 617 func (cc *ClientConductor) AddDestination(registrationID int64, endpointChannel string) error { 618 
logger.Debugf("AddDestination: regID=%d endpointChannel=%s", registrationID, endpointChannel) 619 620 if err := cc.getDriverStatus(); err != nil { 621 return err 622 } 623 624 cc.adminLock.Lock() 625 defer cc.adminLock.Unlock() 626 627 _, err := cc.driverProxy.AddDestination(registrationID, endpointChannel) 628 return err 629 } 630 631 // RemoveDestination sends the remove destination command through the driver proxy 632 func (cc *ClientConductor) RemoveDestination(registrationID int64, endpointChannel string) error { 633 logger.Debugf("RemoveDestination: regID=%d endpointChannel=%s", registrationID, endpointChannel) 634 635 if err := cc.getDriverStatus(); err != nil { 636 return err 637 } 638 639 cc.adminLock.Lock() 640 defer cc.adminLock.Unlock() 641 642 _, err := cc.driverProxy.RemoveDestination(registrationID, endpointChannel) 643 return err 644 } 645 646 // AddRcvDestination sends the add rcv destination command through the driver proxy 647 func (cc *ClientConductor) AddRcvDestination(registrationID int64, endpointChannel string) error { 648 logger.Debugf("AddRcvDestination: regID=%d endpointChannel=%s", registrationID, endpointChannel) 649 650 if err := cc.getDriverStatus(); err != nil { 651 return err 652 } 653 654 cc.adminLock.Lock() 655 defer cc.adminLock.Unlock() 656 657 _, err := cc.driverProxy.AddRcvDestination(registrationID, endpointChannel) 658 return err 659 } 660 661 // RemoveRcvDestination sends the remove rcv destination command through the driver proxy 662 func (cc *ClientConductor) RemoveRcvDestination(registrationID int64, endpointChannel string) error { 663 logger.Debugf("RemoveRcvDestination: regID=%d endpointChannel=%s", registrationID, endpointChannel) 664 665 if err := cc.getDriverStatus(); err != nil { 666 return err 667 } 668 669 cc.adminLock.Lock() 670 defer cc.adminLock.Unlock() 671 672 _, err := cc.driverProxy.RemoveRcvDestination(registrationID, endpointChannel) 673 return err 674 } 675 676 func (cc *ClientConductor) 
AddCounter(typeId int32, keyBuffer *atomic.Buffer, keyOffset int32, keyLength int32, 677 labelBuffer *atomic.Buffer, labelOffset int32, labelLength int32) (int64, error) { 678 logger.Debugf("AddCounter: typeId=%d", typeId) 679 680 if err := cc.getDriverStatus(); err != nil { 681 return 0, err 682 } 683 if keyLength < 0 || keyLength > ctr.MaxKeyLength { 684 return 0, fmt.Errorf("key length out of bounds: %d", keyLength) 685 } 686 if labelLength < 0 || labelLength > ctr.MaxLabelLength { 687 return 0, fmt.Errorf("label length out of bounds: %d", labelLength) 688 } 689 cc.adminLock.Lock() 690 defer cc.adminLock.Unlock() 691 692 now := time.Now().UnixNano() 693 694 registrationId, err := cc.driverProxy.AddCounter( 695 typeId, keyBuffer, keyOffset, keyLength, labelBuffer, labelOffset, labelLength) 696 if err != nil { 697 return 0, err 698 } 699 counterState := new(counterStateDefn) 700 counterState.Init(now) 701 cc.counters[registrationId] = counterState 702 703 return registrationId, nil 704 } 705 706 func (cc *ClientConductor) AddCounterByLabel(typeId int32, label string) (int64, error) { 707 logger.Debugf("AddCounterByLabel: typeId=%d, label=%s", typeId, label) 708 709 if err := cc.getDriverStatus(); err != nil { 710 return 0, err 711 } 712 if int32(len(label)) > ctr.MaxLabelLength { 713 return 0, fmt.Errorf("label length out of bounds: %d", len(label)) 714 } 715 cc.adminLock.Lock() 716 defer cc.adminLock.Unlock() 717 718 now := time.Now().UnixNano() 719 720 registrationId, err := cc.driverProxy.AddCounterByLabel(typeId, label) 721 if err != nil { 722 return 0, err 723 } 724 counterState := new(counterStateDefn) 725 counterState.Init(now) 726 cc.counters[registrationId] = counterState 727 728 return registrationId, nil 729 } 730 731 func (cc *ClientConductor) FindCounter(registrationID int64) (*Counter, error) { 732 cc.adminLock.Lock() 733 defer cc.adminLock.Unlock() 734 counterDef, ok := cc.counters[registrationID] 735 if !ok { 736 return nil, 
fmt.Errorf("registration ID %d cannot be found", registrationID) 737 } 738 if counterDef.counter != nil { 739 return counterDef.counter, nil 740 } 741 switch counterDef.status { 742 case RegistrationStatus.AwaitingMediaDriver: 743 return nil, timeoutExceeded(counterDef.timeOfRegistration, cc.driverTimeoutNs) 744 case RegistrationStatus.RegisteredMediaDriver: 745 counter, err := NewCounter(registrationID, cc, counterDef.counterId) 746 if err != nil { 747 return nil, err 748 } 749 counterDef.counter = counter 750 return counter, nil 751 case RegistrationStatus.ErroredMediaDriver: 752 return nil, fmt.Errorf("error on %d: %d: %s", 753 registrationID, counterDef.errorCode, counterDef.errorMessage) 754 default: 755 return nil, errors.New("unknown registration status") 756 } 757 758 } 759 760 func (cc *ClientConductor) AddAvailableCounterHandler(handler AvailableCounterHandler) int64 { 761 cc.adminLock.Lock() 762 defer cc.adminLock.Unlock() 763 registrationID := cc.driverProxy.NextCorrelationID() 764 cc.availableCounterHandlers = append(cc.availableCounterHandlers, NewIdAndAvailablePair(registrationID, handler)) 765 return registrationID 766 } 767 768 func (cc *ClientConductor) RemoveAvailableCounterHandlerById(registrationId int64) bool { 769 cc.adminLock.Lock() 770 defer cc.adminLock.Unlock() 771 for i, pair := range cc.availableCounterHandlers { 772 if pair.registrationId == registrationId { 773 cc.availableCounterHandlers[i] = cc.availableCounterHandlers[len(cc.availableCounterHandlers)-1] 774 cc.availableCounterHandlers = cc.availableCounterHandlers[:len(cc.availableCounterHandlers)-1] 775 return true 776 } 777 } 778 return false 779 } 780 781 func (cc *ClientConductor) RemoveAvailableCounterHandler(handler AvailableCounterHandler) bool { 782 cc.adminLock.Lock() 783 defer cc.adminLock.Unlock() 784 for i, pair := range cc.availableCounterHandlers { 785 if pair.handler == handler { 786 cc.availableCounterHandlers[i] = 
cc.availableCounterHandlers[len(cc.availableCounterHandlers)-1] 787 cc.availableCounterHandlers = cc.availableCounterHandlers[:len(cc.availableCounterHandlers)-1] 788 return true 789 } 790 } 791 return false 792 } 793 794 func (cc *ClientConductor) AddUnavailableCounterHandler(handler UnavailableCounterHandler) int64 { 795 cc.adminLock.Lock() 796 defer cc.adminLock.Unlock() 797 registrationID := cc.driverProxy.NextCorrelationID() 798 cc.unavailableCounterHandlers = append(cc.unavailableCounterHandlers, NewIdAndUnavailablePair(registrationID, handler)) 799 return registrationID 800 } 801 802 func (cc *ClientConductor) RemoveUnavailableCounterHandlerById(registrationId int64) bool { 803 cc.adminLock.Lock() 804 defer cc.adminLock.Unlock() 805 for i, pair := range cc.unavailableCounterHandlers { 806 if pair.registrationId == registrationId { 807 cc.unavailableCounterHandlers[i] = cc.unavailableCounterHandlers[len(cc.unavailableCounterHandlers)-1] 808 cc.unavailableCounterHandlers = cc.unavailableCounterHandlers[:len(cc.unavailableCounterHandlers)-1] 809 return true 810 } 811 } 812 return false 813 } 814 815 func (cc *ClientConductor) RemoveUnavailableCounterHandler(handler UnavailableCounterHandler) bool { 816 cc.adminLock.Lock() 817 defer cc.adminLock.Unlock() 818 for i, pair := range cc.unavailableCounterHandlers { 819 if &pair.handler == &handler { 820 cc.unavailableCounterHandlers[i] = cc.unavailableCounterHandlers[len(cc.unavailableCounterHandlers)-1] 821 cc.unavailableCounterHandlers = cc.unavailableCounterHandlers[:len(cc.unavailableCounterHandlers)-1] 822 return true 823 } 824 } 825 return false 826 } 827 828 func (cc *ClientConductor) OnNewPublication(streamID int32, sessionID int32, posLimitCounterID int32, 829 channelStatusIndicatorID int32, logFileName string, regID int64, origRegID int64) { 830 831 logger.Debugf("OnNewPublication: streamId=%d, sessionId=%d, posLimitCounterID=%d, channelStatusIndicatorID=%d, logFileName=%s, correlationID=%d, regID=%d", 832 
streamID, sessionID, posLimitCounterID, channelStatusIndicatorID, logFileName, regID, origRegID) 833 834 cc.adminLock.Lock() 835 defer cc.adminLock.Unlock() 836 837 for _, pubDef := range cc.pubs { 838 if pubDef.regID == regID { 839 pubDef.status = RegistrationStatus.RegisteredMediaDriver 840 pubDef.sessionID = sessionID 841 pubDef.posLimitCounterID = posLimitCounterID 842 pubDef.channelStatusIndicatorID = channelStatusIndicatorID 843 pubDef.buffers = logbuffer.Wrap(logFileName) 844 pubDef.buffers.IncRef() 845 pubDef.origRegID = origRegID 846 847 logger.Debugf("Updated publication: %v", pubDef) 848 849 if cc.onNewPublicationHandler != nil { 850 cc.onNewPublicationHandler(pubDef.channel, streamID, sessionID, regID) 851 } 852 } 853 } 854 } 855 856 // TODO Implement logic specific to exclusive publications 857 func (cc *ClientConductor) OnNewExclusivePublication(streamID int32, sessionID int32, posLimitCounterID int32, 858 channelStatusIndicatorID int32, logFileName string, regID int64, origRegID int64) { 859 860 logger.Debugf("OnNewExclusivePublication: streamId=%d, sessionId=%d, posLimitCounterID=%d, channelStatusIndicatorID=%d, logFileName=%s, correlationID=%d, regID=%d", 861 streamID, sessionID, posLimitCounterID, channelStatusIndicatorID, logFileName, regID, origRegID) 862 863 cc.adminLock.Lock() 864 defer cc.adminLock.Unlock() 865 866 for _, pubDef := range cc.pubs { 867 if pubDef.regID == regID { 868 pubDef.status = RegistrationStatus.RegisteredMediaDriver 869 pubDef.sessionID = sessionID 870 pubDef.posLimitCounterID = posLimitCounterID 871 pubDef.channelStatusIndicatorID = channelStatusIndicatorID 872 pubDef.buffers = logbuffer.Wrap(logFileName) 873 pubDef.buffers.IncRef() 874 pubDef.origRegID = origRegID 875 876 logger.Debugf("Updated publication: %v", pubDef) 877 878 if cc.onNewPublicationHandler != nil { 879 cc.onNewPublicationHandler(pubDef.channel, streamID, sessionID, regID) 880 } 881 } 882 } 883 } 884 885 func (cc *ClientConductor) 
OnAvailableCounter(registrationId int64, counterId int32) {
	logger.Debugf("OnAvailableCounter: registrationId=%d, counterId=%d",
		registrationId, counterId)

	cc.adminLock.Lock()
	defer cc.adminLock.Unlock()

	counterDef, ok := cc.counters[registrationId]
	if ok && counterDef.status == RegistrationStatus.AwaitingMediaDriver {
		counterDef.counterId = counterId
		counterDef.status = RegistrationStatus.RegisteredMediaDriver
	}
	for _, handler := range cc.availableCounterHandlers {
		handler.handler.Handle(cc.counterReader, registrationId, counterId)
	}
}

// OnUnavailableCounter logs the event and, while holding adminLock, invokes
// every registered unavailable-counter handler with the counter reader,
// registrationId and counterId.
func (cc *ClientConductor) OnUnavailableCounter(registrationId int64, counterId int32) {
	logger.Debugf("OnUnavailableCounter: registrationId=%d, counterId=%d",
		registrationId, counterId)

	cc.adminLock.Lock()
	defer cc.adminLock.Unlock()

	for _, handler := range cc.unavailableCounterHandlers {
		handler.handler.Handle(cc.counterReader, registrationId, counterId)
	}
}

// OnClientTimeout handles a client-timeout notification. When clientID matches
// the ID reported by the driver proxy, the error is passed to onError and the
// running flag is cleared; notifications for other client IDs are ignored.
func (cc *ClientConductor) OnClientTimeout(clientID int64) {
	logger.Debugf("OnClientTimeout: clientID=%d", clientID)

	cc.adminLock.Lock()
	defer cc.adminLock.Unlock()

	if clientID == cc.driverProxy.ClientID() {
		cc.onError(fmt.Errorf("OnClientTimeout for ClientID:%d", clientID))
		cc.running.Set(false)
	}
}

// OnSubscriptionReady marks every pending subscription whose regID equals
// correlationID as registered, creates its Subscription object, and then calls
// onNewSubscriptionHandler if one is set.
func (cc *ClientConductor) OnSubscriptionReady(correlationID int64, channelStatusIndicatorID int32) {
	logger.Debugf("OnSubscriptionReady: correlationID=%d, channelStatusIndicatorID=%d",
		correlationID, channelStatusIndicatorID)

	cc.adminLock.Lock()
	defer cc.adminLock.Unlock()

	for _, sub := range cc.subs {
		if sub.regID != correlationID {
			continue
		}

		sub.status = RegistrationStatus.RegisteredMediaDriver
		sub.subscription = NewSubscription(
			cc, sub.channel, correlationID, sub.streamID, channelStatusIndicatorID,
			sub.availableImageHandler, sub.unavailableImageHandler)

		if cc.onNewSubscriptionHandler != nil {
			cc.onNewSubscriptionHandler(sub.channel, sub.streamID, correlationID)
		}
	}
}

// DefaultImageFactory builds an Image from the log file named by logFilename
// and fills in its subscription registration ID, source identity and
// subscriber position (a Position over counterValuesBuffer at
// subscriberPositionID).
func DefaultImageFactory(sessionID int32, corrID int64, logFilename string, subRegId int64, sourceIdentity string,
	counterValuesBuffer *atomic.Buffer, subscriberPositionID int32) Image {
	image := NewImage(sessionID, corrID, logbuffer.Wrap(logFilename))
	image.subscriptionRegistrationID = subRegId
	image.sourceIdentity = sourceIdentity
	image.subscriberPosition = NewPosition(counterValuesBuffer, subscriberPositionID)
	logger.Debugf("OnAvailableImage: new image position: %v -> %d",
		image.subscriberPosition, image.subscriberPosition.get())
	return image
}

// OnAvailableImage creates an image through cc.imageFactory for every ready
// subscription whose regID equals subsRegID, adds it to that subscription and
// calls the subscription's available-image handler when one is set.
// Subscriptions that do not yet have a Subscription object are skipped.
//
//go:norace
func (cc *ClientConductor) OnAvailableImage(streamID int32, sessionID int32, logFilename string, sourceIdentity string,
	subscriberPositionID int32, subsRegID int64, corrID int64) {
	logger.Debugf("OnAvailableImage: streamId=%d, sessionId=%d, logFilename=%s, sourceIdentity=%s, subsRegID=%d, corrID=%d",
		streamID, sessionID, logFilename, sourceIdentity, subsRegID, corrID)

	cc.adminLock.Lock()
	defer cc.adminLock.Unlock()

	for _, sub := range cc.subs {
		// Matching is by registration ID only; streamID is not compared.
		if sub.subscription == nil || sub.regID != subsRegID {
			continue
		}

		image := cc.imageFactory(sessionID, corrID, logFilename, sub.regID, sourceIdentity,
			cc.counterValuesBuffer, subscriberPositionID)

		sub.subscription.addImage(image)

		if handler := sub.subscription.AvailableImageHandler(); handler != nil {
			handler(image)
		}
	}
}

// OnUnavailableImage removes the image identified by corrID from every ready
// subscription whose regID equals subscriptionRegistrationID, calls the
// subscription's unavailable-image handler when one is set, and queues the
// image on lingeringResources stamped with the current time.
func (cc *ClientConductor) OnUnavailableImage(corrID int64, subscriptionRegistrationID int64) {
	logger.Debugf("OnUnavailableImage: corrID=%d subscriptionRegistrationID=%d", corrID, subscriptionRegistrationID)

	cc.adminLock.Lock()
	defer cc.adminLock.Unlock()

	for _, sub := range cc.subs {
		if sub.regID != subscriptionRegistrationID || sub.subscription == nil {
			continue
		}

		// NOTE(review): the result of removeImage is used without a nil check;
		// confirm it cannot be nil when corrID is unknown to the subscription.
		image := sub.subscription.removeImage(corrID)
		if handler := sub.subscription.UnavailableImageHandler(); handler != nil {
			handler(image)
		}
		cc.lingeringResources <- lingerResourse{time.Now().UnixNano(), image}
		runtime.KeepAlive(image)
	}
}

// OnOperationSuccess logs the correlation ID. Apart from briefly taking
// adminLock it performs no other work.
func (cc *ClientConductor) OnOperationSuccess(corrID int64) {
	logger.Debugf("OnOperationSuccess: correlationId=%d", corrID)

	cc.adminLock.Lock()
	defer cc.adminLock.Unlock()
}

// OnChannelEndpointError reports errorMessage through onError once for every
// publication and every subscription whose channel status indicator ID equals
// corrID truncated to int32.
func (cc *ClientConductor) OnChannelEndpointError(corrID int64, errorMessage string) {
	logger.Debugf("OnChannelEndpointError: correlationID=%d, errorMessage=%s", corrID, errorMessage)

	cc.adminLock.Lock()
	defer cc.adminLock.Unlock()

	statusIndicatorId := int32(corrID)

	// errors.New is used rather than fmt.Errorf so that '%' characters in the
	// supplied message are never interpreted as formatting verbs.
	for _, pubDef := range cc.pubs {
		if pubDef.publication != nil && pubDef.publication.ChannelStatusID() == statusIndicatorId {
			cc.onError(errors.New(errorMessage))
		}
	}

	for _, subDef := range cc.subs {
		if subDef.subscription != nil && subDef.subscription.ChannelStatusId() == statusIndicatorId {
			cc.onError(errors.New(errorMessage))
		}
	}
}

// OnErrorResponse records an error against the registration identified by
// corrID. Counters are checked first, then publications, then subscriptions;
// the matching entry is marked ErroredMediaDriver and receives errorCode and
// errorMessage.
func (cc *ClientConductor) OnErrorResponse(corrID int64, errorCode int32, errorMessage string) {
	logger.Debugf("OnErrorResponse: correlationID=%d, errorCode=%d, errorMessage=%s", corrID, errorCode, errorMessage)

	cc.adminLock.Lock()
	defer cc.adminLock.Unlock()

	if counterDef, ok := cc.counters[corrID]; ok {
		counterDef.status = RegistrationStatus.ErroredMediaDriver
		counterDef.errorCode = errorCode
		counterDef.errorMessage = errorMessage
		return
	}

	for _, pubDef := range cc.pubs {
		if pubDef.regID == corrID {
			pubDef.status = RegistrationStatus.ErroredMediaDriver
			pubDef.errorCode = errorCode
			pubDef.errorMessage = errorMessage
			return
		}
	}

	for _, subDef := range cc.subs {
		if subDef.regID == corrID {
			subDef.status = RegistrationStatus.ErroredMediaDriver
			subDef.errorCode = errorCode
			subDef.errorMessage = errorMessage
		}
	}
}

// onHeartbeatCheckTimeouts performs the periodic timeout checks.
//
// It returns an error when the gap since the previous call exceeds
// interServiceTimeoutNs (all resources are closed first), when the driver's
// last keepalive is older than driverTimeoutNs (driverActive is cleared), or
// when the client heartbeat counter no longer carries this client's ID (all
// resources are closed first). Otherwise it returns 1 if the keepalive or the
// managed-resource check ran during this call and 0 if neither was due.
func (cc *ClientConductor) onHeartbeatCheckTimeouts() (int, error) {
	var result int

	now := time.Now().UnixNano()
	nsPerMs := time.Millisecond.Nanoseconds()

	if now > (cc.timeOfLastDoWork + cc.interServiceTimeoutNs) {
		cc.closeAllResources(now)

		// Every value in the message is reported in milliseconds.
		return 0, fmt.Errorf("timeout between service calls over %d ms (%d > %d + %d) (%d)",
			cc.interServiceTimeoutNs/nsPerMs,
			now/nsPerMs,
			cc.timeOfLastDoWork/nsPerMs,
			cc.interServiceTimeoutNs/nsPerMs,
			(now-cc.timeOfLastDoWork)/nsPerMs)
	}

	cc.timeOfLastDoWork = now

	if now > (cc.timeOfLastKeepalive + keepaliveTimeoutNS) {
		// The driver proxy value is scaled from milliseconds to nanoseconds.
		lastDriverKeepaliveNs := cc.driverProxy.TimeOfLastDriverKeepalive() * nsPerMs
		if now > lastDriverKeepaliveNs+cc.driverTimeoutNs {
			cc.driverActive.Set(false)
			return 0, fmt.Errorf("MediaDriver keepalive (ms): age=%d > timeout=%d",
				(now-lastDriverKeepaliveNs)/nsPerMs,
				cc.driverTimeoutNs/nsPerMs,
			)
		}

		if cc.heartbeatTimestamp != nil {
			// Refresh the heartbeat only while the counter's key still holds this client's ID.
			registrationID, ctrErr := cc.counterReader.GetKeyPartInt64(cc.heartbeatTimestamp.CounterId, heartheatRegistrationIdOffset)
			if ctrErr == nil && registrationID == cc.driverProxy.ClientID() {
				cc.heartbeatTimestamp.Set(now / nsPerMs)
			} else {
				cc.closeAllResources(now)
				return 0, fmt.Errorf("client heartbeat timestamp not active")
			}
		} else {
			// No heartbeat counter yet: look for one of the heartbeat type keyed by this client's ID.
			counterId := cc.counterReader.FindCounter(heartbeatTypeId, func(keyBuffer *atomic.Buffer) bool {
				return keyBuffer.GetInt64(heartheatRegistrationIdOffset) == cc.driverProxy.ClientID()
			})
			if counterId != ctr.NullCounterId {
				var ctrErr error
				if cc.heartbeatTimestamp, ctrErr = ctr.NewAtomicCounter(cc.counterReader, counterId); ctrErr != nil {
					logger.Warningf("unable to allocate heartbeat counter %d: %v", counterId, ctrErr)
				} else {
					cc.heartbeatTimestamp.Set(now / nsPerMs)
				}
			}
		}

		cc.timeOfLastKeepalive = now
		result = 1
	}

	if now > (cc.timeOfLastCheckManagedResources + resourceTimeoutNS) {
		cc.onCheckManagedResources(now)
		cc.timeOfLastCheckManagedResources = now
		result = 1
	}

	return result, nil
}

// onCheckManagedResources drains lingeringResources without blocking on
// receive. Entries older than resourceLingerTimeoutNs (relative to now) are
// closed, with close failures logged and passed to onError. The first entry
// that has not yet expired is pushed back onto the channel and the scan stops.
func (cc *ClientConductor) onCheckManagedResources(now int64) {
	moreToCheck := true
	for moreToCheck {
		select {
		case r := <-cc.lingeringResources:
			logger.Debugf("Resource to linger: %v", r)
			if cc.resourceLingerTimeoutNs < now-r.lastTime {
				res := r.resource
				logger.Debugf("lingering resource expired(%dms old): %v",
					(now-r.lastTime)/time.Millisecond.Nanoseconds(), res)
				if res != nil {
					err := res.Close()
					if err != nil {
						logger.Warningf("Failed to close lingering resource: %v", err)
						cc.onError(err)
					}
				}
			} else {
				// The assumption is that resources are queued in order
				moreToCheck = false
				// FIXME ..and we're breaking it here, but since there is no peek...
				cc.lingeringResources <- r
			}
		default:
			// Channel is empty; nothing more to check.
			moreToCheck = false
		}
	}
}

// isPublicationConnected reports whether the current time is no later than
// timeOfLastStatusMessage (scaled from milliseconds to nanoseconds) plus
// publicationConnectionTimeoutNs.
func (cc *ClientConductor) isPublicationConnected(timeOfLastStatusMessage int64) bool {
	return time.Now().UnixNano() <= (timeOfLastStatusMessage*int64(time.Millisecond) + cc.publicationConnectionTimeoutNs)
}

// CounterReader returns the conductor's counter reader.
func (cc *ClientConductor) CounterReader() *ctr.Reader {
	return cc.counterReader
}

// closeAllResources flips the running flag from true to false and, only if
// that transition succeeds, closes every publication, subscription and counter
// that has been created, reporting close errors through onError, and then
// clears the three collections. The now argument is not used.
func (cc *ClientConductor) closeAllResources(now int64) {
	if !cc.running.CompareAndSet(true, false) {
		return
	}

	for _, pub := range cc.pubs {
		if pub != nil && pub.publication != nil {
			if err := pub.publication.Close(); err != nil {
				cc.onError(err)
			}
		}
	}
	cc.pubs = nil

	for _, sub := range cc.subs {
		if sub != nil && sub.subscription != nil {
			if err := sub.subscription.Close(); err != nil {
				cc.onError(err)
			}
		}
	}
	cc.subs = nil

	for _, counterDef := range cc.counters {
		if counterDef != nil && counterDef.counter != nil {
			if err := counterDef.counter.Close(); err != nil {
				cc.onError(err)
			}
		}
	}
	cc.counters = nil
}

// onError forwards err to the configured errorHandler. When no handler is
// configured it calls log.Fatal(err).
func (cc *ClientConductor) onError(err error) {
	if cc.errorHandler != nil {
		cc.errorHandler(err)
	} else {
		log.Fatal(err)
	}
}