google.golang.org/grpc@v1.72.2/orca/producer.go (about)

     1  /*
     2   * Copyright 2022 gRPC authors.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package orca
    18  
    19  import (
    20  	"context"
    21  	"sync"
    22  	"time"
    23  
    24  	"google.golang.org/grpc"
    25  	"google.golang.org/grpc/balancer"
    26  	"google.golang.org/grpc/codes"
    27  	"google.golang.org/grpc/internal/backoff"
    28  	"google.golang.org/grpc/orca/internal"
    29  	"google.golang.org/grpc/status"
    30  
    31  	v3orcapb "github.com/cncf/xds/go/xds/data/orca/v3"
    32  	v3orcaservicegrpc "github.com/cncf/xds/go/xds/service/orca/v3"
    33  	v3orcaservicepb "github.com/cncf/xds/go/xds/service/orca/v3"
    34  	"google.golang.org/protobuf/types/known/durationpb"
    35  )
    36  
    37  type producerBuilder struct{}
    38  
    39  // Build constructs and returns a producer and its cleanup function
    40  func (*producerBuilder) Build(cci any) (balancer.Producer, func()) {
    41  	p := &producer{
    42  		client:    v3orcaservicegrpc.NewOpenRcaServiceClient(cci.(grpc.ClientConnInterface)),
    43  		intervals: make(map[time.Duration]int),
    44  		listeners: make(map[OOBListener]struct{}),
    45  		backoff:   internal.DefaultBackoffFunc,
    46  	}
    47  	return p, func() {
    48  		p.mu.Lock()
    49  		if p.stop != nil {
    50  			p.stop()
    51  			p.stop = nil
    52  		}
    53  		p.mu.Unlock()
    54  		<-p.stopped
    55  	}
    56  }
    57  
    58  var producerBuilderSingleton = &producerBuilder{}
    59  
// OOBListener is used to receive out-of-band load reports as they arrive.
//
// Listeners are stored as map keys by the producer, so the dynamic type of an
// OOBListener must be comparable.
type OOBListener interface {
	// OnLoadReport is called when a load report is received.
	//
	// It is invoked from the producer's stream goroutine while the producer's
	// internal mutex is held, once per report and listener.  Registering or
	// unregistering a listener on the same producer from inside OnLoadReport
	// would block on that mutex, and a slow implementation delays delivery to
	// the other listeners.
	OnLoadReport(*v3orcapb.OrcaLoadReport)
}
    65  
// OOBListenerOptions contains options to control how an OOBListener is called.
type OOBListenerOptions struct {
	// ReportInterval specifies how often to request the server to provide a
	// load report.  May be provided less frequently if the server requires a
	// longer interval, or may be provided more frequently if another
	// subscriber requests a shorter interval.
	//
	// The producer sends the smallest ReportInterval among its currently
	// registered listeners as the interval in the stream request.
	ReportInterval time.Duration
}
    74  
    75  // RegisterOOBListener registers an out-of-band load report listener on a Ready
    76  // sc.  Any OOBListener may only be registered once per subchannel at a time.
    77  // The returned stop function must be called when no longer needed.  Do not
    78  // register a single OOBListener more than once per SubConn.
    79  func RegisterOOBListener(sc balancer.SubConn, l OOBListener, opts OOBListenerOptions) (stop func()) {
    80  	pr, closeFn := sc.GetOrBuildProducer(producerBuilderSingleton)
    81  	p := pr.(*producer)
    82  
    83  	p.registerListener(l, opts.ReportInterval)
    84  
    85  	// If stop is called multiple times, prevent it from having any effect on
    86  	// subsequent calls.
    87  	return sync.OnceFunc(func() {
    88  		p.unregisterListener(l, opts.ReportInterval)
    89  		closeFn()
    90  	})
    91  }
    92  
// producer runs the ORCA out-of-band load reporting stream through client and
// delivers every received report to all registered listeners.  A run goroutine
// exists only while at least one listener is registered, and it is restarted
// whenever the minimum requested report interval changes.
type producer struct {
	// client is the ORCA service client used to open the metrics stream.
	client v3orcaservicegrpc.OpenRcaServiceClient

	// backoff is called between stream attempts to determine how long to delay
	// to avoid overloading a server experiencing problems.  The attempt count
	// is incremented when stream errors occur and is reset when the stream
	// reports a result.
	backoff func(int) time.Duration
	// stopped is replaced with a fresh channel each time a run goroutine is
	// started; it is nil until the first one starts.
	stopped chan struct{} // closed when the run goroutine exits

	// mu is held by the code in this file whenever the fields below are
	// accessed.
	mu          sync.Mutex
	intervals   map[time.Duration]int    // map from interval time to count of listeners requesting that time
	listeners   map[OOBListener]struct{} // set of registered listeners
	// minInterval is the smallest interval currently requested; it is the
	// interval handed to the run goroutine when it is (re)started.
	minInterval time.Duration
	// stop is nil when no run goroutine is active.
	stop        func() // stops the current run goroutine
}
   109  
   110  // registerListener adds the listener and its requested report interval to the
   111  // producer.
   112  func (p *producer) registerListener(l OOBListener, interval time.Duration) {
   113  	p.mu.Lock()
   114  	defer p.mu.Unlock()
   115  
   116  	p.listeners[l] = struct{}{}
   117  	p.intervals[interval]++
   118  	if len(p.listeners) == 1 || interval < p.minInterval {
   119  		p.minInterval = interval
   120  		p.updateRunLocked()
   121  	}
   122  }
   123  
   124  // registerListener removes the listener and its requested report interval to
   125  // the producer.
   126  func (p *producer) unregisterListener(l OOBListener, interval time.Duration) {
   127  	p.mu.Lock()
   128  	defer p.mu.Unlock()
   129  
   130  	delete(p.listeners, l)
   131  	p.intervals[interval]--
   132  	if p.intervals[interval] == 0 {
   133  		delete(p.intervals, interval)
   134  
   135  		if p.minInterval == interval {
   136  			p.recomputeMinInterval()
   137  			p.updateRunLocked()
   138  		}
   139  	}
   140  }
   141  
   142  // recomputeMinInterval sets p.minInterval to the minimum key's value in
   143  // p.intervals.
   144  func (p *producer) recomputeMinInterval() {
   145  	first := true
   146  	for interval := range p.intervals {
   147  		if first || interval < p.minInterval {
   148  			p.minInterval = interval
   149  			first = false
   150  		}
   151  	}
   152  }
   153  
   154  // updateRunLocked is called whenever the run goroutine needs to be started /
   155  // stopped / restarted due to: 1. the initial listener being registered, 2. the
   156  // final listener being unregistered, or 3. the minimum registered interval
   157  // changing.
   158  func (p *producer) updateRunLocked() {
   159  	if p.stop != nil {
   160  		p.stop()
   161  		p.stop = nil
   162  	}
   163  	if len(p.listeners) > 0 {
   164  		var ctx context.Context
   165  		ctx, p.stop = context.WithCancel(context.Background())
   166  		p.stopped = make(chan struct{})
   167  		go p.run(ctx, p.stopped, p.minInterval)
   168  	}
   169  }
   170  
   171  // run manages the ORCA OOB stream on the subchannel.
   172  func (p *producer) run(ctx context.Context, done chan struct{}, interval time.Duration) {
   173  	defer close(done)
   174  
   175  	runStream := func() error {
   176  		resetBackoff, err := p.runStream(ctx, interval)
   177  		if status.Code(err) == codes.Unimplemented {
   178  			// Unimplemented; do not retry.
   179  			logger.Error("Server doesn't support ORCA OOB load reporting protocol; not listening for load reports.")
   180  			return err
   181  		}
   182  		// Retry for all other errors.
   183  		if code := status.Code(err); code != codes.Unavailable && code != codes.Canceled {
   184  			// TODO: Unavailable and Canceled should also ideally log an error,
   185  			// but for now we receive them when shutting down the ClientConn
   186  			// (Unavailable if the stream hasn't started yet, and Canceled if it
   187  			// happens mid-stream).  Once we can determine the state or ensure
   188  			// the producer is stopped before the stream ends, we can log an
   189  			// error when it's not a natural shutdown.
   190  			logger.Error("Received unexpected stream error:", err)
   191  		}
   192  		if resetBackoff {
   193  			return backoff.ErrResetBackoff
   194  		}
   195  		return nil
   196  	}
   197  	backoff.RunF(ctx, runStream, p.backoff)
   198  }
   199  
   200  // runStream runs a single stream on the subchannel and returns the resulting
   201  // error, if any, and whether or not the run loop should reset the backoff
   202  // timer to zero or advance it.
   203  func (p *producer) runStream(ctx context.Context, interval time.Duration) (resetBackoff bool, err error) {
   204  	streamCtx, cancel := context.WithCancel(ctx)
   205  	defer cancel()
   206  	stream, err := p.client.StreamCoreMetrics(streamCtx, &v3orcaservicepb.OrcaLoadReportRequest{
   207  		ReportInterval: durationpb.New(interval),
   208  	})
   209  	if err != nil {
   210  		return false, err
   211  	}
   212  
   213  	for {
   214  		report, err := stream.Recv()
   215  		if err != nil {
   216  			return resetBackoff, err
   217  		}
   218  		resetBackoff = true
   219  		p.mu.Lock()
   220  		for l := range p.listeners {
   221  			l.OnLoadReport(report)
   222  		}
   223  		p.mu.Unlock()
   224  	}
   225  }