google.golang.org/grpc@v1.72.2/orca/producer.go (about) 1 /* 2 * Copyright 2022 gRPC authors. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package orca 18 19 import ( 20 "context" 21 "sync" 22 "time" 23 24 "google.golang.org/grpc" 25 "google.golang.org/grpc/balancer" 26 "google.golang.org/grpc/codes" 27 "google.golang.org/grpc/internal/backoff" 28 "google.golang.org/grpc/orca/internal" 29 "google.golang.org/grpc/status" 30 31 v3orcapb "github.com/cncf/xds/go/xds/data/orca/v3" 32 v3orcaservicegrpc "github.com/cncf/xds/go/xds/service/orca/v3" 33 v3orcaservicepb "github.com/cncf/xds/go/xds/service/orca/v3" 34 "google.golang.org/protobuf/types/known/durationpb" 35 ) 36 37 type producerBuilder struct{} 38 39 // Build constructs and returns a producer and its cleanup function 40 func (*producerBuilder) Build(cci any) (balancer.Producer, func()) { 41 p := &producer{ 42 client: v3orcaservicegrpc.NewOpenRcaServiceClient(cci.(grpc.ClientConnInterface)), 43 intervals: make(map[time.Duration]int), 44 listeners: make(map[OOBListener]struct{}), 45 backoff: internal.DefaultBackoffFunc, 46 } 47 return p, func() { 48 p.mu.Lock() 49 if p.stop != nil { 50 p.stop() 51 p.stop = nil 52 } 53 p.mu.Unlock() 54 <-p.stopped 55 } 56 } 57 58 var producerBuilderSingleton = &producerBuilder{} 59 60 // OOBListener is used to receive out-of-band load reports as they arrive. 61 type OOBListener interface { 62 // OnLoadReport is called when a load report is received. 63 OnLoadReport(*v3orcapb.OrcaLoadReport) 64 } 65 66 // OOBListenerOptions contains options to control how an OOBListener is called. 67 type OOBListenerOptions struct { 68 // ReportInterval specifies how often to request the server to provide a 69 // load report. May be provided less frequently if the server requires a 70 // longer interval, or may be provided more frequently if another 71 // subscriber requests a shorter interval. 72 ReportInterval time.Duration 73 } 74 75 // RegisterOOBListener registers an out-of-band load report listener on a Ready 76 // sc. Any OOBListener may only be registered once per subchannel at a time. 77 // The returned stop function must be called when no longer needed. Do not 78 // register a single OOBListener more than once per SubConn. 79 func RegisterOOBListener(sc balancer.SubConn, l OOBListener, opts OOBListenerOptions) (stop func()) { 80 pr, closeFn := sc.GetOrBuildProducer(producerBuilderSingleton) 81 p := pr.(*producer) 82 83 p.registerListener(l, opts.ReportInterval) 84 85 // If stop is called multiple times, prevent it from having any effect on 86 // subsequent calls. 87 return sync.OnceFunc(func() { 88 p.unregisterListener(l, opts.ReportInterval) 89 closeFn() 90 }) 91 } 92 93 type producer struct { 94 client v3orcaservicegrpc.OpenRcaServiceClient 95 96 // backoff is called between stream attempts to determine how long to delay 97 // to avoid overloading a server experiencing problems. The attempt count 98 // is incremented when stream errors occur and is reset when the stream 99 // reports a result. 100 backoff func(int) time.Duration 101 stopped chan struct{} // closed when the run goroutine exits 102 103 mu sync.Mutex 104 intervals map[time.Duration]int // map from interval time to count of listeners requesting that time 105 listeners map[OOBListener]struct{} // set of registered listeners 106 minInterval time.Duration 107 stop func() // stops the current run goroutine 108 } 109 110 // registerListener adds the listener and its requested report interval to the 111 // producer. 112 func (p *producer) registerListener(l OOBListener, interval time.Duration) { 113 p.mu.Lock() 114 defer p.mu.Unlock() 115 116 p.listeners[l] = struct{}{} 117 p.intervals[interval]++ 118 if len(p.listeners) == 1 || interval < p.minInterval { 119 p.minInterval = interval 120 p.updateRunLocked() 121 } 122 } 123 124 // registerListener removes the listener and its requested report interval to 125 // the producer. 126 func (p *producer) unregisterListener(l OOBListener, interval time.Duration) { 127 p.mu.Lock() 128 defer p.mu.Unlock() 129 130 delete(p.listeners, l) 131 p.intervals[interval]-- 132 if p.intervals[interval] == 0 { 133 delete(p.intervals, interval) 134 135 if p.minInterval == interval { 136 p.recomputeMinInterval() 137 p.updateRunLocked() 138 } 139 } 140 } 141 142 // recomputeMinInterval sets p.minInterval to the minimum key's value in 143 // p.intervals. 144 func (p *producer) recomputeMinInterval() { 145 first := true 146 for interval := range p.intervals { 147 if first || interval < p.minInterval { 148 p.minInterval = interval 149 first = false 150 } 151 } 152 } 153 154 // updateRunLocked is called whenever the run goroutine needs to be started / 155 // stopped / restarted due to: 1. the initial listener being registered, 2. the 156 // final listener being unregistered, or 3. the minimum registered interval 157 // changing. 158 func (p *producer) updateRunLocked() { 159 if p.stop != nil { 160 p.stop() 161 p.stop = nil 162 } 163 if len(p.listeners) > 0 { 164 var ctx context.Context 165 ctx, p.stop = context.WithCancel(context.Background()) 166 p.stopped = make(chan struct{}) 167 go p.run(ctx, p.stopped, p.minInterval) 168 } 169 } 170 171 // run manages the ORCA OOB stream on the subchannel. 172 func (p *producer) run(ctx context.Context, done chan struct{}, interval time.Duration) { 173 defer close(done) 174 175 runStream := func() error { 176 resetBackoff, err := p.runStream(ctx, interval) 177 if status.Code(err) == codes.Unimplemented { 178 // Unimplemented; do not retry. 179 logger.Error("Server doesn't support ORCA OOB load reporting protocol; not listening for load reports.") 180 return err 181 } 182 // Retry for all other errors. 183 if code := status.Code(err); code != codes.Unavailable && code != codes.Canceled { 184 // TODO: Unavailable and Canceled should also ideally log an error, 185 // but for now we receive them when shutting down the ClientConn 186 // (Unavailable if the stream hasn't started yet, and Canceled if it 187 // happens mid-stream). Once we can determine the state or ensure 188 // the producer is stopped before the stream ends, we can log an 189 // error when it's not a natural shutdown. 190 logger.Error("Received unexpected stream error:", err) 191 } 192 if resetBackoff { 193 return backoff.ErrResetBackoff 194 } 195 return nil 196 } 197 backoff.RunF(ctx, runStream, p.backoff) 198 } 199 200 // runStream runs a single stream on the subchannel and returns the resulting 201 // error, if any, and whether or not the run loop should reset the backoff 202 // timer to zero or advance it. 203 func (p *producer) runStream(ctx context.Context, interval time.Duration) (resetBackoff bool, err error) { 204 streamCtx, cancel := context.WithCancel(ctx) 205 defer cancel() 206 stream, err := p.client.StreamCoreMetrics(streamCtx, &v3orcaservicepb.OrcaLoadReportRequest{ 207 ReportInterval: durationpb.New(interval), 208 }) 209 if err != nil { 210 return false, err 211 } 212 213 for { 214 report, err := stream.Recv() 215 if err != nil { 216 return resetBackoff, err 217 } 218 resetBackoff = true 219 p.mu.Lock() 220 for l := range p.listeners { 221 l.OnLoadReport(report) 222 } 223 p.mu.Unlock() 224 } 225 }