github.com/apache/beam/sdks/v2@v2.48.2/go/examples/native_wordcap/nativepubsubio/native.go (about) 1 // Licensed to the Apache Software Foundation (ASF) under one or more 2 // contributor license agreements. See the NOTICE file distributed with 3 // this work for additional information regarding copyright ownership. 4 // The ASF licenses this file to You under the Apache License, Version 2.0 5 // (the "License"); you may not use this file except in compliance with 6 // the License. You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 // Package nativepubsubio contains a Golang implementation of streaming reads 17 // and writes to PubSub. This is not as fully featured as the cross-language 18 // pubsubio package present in the Beam Go repository and should not be used 19 // in place of it. 20 package nativepubsubio 21 22 import ( 23 "context" 24 "errors" 25 "fmt" 26 "time" 27 28 "cloud.google.com/go/pubsub" 29 "github.com/apache/beam/sdks/v2/go/pkg/beam" 30 "github.com/apache/beam/sdks/v2/go/pkg/beam/core/sdf" 31 "github.com/apache/beam/sdks/v2/go/pkg/beam/log" 32 "github.com/apache/beam/sdks/v2/go/pkg/beam/register" 33 "github.com/apache/beam/sdks/v2/go/pkg/beam/util/pubsubx" 34 ) 35 36 func init() { 37 register.DoFn5x2[context.Context, beam.BundleFinalization, *sdf.LockRTracker, []byte, func(beam.EventTime, []byte), sdf.ProcessContinuation, error](&pubSubRead{}) 38 register.DoFn2x1[context.Context, []byte, error](&pubSubWrite{}) 39 register.Emitter2[beam.EventTime, []byte]() 40 } 41 42 // pubSubRead is a structural DoFn representing a read from a given subscription ID. 43 type pubSubRead struct { 44 ProjectID string 45 Subscription string 46 client *pubsub.Client 47 processedMessages []*pubsub.Message 48 } 49 50 // newPubSubRead inserts an unbounded read from a PubSub topic into the pipeline. If an existing subscription 51 // is provided, the DoFn will read using that subscription; otherwise, a new subscription to the topic 52 // will be created using the provided subscription name. 53 func newPubSubRead(ctx context.Context, projectID, topic, subscription string) (*pubSubRead, error) { 54 if topic == "" { 55 return nil, errors.New("please provide either a topic to read from") 56 } 57 client, err := pubsub.NewClient(ctx, projectID) 58 if err != nil { 59 return nil, err 60 } 61 62 top := client.Topic(topic) 63 if ok, err := top.Exists(ctx); !ok || err != nil { 64 return nil, fmt.Errorf("failed to get topic; exists: %v, error: %v", ok, err) 65 } 66 sub, err := pubsubx.EnsureSubscription(ctx, client, topic, subscription) 67 if err != nil { 68 return nil, err 69 } 70 return &pubSubRead{ProjectID: projectID, Subscription: sub.ID()}, nil 71 } 72 73 // CreateInitialRestriction() establishes the PubSub subscription ID as the 74 // initial restriction 75 func (r *pubSubRead) CreateInitialRestriction(_ []byte) string { 76 return r.Subscription 77 } 78 79 // CreateTracker wraps the PubSub subscription ID in a StaticRTracker 80 // and applies a mutex via LockRTracker. 81 func (r *pubSubRead) CreateTracker(rest string) *sdf.LockRTracker { 82 return sdf.NewLockRTracker(NewSubscriptionRTracker(rest)) 83 } 84 85 // RestrictionSize always returns 1.0, as the restriction is always 1 subscription. 86 func (r *pubSubRead) RestrictionSize(_ []byte, rest string) float64 { 87 return 1.0 88 } 89 90 // SplitRestriction is a no-op as the restriction cannot be split. 91 func (r *pubSubRead) SplitRestriction(_ []byte, rest string) []string { 92 return []string{rest} 93 } 94 95 // Setup initializes a PubSub client if one has not been created already 96 func (r *pubSubRead) Setup(ctx context.Context) error { 97 if r.client == nil { 98 client, err := pubsub.NewClient(ctx, r.ProjectID) 99 if err != nil { 100 return err 101 } 102 r.client = client 103 } 104 return nil 105 } 106 107 var messageTimeout time.Duration = 5 * time.Second 108 109 // ProcessElement initializes a PubSub client if one has not been created already, reads from the PubSub subscription, 110 // and emits elements as it reads them. If no messages are available, the DoFn will schedule itself to resume processing 111 // later. If polling the subscription returns an error, the error will be logged and the DoFn will not reschedule itself. 112 func (r *pubSubRead) ProcessElement(ctx context.Context, bf beam.BundleFinalization, rt *sdf.LockRTracker, _ []byte, emit func(beam.EventTime, []byte)) (sdf.ProcessContinuation, error) { 113 // Register finalization callback 114 bf.RegisterCallback(5*time.Minute, func() error { 115 for _, m := range r.processedMessages { 116 m.Ack() 117 } 118 r.processedMessages = nil 119 return nil 120 }) 121 122 for { 123 ok := rt.TryClaim(r.Subscription) 124 if !ok { 125 return sdf.ResumeProcessingIn(5 * time.Second), nil 126 } 127 sub := r.client.Subscription(r.Subscription) 128 canCtx, cFn := context.WithCancel(ctx) 129 130 // Because emitters are not thread safe and synchronous Receive() behavior 131 // is deprecated, we have to collect messages in a goroutine and pipe them 132 // out through a channel. 133 messChan := make(chan *pubsub.Message, 1) 134 go func(sendch chan<- *pubsub.Message) { 135 err := sub.Receive(canCtx, func(ctx context.Context, m *pubsub.Message) { 136 messChan <- m 137 }) 138 if (err != nil) && (err != context.Canceled) { 139 log.Errorf(ctx, "error reading from PubSub: %v, stopping processing", err) 140 cFn() 141 close(messChan) 142 } 143 }(messChan) 144 145 timeout := time.NewTimer(messageTimeout) 146 for { 147 select { 148 case m, ok := <-messChan: 149 if !ok { 150 log.Debug(ctx, "stopping bundle processing") 151 return sdf.StopProcessing(), nil 152 } 153 r.processedMessages = append(r.processedMessages, m) 154 emit(beam.EventTime(m.PublishTime.UnixMilli()), m.Data) 155 if !timeout.Stop() { 156 <-timeout.C 157 } 158 timeout.Reset(messageTimeout) 159 case <-timeout.C: 160 log.Debugf(ctx, "cancelling receive context, scheduling resumption") 161 cFn() 162 return sdf.ResumeProcessingIn(10 * time.Second), nil 163 } 164 } 165 } 166 } 167 168 // Read reads messages from a PubSub topic in a streaming context, outputting 169 // received messages as a PCollection of byte slices. If the provided subscription 170 // name exists for the given topic, the DoFn will read from that subscription; otherwise, 171 // a new subscription with the given subscription name will be created and read from. 172 // 173 // This is an example and subject to change, including its behavior and function signature. 174 // Please use the cross-language implementation Read() instead. 175 func Read(ctx context.Context, s beam.Scope, project, topic, subscription string) beam.PCollection { 176 s = s.Scope("pubsubio.NativeRead") 177 178 psRead, err := newPubSubRead(ctx, project, topic, subscription) 179 if err != nil { 180 panic(err) 181 } 182 return beam.ParDo(s, psRead, beam.Impulse(s)) 183 } 184 185 // pubSubWrite is a structural DoFn representing writes to a given PubSub topic. 186 type pubSubWrite struct { 187 ProjectID string 188 Topic string 189 client *pubsub.Client 190 } 191 192 // Setup initializes a PubSub client if one has not been created already 193 func (r *pubSubWrite) Setup(ctx context.Context) error { 194 if r.client == nil { 195 client, err := pubsub.NewClient(ctx, r.ProjectID) 196 if err != nil { 197 return err 198 } 199 r.client = client 200 } 201 return nil 202 } 203 204 // ProcessElement takes a []byte element and publishes it to the provided PubSub 205 // topic. 206 func (w *pubSubWrite) ProcessElement(ctx context.Context, elm []byte) error { 207 top := w.client.Topic(w.Topic) 208 209 psMess := &pubsub.Message{Data: elm} 210 result := top.Publish(ctx, psMess) 211 if _, err := result.Get(ctx); err != nil { 212 return err 213 } 214 return nil 215 } 216 217 // newPubSubWrite inserts a write to a PubSub topic into the pipeline. 218 func newPubSubWrite(ctx context.Context, projectID, topic string) (*pubSubWrite, error) { 219 if topic == "" { 220 return nil, errors.New("please provide a topic to write to") 221 } 222 client, err := pubsub.NewClient(ctx, projectID) 223 if err != nil { 224 return nil, err 225 } 226 227 top := client.Topic(topic) 228 if ok, err := top.Exists(ctx); !ok || err != nil { 229 return nil, fmt.Errorf("failed to get topic; exists: %v, error: %v", ok, err) 230 } 231 return &pubSubWrite{ProjectID: projectID, Topic: top.ID()}, nil 232 } 233 234 // Write publishes elements from a PCollection of byte slices to a PubSub topic. 235 // If the topic does not exist at pipeline construction time, the function will panic. 236 // 237 // This is an example and subject to change, including its behavior and function signature. 238 // Please use the cross-language implementation Write() instead. 239 func Write(ctx context.Context, s beam.Scope, col beam.PCollection, project, topic string) { 240 s = s.Scope("pubsubio.NativeWrite") 241 242 psWrite, err := newPubSubWrite(ctx, project, topic) 243 if err != nil { 244 panic(err) 245 } 246 beam.ParDo0(s, psWrite, col) 247 }