github.com/apache/beam/sdks/v2@v2.48.2/go/examples/native_wordcap/nativepubsubio/native.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one or more
     2  // contributor license agreements.  See the NOTICE file distributed with
     3  // this work for additional information regarding copyright ownership.
     4  // The ASF licenses this file to You under the Apache License, Version 2.0
     5  // (the "License"); you may not use this file except in compliance with
     6  // the License.  You may obtain a copy of the License at
     7  //
     8  //    http://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  
    16  // Package nativepubsubio contains a Golang implementation of streaming reads
    17  // and writes to PubSub. This is not as fully featured as the cross-language
    18  // pubsubio package present in the Beam Go repository and should not be used
    19  // in place of it.
    20  package nativepubsubio
    21  
    22  import (
    23  	"context"
    24  	"errors"
    25  	"fmt"
    26  	"time"
    27  
    28  	"cloud.google.com/go/pubsub"
    29  	"github.com/apache/beam/sdks/v2/go/pkg/beam"
    30  	"github.com/apache/beam/sdks/v2/go/pkg/beam/core/sdf"
    31  	"github.com/apache/beam/sdks/v2/go/pkg/beam/log"
    32  	"github.com/apache/beam/sdks/v2/go/pkg/beam/register"
    33  	"github.com/apache/beam/sdks/v2/go/pkg/beam/util/pubsubx"
    34  )
    35  
    36  func init() {
    37  	register.DoFn5x2[context.Context, beam.BundleFinalization, *sdf.LockRTracker, []byte, func(beam.EventTime, []byte), sdf.ProcessContinuation, error](&pubSubRead{})
    38  	register.DoFn2x1[context.Context, []byte, error](&pubSubWrite{})
    39  	register.Emitter2[beam.EventTime, []byte]()
    40  }
    41  
    42  // pubSubRead is a structural DoFn representing a read from a given subscription ID.
    43  type pubSubRead struct {
    44  	ProjectID         string
    45  	Subscription      string
    46  	client            *pubsub.Client
    47  	processedMessages []*pubsub.Message
    48  }
    49  
    50  // newPubSubRead inserts an unbounded read from a PubSub topic into the pipeline. If an existing subscription
    51  // is provided, the DoFn will read using that subscription; otherwise, a new subscription to the topic
    52  // will be created using the provided subscription name.
    53  func newPubSubRead(ctx context.Context, projectID, topic, subscription string) (*pubSubRead, error) {
    54  	if topic == "" {
    55  		return nil, errors.New("please provide either a topic to read from")
    56  	}
    57  	client, err := pubsub.NewClient(ctx, projectID)
    58  	if err != nil {
    59  		return nil, err
    60  	}
    61  
    62  	top := client.Topic(topic)
    63  	if ok, err := top.Exists(ctx); !ok || err != nil {
    64  		return nil, fmt.Errorf("failed to get topic; exists: %v, error: %v", ok, err)
    65  	}
    66  	sub, err := pubsubx.EnsureSubscription(ctx, client, topic, subscription)
    67  	if err != nil {
    68  		return nil, err
    69  	}
    70  	return &pubSubRead{ProjectID: projectID, Subscription: sub.ID()}, nil
    71  }
    72  
    73  // CreateInitialRestriction() establishes the PubSub subscription ID as the
    74  // initial restriction
    75  func (r *pubSubRead) CreateInitialRestriction(_ []byte) string {
    76  	return r.Subscription
    77  }
    78  
    79  // CreateTracker wraps the PubSub subscription ID in a StaticRTracker
    80  // and applies a mutex via LockRTracker.
    81  func (r *pubSubRead) CreateTracker(rest string) *sdf.LockRTracker {
    82  	return sdf.NewLockRTracker(NewSubscriptionRTracker(rest))
    83  }
    84  
    85  // RestrictionSize always returns 1.0, as the restriction is always 1 subscription.
    86  func (r *pubSubRead) RestrictionSize(_ []byte, rest string) float64 {
    87  	return 1.0
    88  }
    89  
    90  // SplitRestriction is a no-op as the restriction cannot be split.
    91  func (r *pubSubRead) SplitRestriction(_ []byte, rest string) []string {
    92  	return []string{rest}
    93  }
    94  
    95  // Setup initializes a PubSub client if one has not been created already
    96  func (r *pubSubRead) Setup(ctx context.Context) error {
    97  	if r.client == nil {
    98  		client, err := pubsub.NewClient(ctx, r.ProjectID)
    99  		if err != nil {
   100  			return err
   101  		}
   102  		r.client = client
   103  	}
   104  	return nil
   105  }
   106  
   107  var messageTimeout time.Duration = 5 * time.Second
   108  
   109  // ProcessElement initializes a PubSub client if one has not been created already, reads from the PubSub subscription,
   110  // and emits elements as it reads them. If no messages are available, the DoFn will schedule itself to resume processing
   111  // later. If polling the subscription returns an error, the error will be logged and the DoFn will not reschedule itself.
   112  func (r *pubSubRead) ProcessElement(ctx context.Context, bf beam.BundleFinalization, rt *sdf.LockRTracker, _ []byte, emit func(beam.EventTime, []byte)) (sdf.ProcessContinuation, error) {
   113  	// Register finalization callback
   114  	bf.RegisterCallback(5*time.Minute, func() error {
   115  		for _, m := range r.processedMessages {
   116  			m.Ack()
   117  		}
   118  		r.processedMessages = nil
   119  		return nil
   120  	})
   121  
   122  	for {
   123  		ok := rt.TryClaim(r.Subscription)
   124  		if !ok {
   125  			return sdf.ResumeProcessingIn(5 * time.Second), nil
   126  		}
   127  		sub := r.client.Subscription(r.Subscription)
   128  		canCtx, cFn := context.WithCancel(ctx)
   129  
   130  		// Because emitters are not thread safe and synchronous Receive() behavior
   131  		// is deprecated, we have to collect messages in a goroutine and pipe them
   132  		// out through a channel.
   133  		messChan := make(chan *pubsub.Message, 1)
   134  		go func(sendch chan<- *pubsub.Message) {
   135  			err := sub.Receive(canCtx, func(ctx context.Context, m *pubsub.Message) {
   136  				messChan <- m
   137  			})
   138  			if (err != nil) && (err != context.Canceled) {
   139  				log.Errorf(ctx, "error reading from PubSub: %v, stopping processing", err)
   140  				cFn()
   141  				close(messChan)
   142  			}
   143  		}(messChan)
   144  
   145  		timeout := time.NewTimer(messageTimeout)
   146  		for {
   147  			select {
   148  			case m, ok := <-messChan:
   149  				if !ok {
   150  					log.Debug(ctx, "stopping bundle processing")
   151  					return sdf.StopProcessing(), nil
   152  				}
   153  				r.processedMessages = append(r.processedMessages, m)
   154  				emit(beam.EventTime(m.PublishTime.UnixMilli()), m.Data)
   155  				if !timeout.Stop() {
   156  					<-timeout.C
   157  				}
   158  				timeout.Reset(messageTimeout)
   159  			case <-timeout.C:
   160  				log.Debugf(ctx, "cancelling receive context, scheduling resumption")
   161  				cFn()
   162  				return sdf.ResumeProcessingIn(10 * time.Second), nil
   163  			}
   164  		}
   165  	}
   166  }
   167  
   168  // Read reads messages from a PubSub topic in a streaming context, outputting
   169  // received messages as a PCollection of byte slices. If the provided subscription
   170  // name exists for the given topic, the DoFn will read from that subscription; otherwise,
   171  // a new subscription with the given subscription name will be created and read from.
   172  //
   173  // This is an example and subject to change, including its behavior and function signature.
   174  // Please use the cross-language implementation Read() instead.
   175  func Read(ctx context.Context, s beam.Scope, project, topic, subscription string) beam.PCollection {
   176  	s = s.Scope("pubsubio.NativeRead")
   177  
   178  	psRead, err := newPubSubRead(ctx, project, topic, subscription)
   179  	if err != nil {
   180  		panic(err)
   181  	}
   182  	return beam.ParDo(s, psRead, beam.Impulse(s))
   183  }
   184  
   185  // pubSubWrite is a structural DoFn representing writes to a given PubSub topic.
   186  type pubSubWrite struct {
   187  	ProjectID string
   188  	Topic     string
   189  	client    *pubsub.Client
   190  }
   191  
   192  // Setup initializes a PubSub client if one has not been created already
   193  func (r *pubSubWrite) Setup(ctx context.Context) error {
   194  	if r.client == nil {
   195  		client, err := pubsub.NewClient(ctx, r.ProjectID)
   196  		if err != nil {
   197  			return err
   198  		}
   199  		r.client = client
   200  	}
   201  	return nil
   202  }
   203  
   204  // ProcessElement takes a []byte element and publishes it to the provided PubSub
   205  // topic.
   206  func (w *pubSubWrite) ProcessElement(ctx context.Context, elm []byte) error {
   207  	top := w.client.Topic(w.Topic)
   208  
   209  	psMess := &pubsub.Message{Data: elm}
   210  	result := top.Publish(ctx, psMess)
   211  	if _, err := result.Get(ctx); err != nil {
   212  		return err
   213  	}
   214  	return nil
   215  }
   216  
   217  // newPubSubWrite inserts a write to a PubSub topic into the pipeline.
   218  func newPubSubWrite(ctx context.Context, projectID, topic string) (*pubSubWrite, error) {
   219  	if topic == "" {
   220  		return nil, errors.New("please provide a topic to write to")
   221  	}
   222  	client, err := pubsub.NewClient(ctx, projectID)
   223  	if err != nil {
   224  		return nil, err
   225  	}
   226  
   227  	top := client.Topic(topic)
   228  	if ok, err := top.Exists(ctx); !ok || err != nil {
   229  		return nil, fmt.Errorf("failed to get topic; exists: %v, error: %v", ok, err)
   230  	}
   231  	return &pubSubWrite{ProjectID: projectID, Topic: top.ID()}, nil
   232  }
   233  
   234  // Write publishes elements from a PCollection of byte slices to a PubSub topic.
   235  // If the topic does not exist at pipeline construction time, the function will panic.
   236  //
   237  // This is an example and subject to change, including its behavior and function signature.
   238  // Please use the cross-language implementation Write() instead.
   239  func Write(ctx context.Context, s beam.Scope, col beam.PCollection, project, topic string) {
   240  	s = s.Scope("pubsubio.NativeWrite")
   241  
   242  	psWrite, err := newPubSubWrite(ctx, project, topic)
   243  	if err != nil {
   244  		panic(err)
   245  	}
   246  	beam.ParDo0(s, psWrite, col)
   247  }