github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/io/gcp/pubsub.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  """Google Cloud PubSub sources and sinks.
    19  
    20  Cloud Pub/Sub sources and sinks are currently supported only in streaming
    21  pipelines, during remote execution.
    22  
    23  This API is currently under development and is subject to change.
    24  
    25  **Updates to the I/O connector code**
    26  
    27  For any significant updates to this I/O connector, please consider involving
    28  corresponding code reviewers mentioned in
    29  https://github.com/apache/beam/blob/master/sdks/python/OWNERS
    30  """
    31  
    32  # pytype: skip-file
    33  
    34  import re
    35  from typing import Any
    36  from typing import List
    37  from typing import NamedTuple
    38  from typing import Optional
    39  from typing import Tuple
    40  
    41  from apache_beam import coders
    42  from apache_beam.io.iobase import Read
    43  from apache_beam.io.iobase import Write
    44  from apache_beam.runners.dataflow.native_io import iobase as dataflow_io
    45  from apache_beam.transforms import Flatten
    46  from apache_beam.transforms import Map
    47  from apache_beam.transforms import PTransform
    48  from apache_beam.transforms.display import DisplayDataItem
    49  from apache_beam.utils.annotations import deprecated
    50  
    51  try:
    52    from google.cloud import pubsub
    53  except ImportError:
    54    pubsub = None
    55  
    56  __all__ = [
    57      'MultipleReadFromPubSub',
    58      'PubsubMessage',
    59      'PubSubSourceDescriptor',
    60      'ReadFromPubSub',
    61      'ReadStringsFromPubSub',
    62      'WriteStringsToPubSub',
    63      'WriteToPubSub'
    64  ]
    65  
    66  
    67  class PubsubMessage(object):
    68    """Represents a Cloud Pub/Sub message.
    69  
    70    Message payload includes the data and attributes fields. For the payload to be
    71    valid, at least one of its fields must be non-empty.
    72  
    73    Attributes:
    74      data: (bytes) Message data. May be None.
    75      attributes: (dict) Key-value map of str to str, containing both user-defined
    76        and service generated attributes (such as id_label and
    77        timestamp_attribute). May be None.
    78      message_id: (str) ID of the message, assigned by the pubsub service when the
    79        message is published. Guaranteed to be unique within the topic. Will be
    80        reset to None if the message is being written to pubsub.
    81      publish_time: (datetime) Time at which the message was published. Will be
    82        reset to None if the Message is being written to pubsub.
    83      ordering_key: (str) If non-empty, identifies related messages for which
    84        publish order is respected by the PubSub subscription.
    85    """
    86    def __init__(
    87        self,
    88        data,
    89        attributes,
    90        message_id=None,
    91        publish_time=None,
    92        ordering_key=""):
    93      if data is None and not attributes:
    94        raise ValueError(
    95            'Either data (%r) or attributes (%r) must be set.', data, attributes)
    96      self.data = data
    97      self.attributes = attributes
    98      self.message_id = message_id
    99      self.publish_time = publish_time
   100      self.ordering_key = ordering_key
   101  
   102    def __hash__(self):
   103      return hash((self.data, frozenset(self.attributes.items())))
   104  
   105    def __eq__(self, other):
   106      return isinstance(other, PubsubMessage) and (
   107          self.data == other.data and self.attributes == other.attributes)
   108  
   109    def __repr__(self):
   110      return 'PubsubMessage(%s, %s)' % (self.data, self.attributes)
   111  
   112    @staticmethod
   113    def _from_proto_str(proto_msg):
   114      # type: (bytes) -> PubsubMessage
   115  
   116      """Construct from serialized form of ``PubsubMessage``.
   117  
   118      Args:
   119        proto_msg: String containing a serialized protobuf of type
   120        https://cloud.google.com/pubsub/docs/reference/rpc/google.pubsub.v1#google.pubsub.v1.PubsubMessage
   121  
   122      Returns:
   123        A new PubsubMessage object.
   124      """
   125      msg = pubsub.types.PubsubMessage.deserialize(proto_msg)
   126      # Convert ScalarMapContainer to dict.
   127      attributes = dict((key, msg.attributes[key]) for key in msg.attributes)
   128      return PubsubMessage(
   129          msg.data,
   130          attributes,
   131          msg.message_id,
   132          msg.publish_time,
   133          msg.ordering_key)
   134  
   135    def _to_proto_str(self, for_publish=False):
   136      """Get serialized form of ``PubsubMessage``.
   137  
   138      The serialized message is validated against pubsub message limits specified
   139      at https://cloud.google.com/pubsub/quotas#resource_limits
   140  
   141      Args:
   142        proto_msg: str containing a serialized protobuf.
   143        for_publish: bool, if True strip out message fields which cannot be
   144          published (currently message_id and publish_time) per
   145          https://cloud.google.com/pubsub/docs/reference/rpc/google.pubsub.v1#pubsubmessage
   146  
   147      Returns:
   148        A str containing a serialized protobuf of type
   149        https://cloud.google.com/pubsub/docs/reference/rpc/google.pubsub.v1#google.pubsub.v1.PubsubMessage
   150        containing the payload of this object.
   151      """
   152      msg = pubsub.types.PubsubMessage()
   153      if len(self.data) > (10 << 20):
   154        raise ValueError('A pubsub message data field must not exceed 10MB')
   155      msg.data = self.data
   156  
   157      if self.attributes:
   158        if len(self.attributes) > 100:
   159          raise ValueError(
   160              'A pubsub message must not have more than 100 attributes.')
   161        for key, value in self.attributes.items():
   162          if len(key) > 256:
   163            raise ValueError(
   164                'A pubsub message attribute key must not exceed 256 bytes.')
   165          if len(value) > 1024:
   166            raise ValueError(
   167                'A pubsub message attribute value must not exceed 1024 bytes')
   168          msg.attributes[key] = value
   169  
   170      if not for_publish:
   171        if self.message_id:
   172          msg.message_id = self.message_id
   173          if self.publish_time:
   174            msg.publish_time = self.publish_time
   175  
   176      if len(self.ordering_key) > 1024:
   177        raise ValueError(
   178            'A pubsub message ordering key must not exceed 1024 bytes.')
   179      msg.ordering_key = self.ordering_key
   180  
   181      serialized = pubsub.types.PubsubMessage.serialize(msg)
   182      if len(serialized) > (10 << 20):
   183        raise ValueError(
   184            'Serialized pubsub message exceeds the publish request limit of 10MB')
   185      return serialized
   186  
   187    @staticmethod
   188    def _from_message(msg):
   189      # type: (Any) -> PubsubMessage
   190  
   191      """Construct from ``google.cloud.pubsub_v1.subscriber.message.Message``.
   192  
   193      https://googleapis.github.io/google-cloud-python/latest/pubsub/subscriber/api/message.html
   194      """
   195      # Convert ScalarMapContainer to dict.
   196      attributes = dict((key, msg.attributes[key]) for key in msg.attributes)
   197      pubsubmessage = PubsubMessage(msg.data, attributes)
   198      if msg.message_id:
   199        pubsubmessage.message_id = msg.message_id
   200      if msg.publish_time:
   201        pubsubmessage.publish_time = msg.publish_time
   202      if msg.ordering_key:
   203        pubsubmessage.ordering_key = msg.ordering_key
   204      return pubsubmessage
   205  
   206  
   207  class ReadFromPubSub(PTransform):
   208    """A ``PTransform`` for reading from Cloud Pub/Sub."""
   209  
   210    # Implementation note: This ``PTransform`` is overridden by Directrunner.
   211  
   212    def __init__(
   213        self,
   214        topic=None,  # type: Optional[str]
   215        subscription=None,  # type: Optional[str]
   216        id_label=None,  # type: Optional[str]
   217        with_attributes=False,  # type: bool
   218        timestamp_attribute=None  # type: Optional[str]
   219    ):
   220      # type: (...) -> None
   221  
   222      """Initializes ``ReadFromPubSub``.
   223  
   224      Args:
   225        topic: Cloud Pub/Sub topic in the form
   226          "projects/<project>/topics/<topic>". If provided, subscription must be
   227          None.
   228        subscription: Existing Cloud Pub/Sub subscription to use in the
   229          form "projects/<project>/subscriptions/<subscription>". If not
   230          specified, a temporary subscription will be created from the specified
   231          topic. If provided, topic must be None.
   232        id_label: The attribute on incoming Pub/Sub messages to use as a unique
   233          record identifier. When specified, the value of this attribute (which
   234          can be any string that uniquely identifies the record) will be used for
   235          deduplication of messages. If not provided, we cannot guarantee
   236          that no duplicate data will be delivered on the Pub/Sub stream. In this
   237          case, deduplication of the stream will be strictly best effort.
   238        with_attributes:
   239          True - output elements will be :class:`~PubsubMessage` objects.
   240          False - output elements will be of type ``bytes`` (message
   241          data only).
   242        timestamp_attribute: Message value to use as element timestamp. If None,
   243          uses message publishing time as the timestamp.
   244  
   245          Timestamp values should be in one of two formats:
   246  
   247          - A numerical value representing the number of milliseconds since the
   248            Unix epoch.
   249          - A string in RFC 3339 format, UTC timezone. Example:
   250            ``2015-10-29T23:41:41.123Z``. The sub-second component of the
   251            timestamp is optional, and digits beyond the first three (i.e., time
   252            units smaller than milliseconds) may be ignored.
   253      """
   254      super().__init__()
   255      self.with_attributes = with_attributes
   256      self._source = _PubSubSource(
   257          topic=topic,
   258          subscription=subscription,
   259          id_label=id_label,
   260          with_attributes=self.with_attributes,
   261          timestamp_attribute=timestamp_attribute)
   262  
   263    def expand(self, pvalue):
   264      pcoll = pvalue.pipeline | Read(self._source)
   265      pcoll.element_type = bytes
   266      if self.with_attributes:
   267        pcoll = pcoll | Map(PubsubMessage._from_proto_str)
   268        pcoll.element_type = PubsubMessage
   269      return pcoll
   270  
   271    def to_runner_api_parameter(self, context):
   272      # Required as this is identified by type in PTransformOverrides.
   273      # TODO(https://github.com/apache/beam/issues/18713): Use an actual URN here.
   274      return self.to_runner_api_pickled(context)
   275  
   276  
   277  @deprecated(since='2.7.0', extra_message='Use ReadFromPubSub instead.')
   278  def ReadStringsFromPubSub(topic=None, subscription=None, id_label=None):
   279    return _ReadStringsFromPubSub(topic, subscription, id_label)
   280  
   281  
   282  class _ReadStringsFromPubSub(PTransform):
   283    """This class is deprecated. Use ``ReadFromPubSub`` instead."""
   284    def __init__(self, topic=None, subscription=None, id_label=None):
   285      super().__init__()
   286      self.topic = topic
   287      self.subscription = subscription
   288      self.id_label = id_label
   289  
   290    def expand(self, pvalue):
   291      p = (
   292          pvalue.pipeline
   293          | ReadFromPubSub(
   294              self.topic, self.subscription, self.id_label, with_attributes=False)
   295          | 'DecodeString' >> Map(lambda b: b.decode('utf-8')))
   296      p.element_type = str
   297      return p
   298  
   299  
   300  @deprecated(since='2.7.0', extra_message='Use WriteToPubSub instead.')
   301  def WriteStringsToPubSub(topic):
   302    return _WriteStringsToPubSub(topic)
   303  
   304  
   305  class _WriteStringsToPubSub(PTransform):
   306    """This class is deprecated. Use ``WriteToPubSub`` instead."""
   307    def __init__(self, topic):
   308      """Initializes ``_WriteStringsToPubSub``.
   309  
   310      Attributes:
   311        topic: Cloud Pub/Sub topic in the form "/topics/<project>/<topic>".
   312      """
   313      super().__init__()
   314      self.topic = topic
   315  
   316    def expand(self, pcoll):
   317      pcoll = pcoll | 'EncodeString' >> Map(lambda s: s.encode('utf-8'))
   318      pcoll.element_type = bytes
   319      return pcoll | WriteToPubSub(self.topic)
   320  
   321  
   322  class WriteToPubSub(PTransform):
   323    """A ``PTransform`` for writing messages to Cloud Pub/Sub."""
   324  
   325    # Implementation note: This ``PTransform`` is overridden by Directrunner.
   326  
   327    def __init__(
   328        self,
   329        topic,  # type: str
   330        with_attributes=False,  # type: bool
   331        id_label=None,  # type: Optional[str]
   332        timestamp_attribute=None  # type: Optional[str]
   333    ):
   334      # type: (...) -> None
   335  
   336      """Initializes ``WriteToPubSub``.
   337  
   338      Args:
   339        topic: Cloud Pub/Sub topic in the form "/topics/<project>/<topic>".
   340        with_attributes:
   341          True - input elements will be :class:`~PubsubMessage` objects.
   342          False - input elements will be of type ``bytes`` (message
   343          data only).
   344        id_label: If set, will set an attribute for each Cloud Pub/Sub message
   345          with the given name and a unique value. This attribute can then be used
   346          in a ReadFromPubSub PTransform to deduplicate messages.
   347        timestamp_attribute: If set, will set an attribute for each Cloud Pub/Sub
   348          message with the given name and the message's publish time as the value.
   349      """
   350      super().__init__()
   351      self.with_attributes = with_attributes
   352      self.id_label = id_label
   353      self.timestamp_attribute = timestamp_attribute
   354      self.project, self.topic_name = parse_topic(topic)
   355      self.full_topic = topic
   356      self._sink = _PubSubSink(topic, id_label, timestamp_attribute)
   357  
   358    @staticmethod
   359    def message_to_proto_str(element):
   360      # type: (PubsubMessage) -> bytes
   361      if not isinstance(element, PubsubMessage):
   362        raise TypeError(
   363            'Unexpected element. Type: %s (expected: PubsubMessage), '
   364            'value: %r' % (type(element), element))
   365      return element._to_proto_str(for_publish=True)
   366  
   367    @staticmethod
   368    def bytes_to_proto_str(element):
   369      # type: (bytes) -> bytes
   370      msg = PubsubMessage(element, {})
   371      return msg._to_proto_str(for_publish=True)
   372  
   373    def expand(self, pcoll):
   374      if self.with_attributes:
   375        pcoll = pcoll | 'ToProtobuf' >> Map(self.message_to_proto_str)
   376      else:
   377        pcoll = pcoll | 'ToProtobuf' >> Map(self.bytes_to_proto_str)
   378      pcoll.element_type = bytes
   379      return pcoll | Write(self._sink)
   380  
   381    def to_runner_api_parameter(self, context):
   382      # Required as this is identified by type in PTransformOverrides.
   383      # TODO(https://github.com/apache/beam/issues/18713): Use an actual URN here.
   384      return self.to_runner_api_pickled(context)
   385  
   386    def display_data(self):
   387      return {
   388          'topic': DisplayDataItem(self.full_topic, label='Pubsub Topic'),
   389          'id_label': DisplayDataItem(self.id_label, label='ID Label Attribute'),
   390          'with_attributes': DisplayDataItem(
   391              True, label='With Attributes').drop_if_none(),
   392          'timestamp_attribute': DisplayDataItem(
   393              self.timestamp_attribute, label='Timestamp Attribute'),
   394      }
   395  
   396  
   397  PROJECT_ID_REGEXP = '[a-z][-a-z0-9:.]{4,61}[a-z0-9]'
   398  SUBSCRIPTION_REGEXP = 'projects/([^/]+)/subscriptions/(.+)'
   399  TOPIC_REGEXP = 'projects/([^/]+)/topics/(.+)'
   400  
   401  
   402  def parse_topic(full_topic: str) -> Tuple[str, str]:
   403    match = re.match(TOPIC_REGEXP, full_topic)
   404    if not match:
   405      raise ValueError(
   406          'PubSub topic must be in the form "projects/<project>/topics'
   407          '/<topic>" (got %r).' % full_topic)
   408    project, topic_name = match.group(1), match.group(2)
   409    if not re.match(PROJECT_ID_REGEXP, project):
   410      raise ValueError('Invalid PubSub project name: %r.' % project)
   411    return project, topic_name
   412  
   413  
   414  def parse_subscription(full_subscription):
   415    match = re.match(SUBSCRIPTION_REGEXP, full_subscription)
   416    if not match:
   417      raise ValueError(
   418          'PubSub subscription must be in the form "projects/<project>'
   419          '/subscriptions/<subscription>" (got %r).' % full_subscription)
   420    project, subscription_name = match.group(1), match.group(2)
   421    if not re.match(PROJECT_ID_REGEXP, project):
   422      raise ValueError('Invalid PubSub project name: %r.' % project)
   423    return project, subscription_name
   424  
   425  
   426  class _PubSubSource(dataflow_io.NativeSource):
   427    """Source for a Cloud Pub/Sub topic or subscription.
   428  
   429    This ``NativeSource`` is overridden by a native Pubsub implementation.
   430  
   431    Attributes:
   432      with_attributes: If False, will fetch just message data. Otherwise,
   433        fetches ``PubsubMessage`` protobufs.
   434    """
   435    def __init__(
   436        self,
   437        topic=None,  # type: Optional[str]
   438        subscription=None,  # type: Optional[str]
   439        id_label=None,  # type: Optional[str]
   440        with_attributes=False,  # type: bool
   441        timestamp_attribute=None  # type: Optional[str]
   442    ):
   443      self.coder = coders.BytesCoder()
   444      self.full_topic = topic
   445      self.full_subscription = subscription
   446      self.topic_name = None
   447      self.subscription_name = None
   448      self.id_label = id_label
   449      self.with_attributes = with_attributes
   450      self.timestamp_attribute = timestamp_attribute
   451  
   452      # Perform some validation on the topic and subscription.
   453      if not (topic or subscription):
   454        raise ValueError('Either a topic or subscription must be provided.')
   455      if topic and subscription:
   456        raise ValueError('Only one of topic or subscription should be provided.')
   457  
   458      if topic:
   459        self.project, self.topic_name = parse_topic(topic)
   460      if subscription:
   461        self.project, self.subscription_name = parse_subscription(subscription)
   462  
   463    @property
   464    def format(self):
   465      """Source format name required for remote execution."""
   466      return 'pubsub'
   467  
   468    def display_data(self):
   469      return {
   470          'id_label': DisplayDataItem(self.id_label,
   471                                      label='ID Label Attribute').drop_if_none(),
   472          'topic': DisplayDataItem(self.full_topic,
   473                                   label='Pubsub Topic').drop_if_none(),
   474          'subscription': DisplayDataItem(
   475              self.full_subscription, label='Pubsub Subscription').drop_if_none(),
   476          'with_attributes': DisplayDataItem(
   477              self.with_attributes, label='With Attributes').drop_if_none(),
   478          'timestamp_attribute': DisplayDataItem(
   479              self.timestamp_attribute,
   480              label='Timestamp Attribute').drop_if_none(),
   481      }
   482  
   483    def reader(self):
   484      raise NotImplementedError
   485  
   486    def is_bounded(self):
   487      return False
   488  
   489  
   490  class _PubSubSink(dataflow_io.NativeSink):
   491    """Sink for a Cloud Pub/Sub topic.
   492  
   493    This ``NativeSource`` is overridden by a native Pubsub implementation.
   494    """
   495    def __init__(
   496        self,
   497        topic: str,
   498        id_label: Optional[str],
   499        timestamp_attribute: Optional[str],
   500    ):
   501      self.coder = coders.BytesCoder()
   502      self.full_topic = topic
   503      self.id_label = id_label
   504      self.timestamp_attribute = timestamp_attribute
   505  
   506      self.project, self.topic_name = parse_topic(topic)
   507  
   508    @property
   509    def format(self):
   510      """Sink format name required for remote execution."""
   511      return 'pubsub'
   512  
   513    def writer(self):
   514      raise NotImplementedError
   515  
   516  
   517  class PubSubSourceDescriptor(NamedTuple):
   518    """A PubSub source descriptor for ``MultipleReadFromPubSub```
   519  
   520    Attributes:
   521      source: Existing Cloud Pub/Sub topic or subscription to use in the
   522        form "projects/<project>/topics/<topic>" or
   523        "projects/<project>/subscriptions/<subscription>"
   524      id_label: The attribute on incoming Pub/Sub messages to use as a unique
   525        record identifier. When specified, the value of this attribute (which
   526        can be any string that uniquely identifies the record) will be used for
   527        deduplication of messages. If not provided, we cannot guarantee
   528        that no duplicate data will be delivered on the Pub/Sub stream. In this
   529        case, deduplication of the stream will be strictly best effort.
   530      timestamp_attribute: Message value to use as element timestamp. If None,
   531        uses message publishing time as the timestamp.
   532  
   533        Timestamp values should be in one of two formats:
   534  
   535        - A numerical value representing the number of milliseconds since the
   536          Unix epoch.
   537        - A string in RFC 3339 format, UTC timezone. Example:
   538          ``2015-10-29T23:41:41.123Z``. The sub-second component of the
   539          timestamp is optional, and digits beyond the first three (i.e., time
   540          units smaller than milliseconds) may be ignored.
   541    """
   542    source: str
   543    id_label: str = None
   544    timestamp_attribute: str = None
   545  
   546  
   547  PUBSUB_DESCRIPTOR_REGEXP = 'projects/([^/]+)/(topics|subscriptions)/(.+)'
   548  
   549  
   550  class MultipleReadFromPubSub(PTransform):
   551    """A ``PTransform`` that expands ``ReadFromPubSub`` to read from multiple
   552    ``PubSubSourceDescriptor``.
   553  
   554    The `MultipleReadFromPubSub` transform allows you to read multiple topics
   555    and/or subscriptions using just one transform. It is the recommended transform
   556    to read multiple Pub/Sub sources when the output `PCollection` are going to be
   557    flattened. The transform takes a list of `PubSubSourceDescriptor` and organize
   558    them by type (topic / subscription) and project:::
   559  
   560      topic_1 = PubSubSourceDescriptor('projects/myproject/topics/a_topic')
   561      topic_2 = PubSubSourceDescriptor(
   562                  'projects/myproject2/topics/b_topic',
   563                  'my_label',
   564                  'my_timestamp_attribute')
   565      subscription_1 = PubSubSourceDescriptor(
   566                  'projects/myproject/subscriptions/a_subscription')
   567  
   568      results = pipeline | MultipleReadFromPubSub(
   569                  [topic_1, topic_2, subscription_1])
   570    """
   571    def __init__(
   572        self,
   573        pubsub_source_descriptors,  # type: List[PubSubSourceDescriptor]
   574        with_attributes=False,  # type: bool
   575    ):
   576      """Initializes ``PubSubMultipleReader``.
   577  
   578      Args:
   579        pubsub_source_descriptors: List of Cloud Pub/Sub topics or subscriptions
   580          of type `~PubSubSourceDescriptor`.
   581        with_attributes:
   582          True - input elements will be :class:`~PubsubMessage` objects.
   583          False - input elements will be of type ``bytes`` (message data only).
   584      """
   585      self.pubsub_source_descriptors = pubsub_source_descriptors
   586      self.with_attributes = with_attributes
   587  
   588      for descriptor in self.pubsub_source_descriptors:
   589        match_descriptor = re.match(PUBSUB_DESCRIPTOR_REGEXP, descriptor.source)
   590  
   591        if not match_descriptor:
   592          raise ValueError(
   593              'PubSub source descriptor must be in the form "projects/<project>'
   594              '/topics/<topic>" or "projects/<project>/subscription'
   595              '/<subscription>" (got %r).' % descriptor.source)
   596  
   597    def expand(self, pcol):
   598      sources_pcol = []
   599      for descriptor in self.pubsub_source_descriptors:
   600        source_match = re.match(PUBSUB_DESCRIPTOR_REGEXP, descriptor.source)
   601        source_project = source_match.group(1)
   602        source_type = source_match.group(2)
   603        source_name = source_match.group(3)
   604  
   605        read_step_name = 'PubSub %s/project:%s/Read %s' % (
   606            source_type, source_project, source_name)
   607  
   608        if source_type == 'topics':
   609          current_source = pcol | read_step_name >> ReadFromPubSub(
   610              topic=descriptor.source,
   611              id_label=descriptor.id_label,
   612              with_attributes=self.with_attributes,
   613              timestamp_attribute=descriptor.timestamp_attribute)
   614        else:
   615          current_source = pcol | read_step_name >> ReadFromPubSub(
   616              subscription=descriptor.source,
   617              id_label=descriptor.id_label,
   618              with_attributes=self.with_attributes,
   619              timestamp_attribute=descriptor.timestamp_attribute)
   620  
   621        sources_pcol.append(current_source)
   622  
   623      return tuple(sources_pcol) | Flatten()