github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/testing/benchmarks/nexmark/queries/winning_bids.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  """
    19  A transform to find winning bids for each closed auction. In pseudo CQL syntax:
    20  
    21  SELECT Rstream(A.*, B.auction, B.bidder, MAX(B.price), B.dateTime)
    22  FROM Auction A [ROWS UNBOUNDED], Bid B [ROWS UNBOUNDED]
    23  WHERE A.id = B.auction AND B.datetime < A.expires AND A.expires < CURRENT_TIME
    24  GROUP BY A.id
    25  
    26  We will also check that the winning bid is above the auction reserve. Note that
    27  we ignore the auction opening bid value since it has no impact on which bid
    28  eventually wins, if any.
    29  
    30  Our implementation will use a custom windowing function in order to bring bids
    31  and auctions together without requiring global state.
    32  """
    33  
    34  import apache_beam as beam
    35  from apache_beam.coders import coder_impl
    36  from apache_beam.coders.coders import FastCoder
    37  from apache_beam.testing.benchmarks.nexmark.models import auction_bid
    38  from apache_beam.testing.benchmarks.nexmark.models import nexmark_model
    39  from apache_beam.testing.benchmarks.nexmark.queries import nexmark_query_util
    40  from apache_beam.transforms.window import IntervalWindow
    41  from apache_beam.transforms.window import WindowFn
    42  from apache_beam.utils.timestamp import Duration
    43  
    44  
    45  class AuctionOrBidWindow(IntervalWindow):
    46    """Windows for open auctions and bids."""
    47    def __init__(self, start, end, auction_id, is_auction_window):
    48      super().__init__(start, end)
    49      self.auction = auction_id
    50      self.is_auction_window = is_auction_window
    51  
    52    @staticmethod
    53    def for_auction(timestamp, auction):
    54      return AuctionOrBidWindow(timestamp, auction.expires, auction.id, True)
    55  
    56    @staticmethod
    57    def for_bid(expected_duration_micro, timestamp, bid):
    58      return AuctionOrBidWindow(
    59          timestamp,
    60          timestamp + Duration(micros=expected_duration_micro * 2),
    61          bid.auction,
    62          False)
    63  
    64    def is_auction_window_fn(self):
    65      return self.is_auction_window
    66  
    67    def __str__(self):
    68      return (
    69          'AuctionOrBidWindow{start:%s; end:%s; auction:%d; isAuctionWindow:%s}' %
    70          (self.start, self.end, self.auction, self.is_auction_window))
    71  
    72  
    73  class AuctionOrBidWindowCoder(FastCoder):
    74    def _create_impl(self):
    75      return AuctionOrBidWindowCoderImpl()
    76  
    77    def is_deterministic(self):
    78      return True
    79  
    80  
    81  class AuctionOrBidWindowCoderImpl(coder_impl.StreamCoderImpl):
    82    _super_coder_impl = coder_impl.IntervalWindowCoderImpl()
    83    _id_coder_impl = coder_impl.VarIntCoderImpl()
    84    _bool_coder_impl = coder_impl.BooleanCoderImpl()
    85  
    86    def encode_to_stream(self, value, stream, nested):
    87      self._super_coder_impl.encode_to_stream(value, stream, True)
    88      self._id_coder_impl.encode_to_stream(value.auction, stream, True)
    89      self._bool_coder_impl.encode_to_stream(
    90          value.is_auction_window, stream, True)
    91  
    92    def decode_from_stream(self, stream, nested):
    93      super_window = self._super_coder_impl.decode_from_stream(stream, True)
    94      auction = self._id_coder_impl.decode_from_stream(stream, True)
    95      is_auction = self._bool_coder_impl.decode_from_stream(stream, True)
    96      return AuctionOrBidWindow(
    97          super_window.start, super_window.end, auction, is_auction)
    98  
    99  
   100  class AuctionOrBidWindowFn(WindowFn):
   101    def __init__(self, expected_duration_micro):
   102      self.expected_duration = expected_duration_micro
   103  
   104    def assign(self, assign_context):
   105      event = assign_context.element
   106      if isinstance(event, nexmark_model.Auction):
   107        return [AuctionOrBidWindow.for_auction(assign_context.timestamp, event)]
   108      elif isinstance(event, nexmark_model.Bid):
   109        return [
   110            AuctionOrBidWindow.for_bid(
   111                self.expected_duration, assign_context.timestamp, event)
   112        ]
   113      else:
   114        raise ValueError(
   115            '%s can only assign windows to auctions and bids, but received %s' %
   116            (self.__class__.__name__, event))
   117  
   118    def merge(self, merge_context):
   119      auction_id_to_auction_window = {}
   120      auction_id_to_bid_window = {}
   121      for window in merge_context.windows:
   122        if window.is_auction_window_fn():
   123          auction_id_to_auction_window[window.auction] = window
   124        else:
   125          if window.auction not in auction_id_to_bid_window:
   126            auction_id_to_bid_window[window.auction] = []
   127          auction_id_to_bid_window[window.auction].append(window)
   128  
   129      for auction, auction_window in auction_id_to_auction_window.items():
   130        bid_window_list = auction_id_to_bid_window.get(auction)
   131        if bid_window_list is not None:
   132          to_merge = []
   133          for bid_window in bid_window_list:
   134            if bid_window.start < auction_window.end:
   135              to_merge.append(bid_window)
   136          if len(to_merge) > 0:
   137            to_merge.append(auction_window)
   138            merge_context.merge(to_merge, auction_window)
   139  
   140    def get_window_coder(self):
   141      return AuctionOrBidWindowCoder()
   142  
   143    def get_transformed_output_time(self, window, input_timestamp):
   144      return window.max_timestamp()
   145  
   146  
   147  class JoinAuctionBidFn(beam.DoFn):
   148    @staticmethod
   149    def higher_bid(bid, other):
   150      if bid.price > other.price:
   151        return True
   152      elif bid.price < other.price:
   153        return False
   154      else:
   155        return bid.date_time < other.date_time
   156  
   157    def process(self, element):
   158      _, group = element
   159      auctions = group[nexmark_query_util.AUCTION_TAG]
   160      auction = auctions[0] if auctions else None
   161      if auction is None:
   162        return
   163      best_bid = None
   164      for bid in group[nexmark_query_util.BID_TAG]:
   165        if bid.price < auction.reserve:
   166          continue
   167        if best_bid is None or JoinAuctionBidFn.higher_bid(bid, best_bid):
   168          best_bid = bid
   169      if best_bid:
   170        yield auction_bid.AuctionBid(auction, best_bid)
   171  
   172  
   173  class WinningBids(beam.PTransform):
   174    def __init__(self):
   175      #TODO: change this to be calculated by event generation
   176      expected_duration = 16667000
   177      self.auction_or_bid_windowFn = AuctionOrBidWindowFn(expected_duration)
   178  
   179    def expand(self, pcoll):
   180      events = pcoll | beam.WindowInto(self.auction_or_bid_windowFn)
   181  
   182      auction_by_id = (
   183          events
   184          | nexmark_query_util.JustAuctions()
   185          | 'auction_by_id' >> beam.ParDo(nexmark_query_util.AuctionByIdFn()))
   186      bids_by_auction_id = (
   187          events
   188          | nexmark_query_util.JustBids()
   189          | 'bid_by_auction' >> beam.ParDo(nexmark_query_util.BidByAuctionIdFn()))
   190  
   191      return ({
   192          nexmark_query_util.AUCTION_TAG: auction_by_id,
   193          nexmark_query_util.BID_TAG: bids_by_auction_id
   194      }
   195              | beam.CoGroupByKey()
   196              | beam.ParDo(JoinAuctionBidFn()))