#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
A transform to find winning bids for each closed auction. In pseudo CQL syntax:

  SELECT Rstream(A.*, B.auction, B.bidder, MAX(B.price), B.dateTime)
  FROM Auction A [ROWS UNBOUNDED], Bid B [ROWS UNBOUNDED]
  WHERE A.id = B.auction AND B.datetime < A.expires AND A.expires < CURRENT_TIME
  GROUP BY A.id

We will also check that the winning bid is not below the auction reserve. Note
that we ignore the auction opening bid value since it has no impact on which
bid eventually wins, if any.

Our implementation will use a custom windowing function in order to bring bids
and auctions together without requiring global state.
"""

import apache_beam as beam
from apache_beam.coders import coder_impl
from apache_beam.coders.coders import FastCoder
from apache_beam.testing.benchmarks.nexmark.models import auction_bid
from apache_beam.testing.benchmarks.nexmark.models import nexmark_model
from apache_beam.testing.benchmarks.nexmark.queries import nexmark_query_util
from apache_beam.transforms.window import IntervalWindow
from apache_beam.transforms.window import WindowFn
from apache_beam.utils.timestamp import Duration


class AuctionOrBidWindow(IntervalWindow):
  """Windows for open auctions and bids.

  An interval window that additionally records the id of the auction it
  relates to and whether it was created for an auction or for a bid.
  """
  def __init__(self, start, end, auction_id, is_auction_window):
    """Creates a window.

    Args:
      start: start of the interval, forwarded to IntervalWindow.
      end: end of the interval, forwarded to IntervalWindow.
      auction_id: id of the auction this window relates to.
      is_auction_window: True if the window was created for an auction,
        False if it was created for a bid.
    """
    super().__init__(start, end)
    self.auction = auction_id
    self.is_auction_window = is_auction_window

  @staticmethod
  def for_auction(timestamp, auction):
    """Returns the auction window from timestamp to auction.expires."""
    return AuctionOrBidWindow(timestamp, auction.expires, auction.id, True)

  @staticmethod
  def for_bid(expected_duration_micro, timestamp, bid):
    """Returns the bid window starting at timestamp.

    The window lasts twice expected_duration_micro microseconds.
    NOTE(review): presumably the doubled duration is meant as an upper bound
    on how long the bid's auction can still be open -- confirm.
    """
    return AuctionOrBidWindow(
        timestamp,
        timestamp + Duration(micros=expected_duration_micro * 2),
        bid.auction,
        False)

  def is_auction_window_fn(self):
    """Returns True if this window was created for an auction."""
    return self.is_auction_window

  def __eq__(self, other):
    # The window coder serializes the auction id and the auction/bid flag in
    # addition to the bounds, so equality has to take them into account too.
    # NOTE(review): the inherited IntervalWindow equality is believed to
    # compare only the window bounds -- confirm against apache_beam.
    if not isinstance(other, AuctionOrBidWindow):
      return False
    return (
        self.start == other.start and self.end == other.end and
        self.auction == other.auction and
        self.is_auction_window == other.is_auction_window)

  def __hash__(self):
    # Kept consistent with __eq__ above.
    return hash((self.start, self.end, self.auction, self.is_auction_window))

  def __str__(self):
    return (
        'AuctionOrBidWindow{start:%s; end:%s; auction:%d; isAuctionWindow:%s}' %
        (self.start, self.end, self.auction, self.is_auction_window))


class AuctionOrBidWindowCoder(FastCoder):
  """Coder for AuctionOrBidWindow, backed by AuctionOrBidWindowCoderImpl."""
  def _create_impl(self):
    return AuctionOrBidWindowCoderImpl()

  def is_deterministic(self):
    return True


class AuctionOrBidWindowCoderImpl(coder_impl.StreamCoderImpl):
  """Stream coder implementation for AuctionOrBidWindow.

  The encoding is the interval window encoding, followed by the auction id as
  a var-int, followed by the auction/bid flag as a boolean.
  """
  _super_coder_impl = coder_impl.IntervalWindowCoderImpl()
  _id_coder_impl = coder_impl.VarIntCoderImpl()
  _bool_coder_impl = coder_impl.BooleanCoderImpl()

  def encode_to_stream(self, value, stream, nested):
    """Writes value to stream; every component is written as nested."""
    self._super_coder_impl.encode_to_stream(value, stream, True)
    self._id_coder_impl.encode_to_stream(value.auction, stream, True)
    self._bool_coder_impl.encode_to_stream(
        value.is_auction_window, stream, True)

  def decode_from_stream(self, stream, nested):
    """Reads an AuctionOrBidWindow in the order used by encode_to_stream."""
    super_window = self._super_coder_impl.decode_from_stream(stream, True)
    auction = self._id_coder_impl.decode_from_stream(stream, True)
    is_auction = self._bool_coder_impl.decode_from_stream(stream, True)
    return AuctionOrBidWindow(
        super_window.start, super_window.end, auction, is_auction)


class AuctionOrBidWindowFn(WindowFn):
  """Assigns auctions and bids to AuctionOrBidWindows and merges them.

  Bid windows are merged into the auction window of the same auction when the
  bid window starts before the auction window ends.
  """
  def __init__(self, expected_duration_micro):
    """Stores the duration (in microseconds) used to size bid windows."""
    self.expected_duration = expected_duration_micro

  def assign(self, assign_context):
    """Returns the single window for the auction or bid in assign_context.

    Raises:
      ValueError: if the element is neither an Auction nor a Bid.
    """
    event = assign_context.element
    if isinstance(event, nexmark_model.Auction):
      return [AuctionOrBidWindow.for_auction(assign_context.timestamp, event)]
    if isinstance(event, nexmark_model.Bid):
      return [
          AuctionOrBidWindow.for_bid(
              self.expected_duration, assign_context.timestamp, event)
      ]
    raise ValueError(
        '%s can only assign windows to auctions and bids, but received %s' %
        (self.__class__.__name__, event))

  def merge(self, merge_context):
    """Merges bid windows into the auction window of their auction."""
    auction_id_to_auction_window = {}
    auction_id_to_bid_window = {}
    for window in merge_context.windows:
      if window.is_auction_window_fn():
        # If several auction windows share an id, the last one seen is kept.
        auction_id_to_auction_window[window.auction] = window
      else:
        auction_id_to_bid_window.setdefault(window.auction, []).append(window)

    for auction, auction_window in auction_id_to_auction_window.items():
      # Bid windows starting at or after the auction window's end are left
      # unmerged.
      to_merge = [
          bid_window
          for bid_window in auction_id_to_bid_window.get(auction, ())
          if bid_window.start < auction_window.end
      ]
      if to_merge:
        to_merge.append(auction_window)
        merge_context.merge(to_merge, auction_window)

  def get_window_coder(self):
    return AuctionOrBidWindowCoder()

  def get_transformed_output_time(self, window, input_timestamp):
    """Returns the window's max timestamp, ignoring input_timestamp."""
    return window.max_timestamp()


class JoinAuctionBidFn(beam.DoFn):
  """Picks the winning bid for each co-grouped auction."""
  @staticmethod
  def higher_bid(bid, other):
    """Returns True if bid beats other.

    The higher price wins; on equal prices the bid with the earlier date_time
    wins.
    """
    if bid.price > other.price:
      return True
    if bid.price < other.price:
      return False
    return bid.date_time < other.date_time

  def process(self, element):
    """Yields an AuctionBid for the best bid not below the reserve.

    Args:
      element: a (key, group) pair where group is indexed by
        nexmark_query_util.AUCTION_TAG and nexmark_query_util.BID_TAG.

    Yields:
      At most one auction_bid.AuctionBid; nothing if the group holds no
      auction or no bid reaching the auction reserve.
    """
    _, group = element
    auctions = group[nexmark_query_util.AUCTION_TAG]
    auction = auctions[0] if auctions else None
    if auction is None:
      return
    best_bid = None
    for bid in group[nexmark_query_util.BID_TAG]:
      if bid.price < auction.reserve:
        continue
      if best_bid is None or JoinAuctionBidFn.higher_bid(bid, best_bid):
        best_bid = bid
    if best_bid is not None:
      yield auction_bid.AuctionBid(auction, best_bid)


class WinningBids(beam.PTransform):
  """Joins auctions with their bids and emits the winning bid per auction."""
  def __init__(self, expected_duration_micro=16667000):
    """Creates the transform.

    Args:
      expected_duration_micro: duration in microseconds handed to
        AuctionOrBidWindowFn to size bid windows.
    """
    super().__init__()
    #TODO: change this to be calculated by event generation
    expected_duration = expected_duration_micro
    self.auction_or_bid_windowFn = AuctionOrBidWindowFn(expected_duration)

  def expand(self, pcoll):
    events = pcoll | beam.WindowInto(self.auction_or_bid_windowFn)

    auction_by_id = (
        events
        | nexmark_query_util.JustAuctions()
        | 'auction_by_id' >> beam.ParDo(nexmark_query_util.AuctionByIdFn()))
    bids_by_auction_id = (
        events
        | nexmark_query_util.JustBids()
        | 'bid_by_auction' >> beam.ParDo(nexmark_query_util.BidByAuctionIdFn()))

    return ({
        nexmark_query_util.AUCTION_TAG: auction_by_id,
        nexmark_query_util.BID_TAG: bids_by_auction_id
    }
            | beam.CoGroupByKey()
            | beam.ParDo(JoinAuctionBidFn()))