# Origin: github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/testing/benchmarks/nexmark/queries/query6.py
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
Query 6, 'Average Selling Price by Seller'. Select the average selling price
over the last 10 closed auctions by the same seller. In CQL syntax::

  SELECT Istream(AVG(Q.final), Q.seller)
  FROM (SELECT Rstream(MAX(B.price) AS final, A.seller)
    FROM Auction A [ROWS UNBOUNDED], Bid B [ROWS UNBOUNDED]
    WHERE A.id=B.auction
      AND B.datetime < A.expires AND A.expires < CURRENT_TIME
    GROUP BY A.id, A.seller) [PARTITION BY A.seller ROWS 10] Q
  GROUP BY Q.seller;
"""

import apache_beam as beam
from apache_beam.testing.benchmarks.nexmark.queries import nexmark_query_util
from apache_beam.testing.benchmarks.nexmark.queries import winning_bids
from apache_beam.testing.benchmarks.nexmark.queries.nexmark_query_util import ResultNames
from apache_beam.transforms import trigger
from apache_beam.transforms import window


def load(events, metadata=None, pipeline_options=None):
    """Build the Query 6 pipeline on top of ``events``.

    Args:
      events: PCollection the query is applied to.
      metadata: not used by this query; accepted for signature compatibility.
      pipeline_options: not used by this query; accepted for signature
        compatibility.

    Returns:
      A PCollection of dicts keyed by ``ResultNames.SELLER`` and
      ``ResultNames.PRICE``, where the price is the moving mean produced by
      ``MovingMeanSellingPriceFn(10)`` for that seller.
    """
    return (
        events
        # find winning bids for each closed auction
        | beam.Filter(nexmark_query_util.auction_or_bid)
        | winning_bids.WinningBids()
        # auction_bid -> (auction.seller, bid)
        | beam.Map(lambda auc_bid: (auc_bid.auction.seller, auc_bid.bid))
        # calculate and output mean as data arrives: the trigger fires after
        # every element and panes accumulate within the global window
        | beam.WindowInto(
            window.GlobalWindows(),
            trigger=trigger.Repeatedly(trigger.AfterCount(1)),
            accumulation_mode=trigger.AccumulationMode.ACCUMULATING,
            allowed_lateness=0)
        | beam.CombinePerKey(MovingMeanSellingPriceFn(10))
        | beam.Map(lambda t: {
            ResultNames.SELLER: t[0], ResultNames.PRICE: t[1]
        }))


class MovingMeanSellingPriceFn(beam.CombineFn):
    """
    Combiner to keep track of up to max_num_bids of the most recent winning
    bids and calculate their average selling price.

    The accumulator is a list of bids; every bid must expose ``date_time``
    and ``price`` attributes.
    """
    def __init__(self, max_num_bids):
        # Upper bound on the number of bids kept in an accumulator.
        self.max_num_bids = max_num_bids

    def _most_recent(self, bids):
        """Return at most ``max_num_bids`` of the latest bids in ``bids``.

        Bids are ordered by ``(date_time, price)`` ascending and the tail is
        kept, so when the list overflows the oldest bids (lowest price first
        among equal timestamps) are the ones dropped.
        """
        # Guard explicitly: ``seq[-0:]`` is the whole sequence, not an empty
        # one, so a plain negative slice would never truncate for a zero
        # limit.
        if self.max_num_bids <= 0:
            return []
        ordered = sorted(bids, key=lambda bid: (bid.date_time, bid.price))
        return ordered[-self.max_num_bids:]

    def create_accumulator(self):
        """Return an empty list of bids."""
        return []

    def add_input(self, accumulator, element):
        """Add ``element`` to ``accumulator`` and trim to the newest bids."""
        accumulator.append(element)
        return self._most_recent(accumulator)

    def merge_accumulators(self, accumulators):
        """Concatenate all ``accumulators`` and trim to the newest bids."""
        merged = []
        for accumulator in accumulators:
            merged.extend(accumulator)
        return self._most_recent(merged)

    def extract_output(self, accumulator):
        """Return the mean price of the kept bids truncated to an int.

        Returns 0 when no bids have been accumulated.
        """
        if not accumulator:
            return 0
        sum_price = sum(bid.price for bid in accumulator)
        return int(sum_price / len(accumulator))