#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
Query 4, 'Average Price for a Category'. Select the average of the winning bid
prices for all closed auctions in each category. In CQL syntax::

  SELECT Istream(AVG(Q.final))
  FROM Category C, (SELECT Rstream(MAX(B.price) AS final, A.category)
                    FROM Auction A [ROWS UNBOUNDED], Bid B [ROWS UNBOUNDED]
                    WHERE A.id=B.auction
                      AND B.datetime < A.expires AND A.expires < CURRENT_TIME
                    GROUP BY A.id, A.category) Q
  WHERE Q.category = C.id
  GROUP BY C.id;

For extra spiciness our implementation differs slightly from the above:

* We select both the average winning price and the category.
* We don't bother joining with a static category table, since its
  contents are never used.
* We only consider bids which are above the auction's reserve price.
* We accept the highest-price, earliest valid bid as the winner.
* We calculate the averages over a sliding window of size
  window_size_sec and period window_period_sec.
"""

import apache_beam as beam
from apache_beam.testing.benchmarks.nexmark.queries import nexmark_query_util
from apache_beam.testing.benchmarks.nexmark.queries import winning_bids
from apache_beam.testing.benchmarks.nexmark.queries.nexmark_query_util import ResultNames
from apache_beam.transforms import window


def load(events, metadata=None, pipeline_options=None):
  """Build the Query 4 pipeline on top of ``events``.

  The stages are applied in this order: filter the events with
  ``nexmark_query_util.auction_or_bid``, apply ``winning_bids.WinningBids``,
  key each result as ``(auction.category, bid.price)``, re-window into
  sliding windows, take the per-key mean and finally project each result to
  a dict with ``ProjectToCategoryPriceFn``.

  Args:
    events: input collection the pipeline is applied to.
    metadata: mapping read via ``.get`` for ``'window_size_sec'`` and
      ``'window_period_sec'``, which are passed to ``window.SlidingWindows``.
      Although the default is ``None``, it is dereferenced unconditionally,
      so a real mapping has to be supplied.
    pipeline_options: accepted but not used by this query.

  Returns:
    The result of applying ``beam.ParDo(ProjectToCategoryPriceFn())`` to the
    per-category means.
  """
  # find winning bids for each closed auction
  auction_and_bid_events = (
      events | beam.Filter(nexmark_query_util.auction_or_bid))
  closed_auction_winners = auction_and_bid_events | winning_bids.WinningBids()

  # key winning bids by auction category
  price_by_category = closed_auction_winners | beam.Map(
      lambda won: (won.auction.category, won.bid.price))

  # re-window for sliding average
  averaging_window = window.SlidingWindows(
      metadata.get('window_size_sec'), metadata.get('window_period_sec'))
  rewindowed_prices = price_by_category | beam.WindowInto(averaging_window)

  # average for each category
  # TODO(leiyiz): fanout with sliding window produces duplicated results,
  #   uncomment after it is fixed
  #   [https://github.com/apache/beam/issues/20528]
  # .with_hot_key_fanout(metadata.get('fanout'))
  category_means = rewindowed_prices | beam.CombinePerKey(
      beam.combiners.MeanCombineFn())

  # produce output
  return category_means | beam.ParDo(ProjectToCategoryPriceFn())


class ProjectToCategoryPriceFn(beam.DoFn):
  """Turn each indexable ``element`` into a result dict.

  The emitted dict is keyed by ``ResultNames.CATEGORY`` (``element[0]``),
  ``ResultNames.PRICE`` (``element[1]``) and ``ResultNames.IS_LAST`` (the
  ``is_last`` attribute of the injected pane info).
  """
  def process(self, element, pane_info=beam.DoFn.PaneInfoParam):
    category = element[0]
    mean_price = element[1]
    yield {
        ResultNames.CATEGORY: category,
        ResultNames.PRICE: mean_price,
        ResultNames.IS_LAST: pane_info.is_last,
    }