github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/testing/benchmarks/nexmark/queries/query6.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  """
    19  Query 6, 'Average Selling Price by Seller'. Select the average selling price
    20  over the last 10 closed auctions by the same seller. In CQL syntax::
    21  
    22    SELECT Istream(AVG(Q.final), Q.seller)
    23    FROM (SELECT Rstream(MAX(B.price) AS final, A.seller)
    24      FROM Auction A [ROWS UNBOUNDED], Bid B [ROWS UNBOUNDED]
    25      WHERE A.id=B.auction
    26        AND B.datetime < A.expires AND A.expires < CURRENT_TIME
    27      GROUP BY A.id, A.seller) [PARTITION BY A.seller ROWS 10] Q
    28    GROUP BY Q.seller;
    29  """
    30  
    31  import apache_beam as beam
    32  from apache_beam.testing.benchmarks.nexmark.queries import nexmark_query_util
    33  from apache_beam.testing.benchmarks.nexmark.queries import winning_bids
    34  from apache_beam.testing.benchmarks.nexmark.queries.nexmark_query_util import ResultNames
    35  from apache_beam.transforms import trigger
    36  from apache_beam.transforms import window
    37  
    38  
    39  def load(events, metadata=None, pipeline_options=None):
    40    # find winning bids for each closed auction
    41    return (
    42        events
    43        # find winning bids
    44        | beam.Filter(nexmark_query_util.auction_or_bid)
    45        | winning_bids.WinningBids()
    46        # (auction_bids -> (aution.seller, bid)
    47        | beam.Map(lambda auc_bid: (auc_bid.auction.seller, auc_bid.bid))
    48        # calculate and output mean as data arrives
    49        | beam.WindowInto(
    50            window.GlobalWindows(),
    51            trigger=trigger.Repeatedly(trigger.AfterCount(1)),
    52            accumulation_mode=trigger.AccumulationMode.ACCUMULATING,
    53            allowed_lateness=0)
    54        | beam.CombinePerKey(MovingMeanSellingPriceFn(10))
    55        | beam.Map(lambda t: {
    56            ResultNames.SELLER: t[0], ResultNames.PRICE: t[1]
    57        }))
    58  
    59  
    60  class MovingMeanSellingPriceFn(beam.CombineFn):
    61    """
    62    Combiner to keep track of up to max_num_bids of the most recent wining
    63    bids and calculate their average selling price.
    64    """
    65    def __init__(self, max_num_bids):
    66      self.max_num_bids = max_num_bids
    67  
    68    def create_accumulator(self):
    69      return []
    70  
    71    def add_input(self, accumulator, element):
    72      accumulator.append(element)
    73      new_accu = sorted(accumulator, key=lambda bid: (-bid.date_time, -bid.price))
    74      if len(new_accu) > self.max_num_bids:
    75        del new_accu[self.max_num_bids]
    76      return new_accu
    77  
    78    def merge_accumulators(self, accumulators):
    79      new_accu = []
    80      for accumulator in accumulators:
    81        new_accu += accumulator
    82      new_accu.sort(key=lambda bid: (bid.date_time, bid.price))
    83      return new_accu[-self.max_num_bids:]
    84  
    85    def extract_output(self, accumulator):
    86      if len(accumulator) == 0:
    87        return 0
    88      sum_price = sum(bid.price for bid in accumulator)
    89      return int(sum_price / len(accumulator))