github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/tools/distribution_counter_microbenchmark.py (about)

     1  # Licensed to the Apache Software Foundation (ASF) under one or more
     2  # contributor license agreements.  See the NOTICE file distributed with
     3  # this work for additional information regarding copyright ownership.
     4  # The ASF licenses this file to You under the Apache License, Version 2.0
     5  # (the "License"); you may not use this file except in compliance with
     6  # the License.  You may obtain a copy of the License at
     7  #
     8  #    http://www.apache.org/licenses/LICENSE-2.0
     9  #
    10  # Unless required by applicable law or agreed to in writing, software
    11  # distributed under the License is distributed on an "AS IS" BASIS,
    12  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  # See the License for the specific language governing permissions and
    14  # limitations under the License.
    15  #
    16  
    17  """A microbenchmark for measuring DistributionAccumulator performance
    18  
This runs a sequence of distribution updates on random input values to
calculate the average update time per input.
A typical update operation should take about 0.6 microseconds.
    22  
    23  Run as
    24    python -m apache_beam.tools.distribution_counter_microbenchmark
    25  """
    26  
    27  # pytype: skip-file
    28  
    29  import logging
    30  import random
    31  import sys
    32  import time
    33  
    34  from apache_beam.tools import utils
    35  
    36  
    37  def generate_input_values(num_input, lower_bound, upper_bound):
    38    values = []
    39    # pylint: disable=unused-variable
    40    for i in range(num_input):
    41      values.append(random.randint(lower_bound, upper_bound))
    42    return values
    43  
    44  
    45  def run_benchmark(num_runs=100, num_input=10000, seed=time.time()):
    46    total_time = 0
    47    random.seed(seed)
    48    lower_bound = 0
    49    upper_bound = sys.maxsize
    50    inputs = generate_input_values(num_input, lower_bound, upper_bound)
    51    from apache_beam.transforms import DataflowDistributionCounter
    52    print("Number of runs:", num_runs)
    53    print("Input size:", num_input)
    54    print("Input sequence from %d to %d" % (lower_bound, upper_bound))
    55    print("Random seed:", seed)
    56    for i in range(num_runs):
    57      counter = DataflowDistributionCounter()
    58      start = time.time()
    59      counter.add_inputs_for_test(inputs)
    60      time_cost = time.time() - start
    61      print("Run %d: Total time cost %g sec" % (i + 1, time_cost))
    62      total_time += time_cost / num_input
    63    print("Per element update time cost:", total_time / num_runs)
    64  
    65  
    66  if __name__ == '__main__':
    67    logging.basicConfig()
    68    utils.check_compiled(
    69        'apache_beam.transforms.cy_dataflow_distribution_counter')
    70    run_benchmark()