# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""A microbenchmark for measuring DistributionAccumulator performance.

This runs a sequence of distribution updates on random input values to
calculate the average update time per input.
A typical update operation should take about 0.6 microseconds.

Run as
  python -m apache_beam.tools.distribution_counter_microbenchmark
"""

# pytype: skip-file

import logging
import random
import sys
import time

from apache_beam.tools import utils


def generate_input_values(num_input, lower_bound, upper_bound):
  """Return a list of random integers for the benchmark.

  Values are drawn with ``random.randint``, so both bounds are inclusive and
  the result is reproducible after ``random.seed``.

  Args:
    num_input: Number of values to generate.
    lower_bound: Smallest value that may be generated.
    upper_bound: Largest value that may be generated.

  Returns:
    A list of ``num_input`` integers in ``[lower_bound, upper_bound]``.
  """
  return [random.randint(lower_bound, upper_bound) for _ in range(num_input)]


def run_benchmark(num_runs=100, num_input=10000, seed=None):
  """Time DataflowDistributionCounter updates and print the results.

  One input sequence is generated up front and fed to a fresh counter on
  every run. The time of each run and the average per-element update time
  across all runs are printed to stdout.

  Args:
    num_runs: Number of timed runs.
    num_input: Number of random values fed to the counter in each run.
    seed: Seed for the ``random`` module. Defaults to the current time,
      read when the function is called.

  Raises:
    ZeroDivisionError: If ``num_runs`` or ``num_input`` is 0.
  """
  # Resolve the default at call time. A ``seed=time.time()`` default would be
  # evaluated only once, when the function is defined, so every call without
  # an explicit seed would silently reuse the same input sequence.
  if seed is None:
    seed = time.time()
  random.seed(seed)

  lower_bound = 0
  upper_bound = sys.maxsize
  inputs = generate_input_values(num_input, lower_bound, upper_bound)

  # NOTE(review): imported inside the function, presumably so that importing
  # this module does not require the counter implementation -- confirm.
  from apache_beam.transforms import DataflowDistributionCounter

  print("Number of runs:", num_runs)
  print("Input size:", num_input)
  print("Input sequence from %d to %d" % (lower_bound, upper_bound))
  print("Random seed:", seed)

  total_time = 0
  for i in range(num_runs):
    counter = DataflowDistributionCounter()
    # perf_counter() is monotonic and has the highest available resolution,
    # unlike the wall clock, which can jump while a run is being measured.
    start = time.perf_counter()
    counter.add_inputs_for_test(inputs)
    time_cost = time.perf_counter() - start
    print("Run %d: Total time cost %g sec" % (i + 1, time_cost))
    total_time += time_cost / num_input
  print("Per element update time cost:", total_time / num_runs)


if __name__ == '__main__':
  logging.basicConfig()
  # NOTE(review): judging by its name, this checks that the Cython counter
  # module is compiled before it is benchmarked -- confirm in tools/utils.
  utils.check_compiled(
      'apache_beam.transforms.cy_dataflow_distribution_counter')
  run_benchmark()