# Scraped from:
# github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/internal/util.py
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Utility functions used throughout the package.

For internal use only. No backwards compatibility guarantees.
"""

# pytype: skip-file

import logging
import threading
import weakref
from multiprocessing.pool import ThreadPool
from typing import Any
from typing import Dict
from typing import Iterable
from typing import List
from typing import Tuple
from typing import Type
from typing import TypeVar
from typing import Union

T = TypeVar('T')


class ArgumentPlaceholder(object):
  """For internal use only; no backwards-compatibility guarantees.

  A placeholder object replacing PValues in argument lists.

  A Fn object can take any number of "side inputs", which are PValues that will
  be evaluated during pipeline execution and will be provided to the function
  at the moment of its execution as positional or keyword arguments.

  This is used only internally and should never be used by user code. A custom
  Fn object by the time it executes will have such values replaced with real
  computed values.
  """
  def __eq__(self, other):
    """Tests for equality of two placeholder objects.

    This method is used only for test code. All placeholder objects are
    equal to each other.

    Args:
      other: Another object to compare to.

    Returns:
      True if other is an ArgumentPlaceholder instance, False otherwise.
    """
    return isinstance(other, ArgumentPlaceholder)

  def __hash__(self):
    # All placeholders compare equal, so they must all share one hash value.
    return hash(type(self))


def remove_objects_from_args(args,  # type: Iterable[Any]
                             kwargs,  # type: Dict[str, Any]
                             pvalue_class  # type: Union[Type[T], Tuple[Type[T], ...]]
                            ):
  # type: (...) -> Tuple[List[Any], Dict[str, Any], List[T]]

  """For internal use only; no backwards-compatibility guarantees.

  Replaces all objects of a given type in args/kwargs with a placeholder.

  Args:
    args: A list of positional arguments.
    kwargs: A dictionary of keyword arguments.
    pvalue_class: A class object (or tuple of class objects) representing the
      types of arguments that must be replaced with a placeholder value
      (instance of ArgumentPlaceholder).

  Returns:
    A 3-tuple containing a modified list of positional arguments, a modified
    dictionary of keyword arguments, and a list of all objects replaced with
    a placeholder value. The replaced objects are listed in positional order
    first, followed by keyword arguments in sorted key order.
  """
  pvals = []

  def swapper(value):
    # Record the removed object and hand back a stand-in for it.
    pvals.append(value)
    return ArgumentPlaceholder()

  new_args = [swapper(v) if isinstance(v, pvalue_class) else v for v in args]
  # Make sure the order in which we process the dictionary keys is predictable
  # by sorting the entries first. This will be important when putting back
  # PValues.
  new_kwargs = dict((k, swapper(v)) if isinstance(v, pvalue_class) else (k, v)
                    for k,
                    v in sorted(kwargs.items()))
  return (new_args, new_kwargs, pvals)


def insert_values_in_args(args, kwargs, values):
  """For internal use only; no backwards-compatibility guarantees.

  Replaces all placeholders in args/kwargs with actual values.

  Values are consumed in order: first by placeholders in args (left to
  right), then by placeholders in kwargs in sorted key order. This mirrors
  the order in which remove_objects_from_args collects the removed objects.

  Args:
    args: A list of positional arguments.
    kwargs: A dictionary of keyword arguments.
    values: A list of values that will be used to replace placeholder values.

  Returns:
    A 2-tuple containing a modified list of positional arguments, and a
    modified dictionary of keyword arguments.
  """
  # Use a local iterator so that we don't modify values.
  v_iter = iter(values)
  new_args = [
      next(v_iter) if isinstance(arg, ArgumentPlaceholder) else arg
      for arg in args
  ]
  new_kwargs = dict(
      (k, next(v_iter)) if isinstance(v, ArgumentPlaceholder) else (k, v)
      for k,
      v in sorted(kwargs.items()))
  return (new_args, new_kwargs)


def run_using_threadpool(fn_to_execute, inputs, pool_size):
  """For internal use only; no backwards-compatibility guarantees.

  Runs the given function on given inputs using a thread pool.

  Args:
    fn_to_execute: Function to execute
    inputs: Inputs on which given function will be executed in parallel. May
      be any iterable; iterables without a length are materialized as a list.
    pool_size: Size of thread pool. The pool never has more workers than
      there are inputs.

  Returns:
    A list of results retrieved after executing the given function on given
    inputs, in the same order as the inputs. An empty list if there are no
    inputs.

  Raises:
    ValueError: If inputs is non-empty and pool_size is less than 1 (raised
      by ThreadPool).
  """
  # The pool is sized from len(inputs), so iterables without a length
  # (e.g. generators) have to be materialized first.
  if not hasattr(inputs, '__len__'):
    inputs = list(inputs)

  # ThreadPool refuses to start with zero workers, so there is nothing to do
  # (and nothing to spin up) when there are no inputs.
  if len(inputs) == 0:
    return []

  # ThreadPool crashes in old versions of Python (< 2.7.5) if created
  # from a child thread. (http://bugs.python.org/issue10015)
  if not hasattr(threading.current_thread(), '_children'):
    threading.current_thread()._children = weakref.WeakKeyDictionary()

  # We record and reset logging level here since 'apitools' library Beam
  # depends on updates the logging level when used with a threadpool -
  # https://github.com/google/apitools/issues/141
  # TODO: Remove this once above issue in 'apitools' is fixed.
  root_logger = logging.getLogger()
  old_level = root_logger.level

  pool = ThreadPool(min(pool_size, len(inputs)))
  try:
    return pool.map(fn_to_execute, inputs)
  finally:
    pool.terminate()
    root_logger.setLevel(old_level)