github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/internal/util.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  """Utility functions used throughout the package.
    19  
    20  For internal use only. No backwards compatibility guarantees.
    21  """
    22  
    23  # pytype: skip-file
    24  
    25  import logging
    26  import threading
    27  import weakref
    28  from multiprocessing.pool import ThreadPool
    29  from typing import Any
    30  from typing import Dict
    31  from typing import Iterable
    32  from typing import List
    33  from typing import Tuple
    34  from typing import Type
    35  from typing import TypeVar
    36  from typing import Union
    37  
    38  T = TypeVar('T')
    39  
    40  
    41  class ArgumentPlaceholder(object):
    42    """For internal use only; no backwards-compatibility guarantees.
    43  
    44    A place holder object replacing PValues in argument lists.
    45  
    46    A Fn object can take any number of "side inputs", which are PValues that will
    47    be evaluated during pipeline execution and will be provided to the function
    48    at the moment of its execution as positional or keyword arguments.
    49  
    50    This is used only internally and should never be used by user code. A custom
    51    Fn object by the time it executes will have such values replaced with real
    52    computed values.
    53    """
    54    def __eq__(self, other):
    55      """Tests for equality of two placeholder objects.
    56  
    57      Args:
    58        other: Another placeholder object to compare to.
    59  
    60      This method is used only for test code. All placeholder objects are
    61      equal to each other.
    62      """
    63      return isinstance(other, ArgumentPlaceholder)
    64  
    65    def __hash__(self):
    66      return hash(type(self))
    67  
    68  
    69  def remove_objects_from_args(args,  # type: Iterable[Any]
    70                               kwargs,  # type: Dict[str, Any]
    71                               pvalue_class  # type: Union[Type[T], Tuple[Type[T], ...]]
    72                              ):
    73    # type: (...) -> Tuple[List[Any], Dict[str, Any], List[T]]
    74  
    75    """For internal use only; no backwards-compatibility guarantees.
    76  
    77    Replaces all objects of a given type in args/kwargs with a placeholder.
    78  
    79    Args:
    80      args: A list of positional arguments.
    81      kwargs: A dictionary of keyword arguments.
    82      pvalue_class: A class object representing the types of arguments that must
    83        be replaced with a placeholder value (instance of ArgumentPlaceholder).
    84  
    85    Returns:
    86      A 3-tuple containing a modified list of positional arguments, a modified
    87      dictionary of keyword arguments, and a list of all objects replaced with
    88      a placeholder value.
    89    """
    90    pvals = []
    91  
    92    def swapper(value):
    93      pvals.append(value)
    94      return ArgumentPlaceholder()
    95  
    96    new_args = [swapper(v) if isinstance(v, pvalue_class) else v for v in args]
    97    # Make sure the order in which we process the dictionary keys is predictable
    98    # by sorting the entries first. This will be important when putting back
    99    # PValues.
   100    new_kwargs = dict((k, swapper(v)) if isinstance(v, pvalue_class) else (k, v)
   101                      for k,
   102                      v in sorted(kwargs.items()))
   103    return (new_args, new_kwargs, pvals)
   104  
   105  
   106  def insert_values_in_args(args, kwargs, values):
   107    """For internal use only; no backwards-compatibility guarantees.
   108  
   109    Replaces all placeholders in args/kwargs with actual values.
   110  
   111    Args:
   112      args: A list of positional arguments.
   113      kwargs: A dictionary of keyword arguments.
   114      values: A list of values that will be used to replace placeholder values.
   115  
   116    Returns:
   117      A 2-tuple containing a modified list of positional arguments, and a
   118      modified dictionary of keyword arguments.
   119    """
   120    # Use a local iterator so that we don't modify values.
   121    v_iter = iter(values)
   122    new_args = [
   123        next(v_iter) if isinstance(arg, ArgumentPlaceholder) else arg
   124        for arg in args
   125    ]
   126    new_kwargs = dict(
   127        (k, next(v_iter)) if isinstance(v, ArgumentPlaceholder) else (k, v) for k,
   128        v in sorted(kwargs.items()))
   129    return (new_args, new_kwargs)
   130  
   131  
   132  def run_using_threadpool(fn_to_execute, inputs, pool_size):
   133    """For internal use only; no backwards-compatibility guarantees.
   134  
   135    Runs the given function on given inputs using a thread pool.
   136  
   137    Args:
   138      fn_to_execute: Function to execute
   139      inputs: Inputs on which given function will be executed in parallel.
   140      pool_size: Size of thread pool.
   141    Returns:
   142      Results retrieved after executing the given function on given inputs.
   143    """
   144  
   145    # ThreadPool crashes in old versions of Python (< 2.7.5) if created
   146    # from a child thread. (http://bugs.python.org/issue10015)
   147    if not hasattr(threading.current_thread(), '_children'):
   148      threading.current_thread()._children = weakref.WeakKeyDictionary()
   149    pool = ThreadPool(min(pool_size, len(inputs)))
   150    try:
   151      # We record and reset logging level here since 'apitools' library Beam
   152      # depends on updates the logging level when used with a threadpool -
   153      # https://github.com/google/apitools/issues/141
   154      # TODO: Remove this once above issue in 'apitools' is fixed.
   155      old_level = logging.getLogger().level
   156      return pool.map(fn_to_execute, inputs)
   157    finally:
   158      pool.terminate()
   159      logging.getLogger().setLevel(old_level)