github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/transforms/resources.py (about)

     1  # Licensed to the Apache Software Foundation (ASF) under one or more
     2  # contributor license agreements.  See the NOTICE file distributed with
     3  # this work for additional information regarding copyright ownership.
     4  # The ASF licenses this file to You under the Apache License, Version 2.0
     5  # (the "License"); you may not use this file except in compliance with
     6  # the License.  You may obtain a copy of the License at
     7  #
     8  #    http://www.apache.org/licenses/LICENSE-2.0
     9  #
    10  # Unless required by applicable law or agreed to in writing, software
    11  # distributed under the License is distributed on an "AS IS" BASIS,
    12  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  # See the License for the specific language governing permissions and
    14  # limitations under the License.
    15  #
    16  
    17  """A module for defining resource requirements for execution of transforms.
    18  
    19  Pipeline authors can use resource hints to provide additional information to
    20  runners about the desired aspects of the execution environment.
    21  
Resource hints can be specified on a transform level for parts of the pipeline,
or globally via the --resource_hint pipeline option.
    24  
See also: PTransform.with_resource_hints().
    26  """
    27  
    28  import re
    29  from typing import TYPE_CHECKING
    30  from typing import Any
    31  from typing import Dict
    32  from typing import Optional
    33  
    34  from apache_beam.options.pipeline_options import StandardOptions
    35  from apache_beam.portability.common_urns import resource_hints
    36  
    37  if TYPE_CHECKING:
    38    from typing import Mapping
    39    from apache_beam.options.pipeline_options import PipelineOptions
    40  
# Public names of this module, i.e. what a wildcard import exports.
__all__ = [
    'ResourceHint',
    'AcceleratorHint',
    'MinRamHint',
    'merge_resource_hints',
    'parse_resource_hints',
    'resource_hints_from_options',
]
    49  
    50  
    51  class ResourceHint:
    52    """A superclass to define resource hints."""
    53    # A unique URN, one per Resource Hint class.
    54    urn = None  # type: Optional[str]
    55  
    56    _urn_to_known_hints = {}  # type: Dict[str, type]
    57    _name_to_known_hints = {}  # type: Dict[str, type]
    58  
    59    @classmethod
    60    def parse(cls, value):  # type: (str) -> Dict[str, bytes]
    61      """Describes how to parse the hint.
    62      Override to specify a custom parsing logic."""
    63      assert cls.urn is not None
    64      # Override this method to have a custom parsing logic.
    65      return {cls.urn: ResourceHint._parse_str(value)}
    66  
    67    @classmethod
    68    def get_merged_value(
    69        cls, outer_value, inner_value):  # type: (bytes, bytes) -> bytes
    70      """Reconciles values of a hint when the hint specified on a transform is
    71      also defined in an outer context, for example on a composite transform, or
    72      specified in the transform's execution environment.
    73      Override to specify a custom merging logic.
    74      """
    75      # Defaults to the inner value as it is the most specific one.
    76      return inner_value
    77  
    78    @staticmethod
    79    def get_by_urn(urn):
    80      return ResourceHint._urn_to_known_hints[urn]
    81  
    82    @staticmethod
    83    def get_by_name(name):
    84      return ResourceHint._name_to_known_hints[name]
    85  
    86    @staticmethod
    87    def is_registered(name):
    88      return name in ResourceHint._name_to_known_hints
    89  
    90    @staticmethod
    91    def register_resource_hint(
    92        hint_name, hint_class):  # type: (str, type) -> None
    93      assert issubclass(hint_class, ResourceHint)
    94      assert hint_class.urn is not None
    95      ResourceHint._name_to_known_hints[hint_name] = hint_class
    96      ResourceHint._urn_to_known_hints[hint_class.urn] = hint_class
    97  
    98    @staticmethod
    99    def _parse_str(value):
   100      if not isinstance(value, str):
   101        raise ValueError("Input must be a string.")
   102      return value.encode('ascii')
   103  
   104    @staticmethod
   105    def _parse_int(value):
   106      if isinstance(value, str):
   107        value = int(value)
   108      if not isinstance(value, int):
   109        raise ValueError("Input must be an integer.")
   110      return str(value).encode('ascii')
   111  
   112    @staticmethod
   113    def _parse_storage_size_str(value):
   114      """Parses a human-friendly storage size string into a number of bytes.
   115      """
   116      if isinstance(value, int):
   117        return ResourceHint._parse_int(value)
   118  
   119      if not isinstance(value, str):
   120        raise ValueError("Input must be a string or integer.")
   121  
   122      value = value.strip().replace(" ", "")
   123      units = {
   124          'PiB': 2**50,
   125          'TiB': 2**40,
   126          'GiB': 2**30,
   127          'MiB': 2**20,
   128          'KiB': 2**10,
   129          'PB': 10**15,
   130          'TB': 10**12,
   131          'GB': 10**9,
   132          'MB': 10**6,
   133          'KB': 10**3,
   134          'B': 1,
   135      }
   136      match = re.match(r'.*?(\D+)$', value)
   137      if not match:
   138        raise ValueError("Unrecognized value pattern.")
   139  
   140      suffix = match.group(1)
   141      if suffix not in units:
   142        raise ValueError("Unrecognized unit.")
   143      multiplier = units[suffix]
   144      value = value[:-len(suffix)]
   145  
   146      return str(round(float(value) * multiplier)).encode('ascii')
   147  
   148    @staticmethod
   149    def _use_max(v1, v2):
   150      return str(max(int(v1), int(v2))).encode('ascii')
   151  
   152  
   153  class AcceleratorHint(ResourceHint):
   154    """Describes desired hardware accelerators in execution environment."""
   155    urn = resource_hints.ACCELERATOR.urn
   156  
   157  
   158  ResourceHint.register_resource_hint('accelerator', AcceleratorHint)
   159  
   160  
   161  class MinRamHint(ResourceHint):
   162    """Describes min RAM requirements for transform's execution environment."""
   163    urn = resource_hints.MIN_RAM_BYTES.urn
   164  
   165    @classmethod
   166    def parse(cls, value):  # type: (str) -> Dict[str, bytes]
   167      return {cls.urn: ResourceHint._parse_storage_size_str(value)}
   168  
   169    @classmethod
   170    def get_merged_value(
   171        cls, outer_value, inner_value):  # type: (bytes, bytes) -> bytes
   172      return ResourceHint._use_max(outer_value, inner_value)
   173  
   174  
   175  ResourceHint.register_resource_hint('min_ram', MinRamHint)
   176  # Alias for interoperability with SDKs preferring camelCase.
   177  ResourceHint.register_resource_hint('minRam', MinRamHint)
   178  
   179  
   180  def parse_resource_hints(hints):  # type: (Dict[Any, Any]) -> Dict[str, bytes]
   181    parsed_hints = {}
   182    for hint, value in hints.items():
   183      try:
   184        hint_cls = ResourceHint.get_by_name(hint)
   185        try:
   186          parsed_hints.update(hint_cls.parse(value))
   187        except ValueError:
   188          raise ValueError(f"Resource hint {hint} has invalid value {value}.")
   189      except KeyError:
   190        raise ValueError(f"Unknown resource hint: {hint}.")
   191  
   192    return parsed_hints
   193  
   194  
   195  def resource_hints_from_options(options):
   196    # type: (Optional[PipelineOptions]) -> Dict[str, bytes]
   197    if options is None:
   198      return {}
   199    hints = {}
   200    option_specified_hints = options.view_as(StandardOptions).resource_hints
   201    for hint in option_specified_hints:
   202      if '=' in hint:
   203        k, v = hint.split('=', maxsplit=1)
   204        hints[k] = v
   205      else:
   206        hints[hint] = None
   207  
   208    return parse_resource_hints(hints)
   209  
   210  
   211  def merge_resource_hints(
   212      outer_hints, inner_hints
   213  ):  # type: (Mapping[str, bytes], Mapping[str, bytes]) -> Dict[str, bytes]
   214    merged_hints = dict(inner_hints)
   215    for urn, outer_value in outer_hints.items():
   216      if urn in inner_hints:
   217        merged_value = ResourceHint.get_by_urn(urn).get_merged_value(
   218            outer_value=outer_value, inner_value=inner_hints[urn])
   219      else:
   220        merged_value = outer_value
   221      merged_hints[urn] = merged_value
   222    return merged_hints