# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""A module for defining resource requirements for execution of transforms.

Pipeline authors can use resource hints to provide additional information to
runners about the desired aspects of the execution environment.

Resource hints can be specified on a transform level for parts of the pipeline,
or globally via --resource_hint pipeline option.

See also: PTransforms.with_resource_hints().
"""

import re
from typing import TYPE_CHECKING
from typing import Any
from typing import Dict
from typing import Optional

from apache_beam.options.pipeline_options import StandardOptions
from apache_beam.portability.common_urns import resource_hints

if TYPE_CHECKING:
  from typing import Mapping
  from apache_beam.options.pipeline_options import PipelineOptions

__all__ = [
    'ResourceHint',
    'AcceleratorHint',
    'MinRamHint',
    'merge_resource_hints',
    'parse_resource_hints',
    'resource_hints_from_options',
]


class ResourceHint:
  """A superclass to define resource hints.

  Subclasses set ``urn`` and may override ``parse`` and ``get_merged_value``.
  A hint class becomes discoverable by name and by URN once it is passed to
  ``register_resource_hint``.
  """
  # A unique URN, one per Resource Hint class.
  urn = None  # type: Optional[str]

  # Registries filled by register_resource_hint(); shared by all subclasses.
  _urn_to_known_hints = {}  # type: Dict[str, type]
  _name_to_known_hints = {}  # type: Dict[str, type]

  # Byte multipliers for the unit suffixes accepted by
  # _parse_storage_size_str(). Suffixes are matched case-sensitively.
  _STORAGE_SIZE_UNITS = {
      'PiB': 2**50,
      'TiB': 2**40,
      'GiB': 2**30,
      'MiB': 2**20,
      'KiB': 2**10,
      'PB': 10**15,
      'TB': 10**12,
      'GB': 10**9,
      'MB': 10**6,
      'KB': 10**3,
      'B': 1,
  }

  @classmethod
  def parse(cls, value):  # type: (str) -> Dict[str, bytes]
    """Describes how to parse the hint.

    Override to specify a custom parsing logic.

    Args:
      value: the user-supplied hint value.

    Returns:
      A single-entry dict mapping this hint's URN to the encoded value.

    Raises:
      ValueError: if ``value`` is not an ASCII-encodable string.
    """
    assert cls.urn is not None
    # Override this method to have a custom parsing logic.
    return {cls.urn: ResourceHint._parse_str(value)}

  @classmethod
  def get_merged_value(
      cls, outer_value, inner_value):  # type: (bytes, bytes) -> bytes
    """Reconciles values of a hint when the hint specified on a transform is
    also defined in an outer context, for example on a composite transform, or
    specified in the transform's execution environment.
    Override to specify a custom merging logic.
    """
    # Defaults to the inner value as it is the most specific one.
    return inner_value

  @staticmethod
  def get_by_urn(urn):
    """Returns the hint class registered for ``urn``.

    Raises:
      KeyError: if no hint class with that URN has been registered.
    """
    return ResourceHint._urn_to_known_hints[urn]

  @staticmethod
  def get_by_name(name):
    """Returns the hint class registered under ``name``.

    Raises:
      KeyError: if no hint class has been registered under that name.
    """
    return ResourceHint._name_to_known_hints[name]

  @staticmethod
  def is_registered(name):
    """Returns True if a hint class has been registered under ``name``."""
    return name in ResourceHint._name_to_known_hints

  @staticmethod
  def register_resource_hint(
      hint_name, hint_class):  # type: (str, type) -> None
    """Registers ``hint_class`` under ``hint_name`` and under its URN.

    Registering the same class under several names creates aliases; a later
    registration for an existing name or URN replaces the earlier entry.
    """
    assert issubclass(hint_class, ResourceHint)
    assert hint_class.urn is not None
    ResourceHint._name_to_known_hints[hint_name] = hint_class
    ResourceHint._urn_to_known_hints[hint_class.urn] = hint_class

  @staticmethod
  def _parse_str(value):
    """Encodes a string hint value as ASCII bytes.

    Raises:
      ValueError: if ``value`` is not a string, or (as UnicodeEncodeError, a
        ValueError subclass) if it contains non-ASCII characters.
    """
    if not isinstance(value, str):
      raise ValueError("Input must be a string.")
    return value.encode('ascii')

  @staticmethod
  def _parse_int(value):
    """Encodes an integer, or a string holding one, as ASCII decimal bytes.

    Raises:
      ValueError: if ``value`` is not an integer or a string that ``int()``
        accepts. Booleans are rejected as well.
    """
    if isinstance(value, str):
      value = int(value)
    # bool is a subclass of int, but str(True) is 'True', which is not a
    # decimal number and would break consumers such as _use_max().
    if isinstance(value, bool) or not isinstance(value, int):
      raise ValueError("Input must be an integer.")
    return str(value).encode('ascii')

  @staticmethod
  def _parse_storage_size_str(value):
    """Parses a human-friendly storage size string into a number of bytes.

    Integers are taken to be a byte count already. Strings must consist of a
    number followed by one of the suffixes in ``_STORAGE_SIZE_UNITS``; spaces
    are ignored. For example ``'100 MiB'`` yields ``b'104857600'`` and
    ``'1.5GB'`` yields ``b'1500000000'``.

    Returns:
      The size in bytes, rounded to an integer and encoded as ASCII decimal.

    Raises:
      ValueError: if the input has the wrong type, has no recognized unit
        suffix, or its numeric part is malformed or too large to represent.
    """
    if isinstance(value, int):
      return ResourceHint._parse_int(value)

    if not isinstance(value, str):
      raise ValueError("Input must be a string or integer.")

    value = value.strip().replace(" ", "")
    # The unit is the trailing run of non-digit characters.
    match = re.match(r'.*?(\D+)$', value)
    if not match:
      raise ValueError("Unrecognized value pattern.")

    suffix = match.group(1)
    if suffix not in ResourceHint._STORAGE_SIZE_UNITS:
      raise ValueError("Unrecognized unit.")
    multiplier = ResourceHint._STORAGE_SIZE_UNITS[suffix]
    amount = value[:-len(suffix)]

    try:
      size_in_bytes = round(float(amount) * multiplier)
    except OverflowError as err:
      # float() turns e.g. '1e400' into infinity, which round() cannot
      # convert. Report it as an invalid value like any other bad input.
      raise ValueError("Value is too large.") from err
    return str(size_in_bytes).encode('ascii')

  @staticmethod
  def _use_max(v1, v2):
    """Returns the larger of two decimal-encoded integers, re-encoded."""
    return str(max(int(v1), int(v2))).encode('ascii')


class AcceleratorHint(ResourceHint):
  """Describes desired hardware accelerators in execution environment."""
  urn = resource_hints.ACCELERATOR.urn


ResourceHint.register_resource_hint('accelerator', AcceleratorHint)


class MinRamHint(ResourceHint):
  """Describes min RAM requirements for transform's execution environment."""
  urn = resource_hints.MIN_RAM_BYTES.urn

  @classmethod
  def parse(cls, value):  # type: (str) -> Dict[str, bytes]
    """Parses a storage size (e.g. ``'4GiB'``) or an integer byte count."""
    return {cls.urn: ResourceHint._parse_storage_size_str(value)}

  @classmethod
  def get_merged_value(
      cls, outer_value, inner_value):  # type: (bytes, bytes) -> bytes
    """Keeps the larger of the two requirements."""
    return ResourceHint._use_max(outer_value, inner_value)


ResourceHint.register_resource_hint('min_ram', MinRamHint)
# Alias for interoperability with SDKs preferring camelCase.
ResourceHint.register_resource_hint('minRam', MinRamHint)


def parse_resource_hints(hints):  # type: (Dict[Any, Any]) -> Dict[str, bytes]
  """Converts user-facing hint names and values into URN-keyed bytes.

  Args:
    hints: mapping from a registered hint name to its raw value.

  Returns:
    A dict mapping each hint's URN to its encoded value.

  Raises:
    ValueError: if a hint name is not registered, or if the hint class
      rejects the value.
  """
  parsed_hints = {}
  for hint, value in hints.items():
    # Only the name lookup is guarded against KeyError, so that a KeyError
    # raised inside a hint's own parse() is not misreported as an unknown
    # hint name.
    try:
      hint_cls = ResourceHint.get_by_name(hint)
    except KeyError:
      raise ValueError(f"Unknown resource hint: {hint}.") from None

    try:
      parsed_hints.update(hint_cls.parse(value))
    except ValueError as err:
      raise ValueError(
          f"Resource hint {hint} has invalid value {value}.") from err

  return parsed_hints


def resource_hints_from_options(options):
  # type: (Optional[PipelineOptions]) -> Dict[str, bytes]
  """Parses the resource hints given in the pipeline options.

  Each entry of ``StandardOptions.resource_hints`` is either ``name=value``
  (split on the first ``=``) or a bare ``name``, whose value is then ``None``.

  Returns:
    The parsed hints keyed by URN, or an empty dict if ``options`` is None.

  Raises:
    ValueError: if any hint is unknown or has an invalid value.
  """
  if options is None:
    return {}
  hints = {}
  option_specified_hints = options.view_as(StandardOptions).resource_hints
  for hint in option_specified_hints:
    name, separator, value = hint.partition('=')
    # An empty separator means there was no '=' at all: record the hint with
    # no value and let the hint class decide whether that is acceptable.
    hints[name] = value if separator else None

  return parse_resource_hints(hints)


def merge_resource_hints(
    outer_hints, inner_hints
):  # type: (Mapping[str, bytes], Mapping[str, bytes]) -> Dict[str, bytes]
  """Combines hints from an outer context with those of an inner one.

  Hints present on only one side are kept as they are. For a URN present on
  both sides, the registered hint class decides through ``get_merged_value``.

  Returns:
    A new dict; neither input mapping is modified.

  Raises:
    KeyError: if a URN present on both sides has no registered hint class.
  """
  merged_hints = dict(inner_hints)
  for urn, outer_value in outer_hints.items():
    if urn in inner_hints:
      merged_hints[urn] = ResourceHint.get_by_urn(urn).get_merged_value(
          outer_value=outer_value, inner_value=inner_hints[urn])
    else:
      merged_hints[urn] = outer_value
  return merged_hints