# github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/transforms/display.py
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
:class:`DisplayData`, its classes, interfaces and methods.

The classes in this module allow users and transform developers to define
static display data to be displayed when a pipeline runs.
:class:`~apache_beam.transforms.ptransform.PTransform` s,
:class:`~apache_beam.transforms.core.DoFn` s
and other pipeline components are subclasses of the :class:`HasDisplayData`
mixin. To add static display data to a component, you can override the
:meth:`HasDisplayData.display_data()` method.

Available classes:

* :class:`HasDisplayData` - Components that inherit from this class can have
  static display data shown in the UI.
* :class:`DisplayDataItem` - This class represents static display data
  elements.
* :class:`DisplayData` - Internal class that is used to create display data
  and communicate it to the API.
"""

# pytype: skip-file

import calendar
import inspect
import json
from datetime import datetime
from datetime import timedelta
from typing import TYPE_CHECKING
from typing import List

from apache_beam.portability import common_urns
from apache_beam.portability.api import beam_runner_api_pb2

if TYPE_CHECKING:
  from apache_beam.options.pipeline_options import PipelineOptions

__all__ = ['HasDisplayData', 'DisplayDataItem', 'DisplayData']


class HasDisplayData(object):
  """ Basic mixin for elements that contain display data.

  It implements only the display_data method and a
  _get_display_data_namespace method.
  """
  def display_data(self):
    # type: () -> dict

    """ Returns the display data associated to a pipeline component.

    It should be reimplemented in pipeline components that wish to have
    static display data.

    Returns:
      Dict[str, Any]: A dictionary containing ``key:value`` pairs.
      The value might be an integer, float or string value; a
      :class:`DisplayDataItem` for values that have more data
      (e.g. short value, label, url); or a :class:`HasDisplayData` instance
      that has more display data that should be picked up. For example::

        {
          'key1': 'string_value',
          'key2': 1234,
          'key3': 3.14159265,
          'key4': DisplayDataItem('apache.org', url='http://apache.org'),
          'key5': subComponent
        }
    """
    return {}

  def _get_display_data_namespace(self):
    # type: () -> str

    """ Returns the namespace for this component's display data.

    Returns:
      The defining module and class name of the instance, joined by a dot.
    """
    return '{}.{}'.format(self.__module__, self.__class__.__name__)


class DisplayData(object):
  """ Static display data associated with a pipeline component.
  """
  def __init__(
      self,
      namespace,  # type: str
      display_data_dict  # type: dict
  ):
    # type: (...) -> None

    """ Builds the list of items for the given namespace.

    Args:
      namespace: Namespace assigned to every item created directly from
        ``display_data_dict``.
      display_data_dict: Mapping of keys to values, in the format returned by
        :meth:`HasDisplayData.display_data()`.
    """
    self.namespace = namespace
    self.items = []  # type: List[DisplayDataItem]
    self._populate_items(display_data_dict)

  def _populate_items(self, display_data_dict):
    """ Populates the list of display data items.

    Nested :class:`HasDisplayData` values are expanded recursively under their
    own namespace; :class:`DisplayDataItem` values are stamped with this
    namespace and their key (unless they ask to be dropped); any other value
    is wrapped in a new :class:`DisplayDataItem`.
    """
    for key, element in display_data_dict.items():
      if isinstance(element, HasDisplayData):
        subcomponent_display_data = DisplayData(
            element._get_display_data_namespace(), element.display_data())
        self.items += subcomponent_display_data.items
        continue

      if isinstance(element, DisplayDataItem):
        if element.should_drop():
          continue
        element.key = key
        element.namespace = self.namespace
        self.items.append(element)
        continue

      # If it's not a HasDisplayData element,
      # nor a DisplayDataItem, then it's a simple value
      self.items.append(
          DisplayDataItem(element, namespace=self.namespace, key=key))

  def to_proto(self):
    # type: (...) -> List[beam_runner_api_pb2.DisplayData]

    """Returns a List of Beam proto representation of Display data.

    Items that fail validation (:meth:`DisplayDataItem.get_dict` raises
    ``ValueError``) are skipped.

    Raises:
      ValueError: If the formatted value of a valid item is not a str, bool,
        int, float or complex.
    """
    def create_payload(dd):
      try:
        display_data_dict = dd.get_dict()
      except ValueError:
        # Skip if the display data is invalid.
        return None

      # We use 'label' or 'key' properties to populate the 'label' attribute of
      # 'LabelledPayload'. 'label' is a better choice since it's expected to be
      # more human readable but some transforms, sources, etc. may not set a
      # 'label' property when configuring DisplayData.
      label = (
          display_data_dict['label']
          if 'label' in display_data_dict else display_data_dict['key'])

      value = display_data_dict['value']
      # bool must be tested before int because bool is a subclass of int.
      if isinstance(value, str):
        typed_value = {'string_value': value}
      elif isinstance(value, bool):
        typed_value = {'bool_value': value}
      elif isinstance(value, int):
        typed_value = {'int_value': value}
      elif isinstance(value, (float, complex)):
        typed_value = {'double_value': value}
      else:
        raise ValueError(
            'Unsupported type %s for value of display data %s' %
            (type(value), label))

      return beam_runner_api_pb2.LabelledPayload(
          label=label,
          key=display_data_dict['key'],
          namespace=display_data_dict.get('namespace', ''),
          **typed_value)

    dd_protos = []
    for dd in self.items:
      # Build each payload once and reuse it for serialization.
      dd_proto = create_payload(dd)
      if dd_proto is None:
        continue
      dd_protos.append(
          beam_runner_api_pb2.DisplayData(
              urn=common_urns.StandardDisplayData.DisplayData.LABELLED.urn,
              payload=dd_proto.SerializeToString()))
    return dd_protos

  @classmethod
  def create_from_options(cls, pipeline_options):
    """ Creates :class:`~apache_beam.transforms.display.DisplayData` from a
    :class:`~apache_beam.options.pipeline_options.PipelineOptions` instance.

    When creating :class:`~apache_beam.transforms.display.DisplayData`, this
    method will convert the value of any item of a non-supported type to its
    string representation.
    The normal :meth:`.create_from()` method rejects those items.

    Returns:
      ~apache_beam.transforms.display.DisplayData:
      A :class:`~apache_beam.transforms.display.DisplayData` instance with
      populated items.

    Raises:
      ValueError: If the **pipeline_options** argument is
        not an instance of
        :class:`~apache_beam.options.pipeline_options.PipelineOptions`.
    """
    # Imported here rather than at module level (the module-level import is
    # guarded by TYPE_CHECKING).
    from apache_beam.options.pipeline_options import PipelineOptions
    if not isinstance(pipeline_options, PipelineOptions):
      raise ValueError(
          'Element of class {}.{} does not subclass PipelineOptions'.format(
              pipeline_options.__module__, pipeline_options.__class__.__name__))

    items = {
        k: (v if DisplayDataItem._get_value_type(v) is not None else str(v))
        for k,
        v in pipeline_options.display_data().items()
    }
    return cls(pipeline_options._get_display_data_namespace(), items)

  @classmethod
  def create_from(cls, has_display_data):
    """ Creates :class:`~apache_beam.transforms.display.DisplayData` from a
    :class:`HasDisplayData` instance.

    Returns:
      ~apache_beam.transforms.display.DisplayData:
      A :class:`~apache_beam.transforms.display.DisplayData` instance with
      populated items.

    Raises:
      ValueError: If the **has_display_data** argument is
        not an instance of :class:`HasDisplayData`.
    """
    if not isinstance(has_display_data, HasDisplayData):
      raise ValueError(
          'Element of class {}.{} does not subclass HasDisplayData'.format(
              has_display_data.__module__, has_display_data.__class__.__name__))
    return cls(
        has_display_data._get_display_data_namespace(),
        has_display_data.display_data())


class DisplayDataItem(object):
  """ A DisplayDataItem represents a unit of static display data.

  Each item is identified by a key and the namespace of the component the
  display item belongs to.
  """
  # Maps the exact Python type of a value to its API type name.
  typeDict = {
      str: 'STRING',
      int: 'INTEGER',
      float: 'FLOAT',
      bool: 'BOOLEAN',
      timedelta: 'DURATION',
      datetime: 'TIMESTAMP'
  }

  def __init__(
      self,
      value,
      url=None,
      label=None,
      namespace=None,
      key=None,
      shortValue=None):
    """ Initializes the item and infers its type from ``value``.

    Args:
      value: The value to display.
      url: Optional URL stored with the item.
      label: Optional label stored with the item.
      namespace: Namespace of the component the item belongs to.
      key: Key identifying the item within its namespace.
      shortValue: Optional short value. When None, it is computed with
        :meth:`_get_short_value`.
    """
    self.namespace = namespace
    self.key = key
    self.type = self._get_value_type(value)
    self.shortValue = (
        shortValue if shortValue is not None else self._get_short_value(
            value, self.type))
    self.value = value
    self.url = url
    self.label = label
    self._drop_if_none = False
    self._drop_if_default = False
    # Only consulted once drop_if_default() has been called.
    self._default = None

  def drop_if_none(self):
    # type: () -> DisplayDataItem

    """ The item should be dropped if its value is None.

    Returns:
      Returns self.
    """
    self._drop_if_none = True
    return self

  def drop_if_default(self, default):
    # type: (...) -> DisplayDataItem

    """ The item should be dropped if its value is equal to its default.

    Args:
      default: The value that the item's value is compared against.

    Returns:
      Returns self.
    """
    self._default = default
    self._drop_if_default = True
    return self

  def should_drop(self):
    # type: () -> bool

    """ Return True if the item should be dropped, or False if it should not
    be dropped. This depends on the drop_if_none, and drop_if_default calls.

    Returns:
      True or False; depending on whether the item should be dropped or kept.
    """
    if self._drop_if_none and self.value is None:
      return True
    if self._drop_if_default and self.value == self._default:
      return True
    return False

  def is_valid(self):
    # type: () -> None

    """ Checks that all the necessary fields of the :class:`DisplayDataItem`
    are filled in. It checks that neither key, namespace, value or type are
    :data:`None`.

    Raises:
      ValueError: If the item does not have a key, namespace,
        value or type.
    """
    if self.key is None:
      raise ValueError(
          'Invalid DisplayDataItem %s. Key must not be None.' % self)
    if self.namespace is None:
      raise ValueError(
          'Invalid DisplayDataItem %s. Namespace must not be None' % self)
    if self.value is None:
      raise ValueError(
          'Invalid DisplayDataItem %s. Value must not be None' % self)
    if self.type is None:
      raise ValueError(
          'Invalid DisplayDataItem. Value {} is of an unsupported type.'.format(
              self.value))

  def _get_dict(self):
    """ Builds the dictionary representation without validating the item.

    'url', 'shortValue' and 'label' are only included when they are not None.
    """
    res = {
        'key': self.key,
        'namespace': self.namespace,
        'type': self.type if self.type != 'CLASS' else 'STRING'
    }
    # TODO: Python Class types should not be special-cased once
    # the Fn API is in.
    if self.url is not None:
      res['url'] = self.url
    if self.shortValue is not None:
      res['shortValue'] = self.shortValue
    if self.label is not None:
      res['label'] = self.label
    res['value'] = self._format_value(self.value, self.type)
    return res

  def get_dict(self):
    # type: () -> dict

    """ Returns the internal-API dictionary representing the
    :class:`DisplayDataItem`.

    Returns:
      Dict[str, Any]: A dictionary. The internal-API dictionary representing
      the :class:`DisplayDataItem`.

    Raises:
      ValueError: if the item is not valid.
    """
    self.is_valid()
    return self._get_dict()

  def __repr__(self):
    # is_valid() formats the item into its error messages, so repr() must not
    # raise on values json cannot encode; default=str stringifies those.
    return 'DisplayDataItem({})'.format(
        json.dumps(self._get_dict(), default=str))

  def __eq__(self, other):
    # Items are equal when their dictionary representations are equal.
    if isinstance(other, self.__class__):
      return self._get_dict() == other._get_dict()
    return False

  def __hash__(self):
    # Hash the same representation that __eq__ compares.
    return hash(tuple(sorted(self._get_dict().items())))

  @classmethod
  def _format_value(cls, value, type_):
    """ Returns the API representation of a value given its type.

    Args:
      value: The value of the item that needs to be formatted.
      type_(string): The type of the value.

    Returns:
      A formatted value in the form of a float, int, or string. 'CLASS'
      values become ``module.name``; 'DURATION' and 'TIMESTAMP' values are
      converted to milliseconds. Values of any other type are returned
      unchanged.
    """
    res = value
    if type_ == 'CLASS':
      res = '{}.{}'.format(value.__module__, value.__name__)
    elif type_ == 'DURATION':
      res = value.total_seconds() * 1000
    elif type_ == 'TIMESTAMP':
      # timegm() interprets the time tuple as UTC. utctimetuple() converts
      # timezone-aware datetimes to UTC first and yields the same fields as
      # timetuple() for naive datetimes.
      res = calendar.timegm(
          value.utctimetuple()) * 1000 + value.microsecond // 1000
    return res

  @classmethod
  def _get_short_value(cls, value, type_):
    """ Calculates the short value for an item.

    Args:
      value: The value of the item that needs to be shortened.
      type_(string): The type of the value.

    Returns:
      The unqualified name of a class if type_ is 'CLASS'. None otherwise.
    """
    if type_ == 'CLASS':
      return value.__name__
    return None

  @classmethod
  def _get_value_type(cls, value):
    """ Infers the type of a given value.

    Args:
      value: The value whose type needs to be inferred. For 'DURATION' and
        'TIMESTAMP', the corresponding Python type is datetime.timedelta and
        datetime.datetime respectively. For Python classes, the API type is
        just 'STRING' at the moment.

    Returns:
      One of 'STRING', 'INTEGER', 'FLOAT', 'BOOLEAN', 'CLASS', 'DURATION', or
      'TIMESTAMP', depending on the type of the value. A value of None is
      reported as 'STRING'. None is returned for any other unsupported value.
    """
    #TODO: Fix Args: documentation once the Python classes handling has changed
    type_ = cls.typeDict.get(type(value))
    if type_ is None:
      type_ = 'CLASS' if inspect.isclass(value) else None
    if type_ is None and value is None:
      type_ = 'STRING'
    return type_