github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/internal/cloudpickle_pickler.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  """Pickler for values, functions, and classes.
    19  
    20  For internal use only. No backwards compatibility guarantees.
    21  
    22  Uses the cloudpickle library to pickle data, functions, lambdas
    23  and classes.
    24  
    25  dump_session and load_session are no-ops.
    26  """
    27  
    28  # pytype: skip-file
    29  
    30  import base64
    31  import bz2
    32  import io
    33  import threading
    34  import zlib
    35  
    36  import cloudpickle
    37  
    38  try:
    39    from absl import flags
    40  except (ImportError, ModuleNotFoundError):
    41    pass
    42  
# Pickling, especially unpickling, causes broken module imports on Python 3
# if executed concurrently, see: BEAM-8651, http://bugs.python.org/issue38884.
# dumps() and loads() in this module hold this lock around their cloudpickle
# calls so that only one thread pickles or unpickles at a time.
_pickle_lock = threading.RLock()
# Concrete type of the lock object created above. dumps() uses it as the
# dispatch-table key under which the _pickle_rlock reducer is registered.
RLOCK_TYPE = type(_pickle_lock)
    47  
    48  
    49  def dumps(o, enable_trace=True, use_zlib=False):
    50    # type: (...) -> bytes
    51  
    52    """For internal use only; no backwards-compatibility guarantees."""
    53    with _pickle_lock:
    54      with io.BytesIO() as file:
    55        pickler = cloudpickle.CloudPickler(file)
    56        try:
    57          pickler.dispatch_table[type(flags.FLAGS)] = _pickle_absl_flags
    58        except NameError:
    59          pass
    60        try:
    61          pickler.dispatch_table[RLOCK_TYPE] = _pickle_rlock
    62        except NameError:
    63          pass
    64        pickler.dump(o)
    65        s = file.getvalue()
    66  
    67    # Compress as compactly as possible (compresslevel=9) to decrease peak memory
    68    # usage (of multiple in-memory copies) and to avoid hitting protocol buffer
    69    # limits.
    70    # WARNING: Be cautious about compressor change since it can lead to pipeline
    71    # representation change, and can break streaming job update compatibility on
    72    # runners such as Dataflow.
    73    if use_zlib:
    74      c = zlib.compress(s, 9)
    75    else:
    76      c = bz2.compress(s, compresslevel=9)
    77    del s  # Free up some possibly large and no-longer-needed memory.
    78  
    79    return base64.b64encode(c)
    80  
    81  
    82  def loads(encoded, enable_trace=True, use_zlib=False):
    83    """For internal use only; no backwards-compatibility guarantees."""
    84  
    85    c = base64.b64decode(encoded)
    86  
    87    if use_zlib:
    88      s = zlib.decompress(c)
    89    else:
    90      s = bz2.decompress(c)
    91  
    92    del c  # Free up some possibly large and no-longer-needed memory.
    93  
    94    with _pickle_lock:
    95      unpickled = cloudpickle.loads(s)
    96      return unpickled
    97  
    98  
    99  def _pickle_absl_flags(obj):
   100    return _create_absl_flags, tuple([])
   101  
   102  
   103  def _create_absl_flags():
   104    return flags.FLAGS
   105  
   106  
   107  def _pickle_rlock(obj):
   108    return RLOCK_TYPE, tuple([])
   109  
   110  
   111  def dump_session(file_path):
   112    # It is possible to dump session with cloudpickle. However, since references
   113    # are saved it should not be necessary. See https://s.apache.org/beam-picklers
   114    pass
   115  
   116  
   117  def load_session(file_path):
   118    # It is possible to load_session with cloudpickle. However, since references
   119    # are saved it should not be necessary. See https://s.apache.org/beam-picklers
   120    pass