#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Pickler for values, functions, and classes.

For internal use only. No backwards compatibility guarantees.

Uses the cloudpickle library to pickle data, functions, lambdas
and classes.

dump_session and load_session are no-ops.
"""

# pytype: skip-file

import base64
import bz2
import io
import threading
import zlib

import cloudpickle

try:
  from absl import flags
except ImportError:
  # absl is optional. ModuleNotFoundError is a subclass of ImportError, so
  # this single clause covers both. When the import fails the name `flags`
  # stays unbound and dumps() skips registering the absl reducer.
  pass

# Pickling, especially unpickling, causes broken module imports on Python 3
# if executed concurrently, see: BEAM-8651, http://bugs.python.org/issue38884.
_pickle_lock = threading.RLock()
RLOCK_TYPE = type(_pickle_lock)


def dumps(o, enable_trace=True, use_zlib=False):
  # type: (...) -> bytes

  """For internal use only; no backwards-compatibility guarantees.

  Pickles ``o`` with cloudpickle, compresses the pickle and returns the
  compressed payload base64-encoded.

  Args:
    o: the object to serialize.
    enable_trace: accepted for signature compatibility; not used by this
      implementation.
    use_zlib: if true the pickle is compressed with zlib, otherwise with bz2.
      The same value has to be passed to ``loads`` to decode the result.

  Returns:
    The base64-encoded, compressed pickle.
  """
  with _pickle_lock:
    with io.BytesIO() as buffer:
      pickler = cloudpickle.CloudPickler(buffer)

      # Only the lookup of `flags` can raise NameError (absl not installed),
      # so only that lookup is guarded.
      try:
        flag_values_type = type(flags.FLAGS)
      except NameError:
        pass
      else:
        pickler.dispatch_table[flag_values_type] = _pickle_absl_flags

      # RLOCK_TYPE and _pickle_rlock are always defined at module level, so
      # this registration needs no guard.
      pickler.dispatch_table[RLOCK_TYPE] = _pickle_rlock

      pickler.dump(o)
      s = buffer.getvalue()

  # Compress as compactly as possible (compresslevel=9) to decrease peak memory
  # usage (of multiple in-memory copies) and to avoid hitting protocol buffer
  # limits.
  # WARNING: Be cautious about compressor change since it can lead to pipeline
  # representation change, and can break streaming job update compatibility on
  # runners such as Dataflow.
  if use_zlib:
    c = zlib.compress(s, 9)
  else:
    c = bz2.compress(s, compresslevel=9)
  del s  # Free up some possibly large and no-longer-needed memory.

  return base64.b64encode(c)


def loads(encoded, enable_trace=True, use_zlib=False):
  """For internal use only; no backwards-compatibility guarantees.

  Reverses ``dumps``: base64-decodes ``encoded``, decompresses it and
  unpickles the result with cloudpickle.

  NOTE: unpickling can execute arbitrary code. Only pass payloads that come
  from a trusted source.

  Args:
    encoded: a base64-encoded, compressed pickle as produced by ``dumps``.
    enable_trace: accepted for signature compatibility; not used by this
      implementation.
    use_zlib: if true the payload is decompressed with zlib, otherwise with
      bz2. Has to match the value that was passed to ``dumps``.

  Returns:
    The unpickled object.
  """
  c = base64.b64decode(encoded)

  if use_zlib:
    s = zlib.decompress(c)
  else:
    s = bz2.decompress(c)

  del c  # Free up some possibly large and no-longer-needed memory.

  with _pickle_lock:
    return cloudpickle.loads(s)


def _pickle_absl_flags(obj):
  """Reducer for the absl flags object.

  The object's state is not pickled; unpickling calls _create_absl_flags,
  which returns ``flags.FLAGS`` of the unpickling process.
  """
  return _create_absl_flags, ()


def _create_absl_flags():
  """Returns ``flags.FLAGS``; used as the reconstructor for absl flags."""
  return flags.FLAGS


def _pickle_rlock(obj):
  """Reducer for RLock objects.

  The lock's state is not pickled; unpickling calls RLOCK_TYPE() with no
  arguments, i.e. creates a new lock.
  """
  return RLOCK_TYPE, ()


def dump_session(file_path):
  """No-op; ``file_path`` is ignored."""
  # It is possible to dump session with cloudpickle. However, since references
  # are saved it should not be necessary. See https://s.apache.org/beam-picklers
  pass


def load_session(file_path):
  """No-op; ``file_path`` is ignored."""
  # It is possible to load_session with cloudpickle. However, since references
  # are saved it should not be necessary. See https://s.apache.org/beam-picklers
  pass