#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Implements a shared object that spans processes.

This object will be instantiated once per VM and methods will be invoked
on it via rpc.
"""
# pytype: skip-file

import logging
import multiprocessing.managers
import os
import tempfile
import threading
from typing import Any
from typing import Callable
from typing import Dict
from typing import Generic
from typing import Optional
from typing import TypeVar

import fasteners

T = TypeVar('T')
AUTH_KEY = b'mps'


class _SingletonProxy:
  """Proxies the shared object so we can release it with better errors and no
  risk of dangling references in the multiprocessing manager infrastructure.

  Every attribute that is not part of the proxy's own bookkeeping is looked up
  on ``entry.obj``. Once the proxy has been released, attribute access and
  calls raise ``RuntimeError`` instead of touching the shared object.
  """
  def __init__(self, entry):
    # Guard names so as to not conflict with names of underlying object.
    self._SingletonProxy_entry = entry
    self._SingletonProxy_valid = True

  # Used to make the shared object callable (see _AutoProxyWrapper below)
  def singletonProxy_call__(self, *args, **kwargs):
    """Invokes the shared object's ``__call__`` with the given arguments.

    Raises:
      RuntimeError: if this proxy has already been released.
    """
    if not self._SingletonProxy_valid:
      raise RuntimeError('Entry was released.')
    return self._SingletonProxy_entry.obj.__call__(*args, **kwargs)

  def _SingletonProxy_release(self):
    """Marks this proxy as released so that later use fails loudly.

    Raises:
      RuntimeError: if this proxy was already released. An explicit raise is
        used rather than an ``assert`` so the check survives ``python -O``.
    """
    if not self._SingletonProxy_valid:
      raise RuntimeError('Entry was already released.')
    self._SingletonProxy_valid = False

  def __getattr__(self, name):
    # __getattr__ only runs when normal lookup fails. For our own bookkeeping
    # names that means the instance was never initialised (e.g. it is being
    # copied or unpickled); reading self._SingletonProxy_valid below would
    # then re-enter this method forever, so fail with AttributeError instead.
    if name.startswith('_SingletonProxy_'):
      raise AttributeError(name)
    if not self._SingletonProxy_valid:
      raise RuntimeError('Entry was released.')
    return getattr(self._SingletonProxy_entry.obj, name)

  def __dir__(self):
    # Needed for multiprocessing.managers's proxying.
    # Work on a copy so that we never append to a list owned by the shared
    # object, and so that a non-list return value from its __dir__ still works.
    names = list(self._SingletonProxy_entry.obj.__dir__())
    names.append('singletonProxy_call__')
    return names


class _SingletonEntry:
  """Represents a single, refcounted entry in this process.

  Args:
    constructor: zero-argument callable that builds the shared object.
    initialize_eagerly: if true, call ``constructor`` immediately; otherwise
      the object is built on the first ``acquire``.
  """
  def __init__(self, constructor, initialize_eagerly=True):
    self.constructor = constructor
    self.refcount = 0
    self.lock = threading.Lock()
    self.initialized = False
    if initialize_eagerly:
      self.obj = constructor()
      self.initialized = True

  def acquire(self):
    """Takes a reference, building the object if needed; returns a new proxy."""
    with self.lock:
      if not self.initialized:
        self.obj = self.constructor()
        self.initialized = True
      self.refcount += 1
    return _SingletonProxy(self)

  def release(self, proxy):
    """Invalidates ``proxy`` and drops one reference.

    When the last reference goes away the object is deleted, so a later
    ``acquire`` will call the constructor again.
    """
    # Invalidate first: if the proxy was already released this raises and the
    # refcount is left untouched.
    proxy._SingletonProxy_release()
    with self.lock:
      self.refcount -= 1
      if self.refcount == 0:
        del self.obj
        self.initialized = False


class _SingletonManager:
  """Maps tags to the `_SingletonEntry` objects hosted by this process."""
  entries: Dict[Any, Any]

  def __init__(self):
    # Per-instance dict; a class-level ``{}`` would be shared by every
    # instance of this class.
    self.entries = {}

  def register_singleton(self, constructor, tag, initialize_eagerly=True):
    """Creates the entry for ``tag``; the tag must not be registered yet."""
    assert tag not in self.entries, tag
    self.entries[tag] = _SingletonEntry(constructor, initialize_eagerly)

  def has_singleton(self, tag):
    """Returns whether an entry for ``tag`` is registered."""
    return tag in self.entries

  def acquire_singleton(self, tag):
    """Returns a fresh proxy for the entry registered under ``tag``."""
    return self.entries[tag].acquire()

  def release_singleton(self, tag, obj):
    """Releases ``obj``, a proxy previously acquired for ``tag``."""
    return self.entries[tag].release(obj)


_process_level_singleton_manager = _SingletonManager()

_process_local_lock = threading.Lock()


class _SingletonRegistrar(multiprocessing.managers.BaseManager):
  """Manager type on which the acquire/release entry points are registered."""


_SingletonRegistrar.register(
    'acquire_singleton',
    callable=_process_level_singleton_manager.acquire_singleton)
_SingletonRegistrar.register(
    'release_singleton',
    callable=_process_level_singleton_manager.release_singleton)


# By default, objects registered with BaseManager.register will have only
# public methods available (excluding __call__). If you know the functions
# you would like to expose, you can do so at register time with the `exposed`
# attribute. Since we don't, we will add a wrapper around the returned AutoProxy
# object to handle __call__ function calls and turn them into
# singletonProxy_call__ calls (which is a wrapper around the underlying
# object's __call__ function)
class _AutoProxyWrapper:
  """Makes an acquired proxy callable and forwards everything else to it."""
  def __init__(self, proxyObject: multiprocessing.managers.BaseProxy):
    self._proxyObject = proxyObject

  def __call__(self, *args, **kwargs):
    return self._proxyObject.singletonProxy_call__(*args, **kwargs)

  def __getattr__(self, name):
    # If _proxyObject itself is missing (instance created without __init__,
    # e.g. while being unpickled), reading it below would re-enter this
    # method without end. Report it as a plain missing attribute instead.
    if name == '_proxyObject':
      raise AttributeError(name)
    return getattr(self._proxyObject, name)

  def get_auto_proxy_object(self):
    """Returns the wrapped proxy object that was passed to the constructor."""
    return self._proxyObject


class MultiProcessShared(Generic[T]):
  """MultiProcessShared is used to share a single object across processes.

  For example, one could have the class::

    class MyExpensiveObject(object):
      def __init__(self, args):
        [expensive initialization and memory allocation]

      def method(self, arg):
        ...

  One could share a single instance of this class by wrapping it as::

    shared_ptr = MultiProcessShared(lambda: MyExpensiveObject(...))
    my_expensive_object = shared_ptr.acquire()

  which could then be invoked as::

    my_expensive_object.method(arg)

  This can then be released with::

    shared_ptr.release(my_expensive_object)

  but care should be taken to avoid releasing the object too soon or
  expensive re-initialization may be required, defeating the point of
  using a shared object.


  Args:
    constructor: function that initialises / constructs the object if not
      present in the cache. This function should take no arguments. It should
      return an initialised object, or raise an exception if the object could
      not be initialised / constructed.
    tag: an identifier to store with the cached object. If multiple
      MultiProcessShared instances are created with the same tag, they will all
      share the same proxied object.
    path: a temporary path in which to create the inter-process lock
    always_proxy: whether to direct all calls through the proxy, rather than
      call the object directly for the process that created it
  """
  def __init__(
      self,
      constructor: Callable[[], T],
      tag: Any,
      *,
      path: str = tempfile.gettempdir(),
      always_proxy: Optional[bool] = None):
    self._constructor = constructor
    self._tag = tag
    self._path = path
    self._always_proxy = False if always_proxy is None else always_proxy
    self._proxy = None
    self._manager = None
    self._rpc_address = None
    self._cross_process_lock = fasteners.InterProcessLock(
        os.path.join(self._path, self._tag) + '.lock')

  def _get_manager(self):
    """Returns the manager used to acquire/release the object, creating or
    connecting to the hosting server on first use.

    The result is either the in-process singleton manager (when this process
    hosts the object and ``always_proxy`` is false) or a connected
    `_SingletonRegistrar`. It is cached on ``self._manager``.
    """
    if self._manager is None:
      address_file = os.path.join(self._path, self._tag) + ".address"
      # Loop until a usable manager is found: a stale address file is removed
      # below and the next iteration starts a new server.
      while self._manager is None:
        with _process_local_lock:
          with self._cross_process_lock:
            if not os.path.exists(address_file):
              self._create_server(address_file)

            if _process_level_singleton_manager.has_singleton(
                self._tag) and not self._always_proxy:
              self._manager = _process_level_singleton_manager
            else:
              with open(address_file) as fin:
                address = fin.read()
              logging.info('Connecting to remote proxy at %s', address)
              # The file holds "host:port"; split on the last colon so that
              # the port is isolated even if the host part contains colons.
              host, port = address.rsplit(':', 1)
              # We need to be able to authenticate with both the manager and
              # the process.
              manager = _SingletonRegistrar(
                  address=(host, int(port)), authkey=AUTH_KEY)
              multiprocessing.current_process().authkey = AUTH_KEY
              try:
                manager.connect()
                self._manager = manager
              except ConnectionError:
                # The server is no longer good, assume it died.
                os.unlink(address_file)

    return self._manager

  def acquire(self):
    """Acquires the shared object and returns a callable wrapper around it."""
    # TODO: Allow passing/parameterizing the callable here, in case they are
    # not available at MultiProcessShared construction time (e.g. from side
    # inputs)
    # Caveat: They must always agree, as they will be ignored if the object
    # is already constructed.
    singleton = self._get_manager().acquire_singleton(self._tag)
    return _AutoProxyWrapper(singleton)

  def release(self, obj):
    """Releases an object previously returned by `acquire`.

    Args:
      obj: the value returned by `acquire` (or the proxy it wraps).
    """
    # Pass the manager the underlying proxy rather than our wrapper. The
    # wrapper forwards every unknown attribute to ``_proxyObject``, which makes
    # it unsuitable for being pickled and sent over the manager connection.
    if isinstance(obj, _AutoProxyWrapper):
      obj = obj.get_auto_proxy_object()
    # Go through _get_manager() so that release also works on an instance
    # whose manager has not been resolved yet (self._manager is still None).
    self._get_manager().release_singleton(self._tag, obj)

  def _create_server(self, address_file):
    """Registers the singleton in this process, starts a serving thread for
    it, and publishes the server's "host:port" in ``address_file``.
    """
    # We need to be able to authenticate with both the manager and the process.
    self._serving_manager = _SingletonRegistrar(
        address=('localhost', 0), authkey=AUTH_KEY)
    multiprocessing.current_process().authkey = AUTH_KEY
    # Initialize eagerly to avoid acting as the server if there are issues.
    # Note, however, that _create_server itself is called lazily.
    _process_level_singleton_manager.register_singleton(
        self._constructor, self._tag, initialize_eagerly=True)
    self._server = self._serving_manager.get_server()
    logging.info(
        'Starting proxy server at %s for shared %s',
        self._server.address,
        self._tag)
    # Write to a temporary name and rename, so the address file only ever
    # appears under its final name with its full contents.
    with open(address_file + '.tmp', 'w') as fout:
      fout.write('%s:%d' % self._server.address)
    os.rename(address_file + '.tmp', address_file)
    t = threading.Thread(target=self._server.serve_forever, daemon=True)
    t.start()
    logging.info('Done starting server')