#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""A profiler context manager based on cProfile.Profile and guppy.hpy objects.

For internal use only; no backwards-compatibility guarantees.
"""

# pytype: skip-file
# mypy: check-untyped-defs

import cProfile
import io
import logging
import os
import pstats
import random
import tempfile
import time
from typing import Callable
from typing import Optional

from apache_beam.io import filesystems

_LOGGER = logging.getLogger(__name__)


class Profile(object):
  """cProfile and Heapy wrapper context for saving and logging profiler
  results.

  Used as a context manager: profiling starts in ``__enter__`` and, on
  ``__exit__``, the collected data is uploaded to ``profile_location`` and/or
  written to the log, depending on how the object was configured.
  """

  # Sort key handed to pstats.Stats.sort_stats() when results are logged.
  SORTBY = 'cumulative'

  # Destination of the last uploaded CPU profile; set in __exit__.
  profile_output = None  # type: str
  # Sorted pstats object of the last session; set in __exit__ when logging.
  stats = None  # type: pstats.Stats

  def __init__(
      self,
      profile_id,  # type: str
      profile_location=None,  # type: Optional[str]
      log_results=False,  # type: bool
      file_copy_fn=None,  # type: Optional[Callable[[str, str], None]]
      time_prefix='%Y-%m-%d_%H_%M_%S-',  # type: str
      enable_cpu_profiling=False,  # type: bool
      enable_memory_profiling=False,  # type: bool
  ):
    """Creates a Profile object.

    Args:
      profile_id: Unique id of the profiling session.
      profile_location: The file location where the profiling results will be
        stored.
      log_results: Log the result to console if true.
      file_copy_fn: Lambda function for copying files. Defaults to
        ``default_file_copy_fn``.
      time_prefix: Format of the timestamp prefix in profiling result files.
      enable_cpu_profiling: CPU profiler will be enabled during the profiling
        session.
      enable_memory_profiling: Memory profiler will be enabled during the
        profiling session, the profiler only records the newly allocated objects
        in this session.
    """
    self.profile_id = str(profile_id)
    self.profile_location = profile_location
    self.log_results = log_results
    self.file_copy_fn = file_copy_fn or self.default_file_copy_fn
    self.time_prefix = time_prefix
    self.enable_cpu_profiling = enable_cpu_profiling
    self.enable_memory_profiling = enable_memory_profiling

  def __enter__(self):
    """Starts the enabled profilers and returns this object."""
    _LOGGER.info('Start profiling: %s', self.profile_id)
    if self.enable_cpu_profiling:
      self.profile = cProfile.Profile()
      self.profile.enable()
    if self.enable_memory_profiling:
      try:
        # guppy is optional; memory profiling is skipped when it is missing.
        from guppy import hpy
        self.hpy = hpy()
        # Make later heap() calls relative to this point.
        self.hpy.setrelheap()
      except ImportError:
        _LOGGER.info("Unable to import guppy for memory profiling")
        self.hpy = None
    return self

  def __exit__(self, *args):
    """Stops profiling, then uploads and/or logs the collected data.

    Returns None, so an exception raised inside the ``with`` body is not
    suppressed.
    """
    _LOGGER.info('Stop profiling: %s', self.profile_id)

    if self.enable_cpu_profiling:
      # Stop the profiler unconditionally. Previously it was only stopped as a
      # side effect of create_stats() / pstats.Stats(), i.e. only when
      # profile_location or log_results was set; otherwise it stayed enabled
      # after the session ended.
      self.profile.disable()

    if self.profile_location:
      if self.enable_cpu_profiling:
        self.profile.create_stats()
        self.profile_output = self._upload_profile_data(
            # typing: seems stats attr is missing from typeshed
            self.profile_location, 'cpu_profile', self.profile.stats)  # type: ignore[attr-defined]

      # self.hpy is None when guppy could not be imported in __enter__.
      if self.enable_memory_profiling and self.hpy:
        h = self.hpy.heap()
        heap_dump_data = '%s\n%s' % (h, h.more)
        self._upload_profile_data(
            self.profile_location,
            'memory_profile',
            heap_dump_data,
            write_binary=False)

    if self.log_results:
      if self.enable_cpu_profiling:
        s = io.StringIO()
        self.stats = pstats.Stats(
            self.profile, stream=s).sort_stats(Profile.SORTBY)
        self.stats.print_stats()
        _LOGGER.info('Cpu profiler data: [%s]', s.getvalue())
      if self.enable_memory_profiling and self.hpy:
        _LOGGER.info('Memory profiler data: \n%s', self.hpy.heap())

  @staticmethod
  def default_file_copy_fn(src, dest):
    """Copies local file ``src`` to ``dest`` through Beam FileSystems.

    The content is first written to ``dest + '.tmp'`` and then renamed to
    ``dest``; the rename is not reached if the write raises.

    Args:
      src: Path of the local file to read.
      dest: Destination path, resolved by ``filesystems.FileSystems``.
    """
    dest_handle = filesystems.FileSystems.create(dest + '.tmp')
    try:
      with open(src, 'rb') as src_handle:
        dest_handle.write(src_handle.read())
    finally:
      dest_handle.close()
    filesystems.FileSystems.rename([dest + '.tmp'], [dest])

  @staticmethod
  def factory_from_options(options):
    # type: (...) -> Optional[Callable[..., Profile]]
    """Builds a sampling Profile factory from pipeline options.

    Args:
      options: Object exposing the ``profile_cpu``, ``profile_memory``,
        ``profile_sample_rate`` and ``profile_location`` attributes.

    Returns:
      None when neither CPU nor memory profiling is requested. Otherwise a
      callable ``create_profiler(profile_id, **kwargs)`` that returns a
      ``Profile`` with probability ``options.profile_sample_rate`` and None
      for calls that are not sampled.
    """
    if options.profile_cpu or options.profile_memory:

      def create_profiler(profile_id, **kwargs):
        if random.random() < options.profile_sample_rate:
          return Profile(
              profile_id,
              options.profile_location,
              enable_cpu_profiling=options.profile_cpu,
              enable_memory_profiling=options.profile_memory,
              **kwargs)
        # Not sampled: callers receive no profiler for this id.
        return None

      return create_profiler
    return None

  def _upload_profile_data(
      self, profile_location, dir, data, write_binary=True):
    # type: (...) -> str
    """Writes ``data`` to a temporary file and copies it to its destination.

    Args:
      profile_location: Root location for profiling results.
      dir: Sub-directory under ``profile_location`` for this kind of data.
      data: Object to marshal when ``write_binary`` is true, otherwise the
        text to write.
      write_binary: Whether to serialize ``data`` with ``marshal`` (true) or
        write it as text (false).

    Returns:
      The destination path the data was copied to.
    """
    # Only the prefix is a strftime format. The id is appended verbatim so
    # that a '%' inside it is not interpreted as a format directive.
    dump_name = time.strftime(self.time_prefix) + self.profile_id
    dump_location = os.path.join(profile_location, dir, dump_name)
    fd, filename = tempfile.mkstemp()
    try:
      # Only the path is needed; the file is reopened by name below.
      os.close(fd)
      if write_binary:
        with open(filename, 'wb') as fb:
          import marshal
          marshal.dump(data, fb)
      else:
        with open(filename, 'w') as f:
          f.write(data)
      _LOGGER.info('Copying profiler data to: [%s]', dump_location)
      self.file_copy_fn(filename, dump_location)
    finally:
      # The temporary file is removed whether or not the copy succeeded.
      os.remove(filename)

    return dump_location