github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/utils/profiler.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  """A profiler context manager based on cProfile.Profile and guppy.hpy objects.
    19  
    20  For internal use only; no backwards-compatibility guarantees.
    21  """
    22  
    23  # pytype: skip-file
    24  # mypy: check-untyped-defs
    25  
    26  import cProfile
    27  import io
    28  import logging
    29  import os
    30  import pstats
    31  import random
    32  import tempfile
    33  import time
    34  from typing import Callable
    35  from typing import Optional
    36  
    37  from apache_beam.io import filesystems
    38  
    39  _LOGGER = logging.getLogger(__name__)
    40  
    41  
    42  class Profile(object):
    43    """cProfile and Heapy wrapper context for saving and logging profiler
    44    results."""
    45  
    46    SORTBY = 'cumulative'
    47  
    48    profile_output = None  # type: str
    49    stats = None  # type: pstats.Stats
    50  
    51    def __init__(
    52        self,
    53        profile_id, # type: str
    54        profile_location=None, # type: Optional[str]
    55        log_results=False, # type: bool
    56        file_copy_fn=None, # type: Optional[Callable[[str, str], None]]
    57        time_prefix='%Y-%m-%d_%H_%M_%S-', # type: str
    58        enable_cpu_profiling=False, # type: bool
    59        enable_memory_profiling=False, # type: bool
    60    ):
    61      """Creates a Profile object.
    62  
    63      Args:
    64        profile_id: Unique id of the profiling session.
    65        profile_location: The file location where the profiling results will be
    66          stored.
    67        log_results: Log the result to console if true.
    68        file_copy_fn: Lambda function for copying files.
    69        time_prefix: Format of the timestamp prefix in profiling result files.
    70        enable_cpu_profiling: CPU profiler will be enabled during the profiling
    71          session.
    72        enable_memory_profiling: Memory profiler will be enabled during the
    73          profiling session, the profiler only records the newly allocated objects
    74          in this session.
    75      """
    76      self.profile_id = str(profile_id)
    77      self.profile_location = profile_location
    78      self.log_results = log_results
    79      self.file_copy_fn = file_copy_fn or self.default_file_copy_fn
    80      self.time_prefix = time_prefix
    81      self.enable_cpu_profiling = enable_cpu_profiling
    82      self.enable_memory_profiling = enable_memory_profiling
    83  
    84    def __enter__(self):
    85      _LOGGER.info('Start profiling: %s', self.profile_id)
    86      if self.enable_cpu_profiling:
    87        self.profile = cProfile.Profile()
    88        self.profile.enable()
    89      if self.enable_memory_profiling:
    90        try:
    91          from guppy import hpy
    92          self.hpy = hpy()
    93          self.hpy.setrelheap()
    94        except ImportError:
    95          _LOGGER.info("Unable to import guppy for memory profiling")
    96          self.hpy = None
    97      return self
    98  
    99    def __exit__(self, *args):
   100      _LOGGER.info('Stop profiling: %s', self.profile_id)
   101  
   102      if self.profile_location:
   103        if self.enable_cpu_profiling:
   104          self.profile.create_stats()
   105          self.profile_output = self._upload_profile_data(
   106              # typing: seems stats attr is missing from typeshed
   107              self.profile_location, 'cpu_profile', self.profile.stats)  # type: ignore[attr-defined]
   108  
   109        if self.enable_memory_profiling:
   110          if not self.hpy:
   111            pass
   112          else:
   113            h = self.hpy.heap()
   114            heap_dump_data = '%s\n%s' % (h, h.more)
   115            self._upload_profile_data(
   116                self.profile_location,
   117                'memory_profile',
   118                heap_dump_data,
   119                write_binary=False)
   120  
   121      if self.log_results:
   122        if self.enable_cpu_profiling:
   123          s = io.StringIO()
   124          self.stats = pstats.Stats(
   125              self.profile, stream=s).sort_stats(Profile.SORTBY)
   126          self.stats.print_stats()
   127          _LOGGER.info('Cpu profiler data: [%s]', s.getvalue())
   128        if self.enable_memory_profiling and self.hpy:
   129          _LOGGER.info('Memory profiler data: \n%s' % self.hpy.heap())
   130  
   131    @staticmethod
   132    def default_file_copy_fn(src, dest):
   133      dest_handle = filesystems.FileSystems.create(dest + '.tmp')
   134      try:
   135        with open(src, 'rb') as src_handle:
   136          dest_handle.write(src_handle.read())
   137      finally:
   138        dest_handle.close()
   139      filesystems.FileSystems.rename([dest + '.tmp'], [dest])
   140  
   141    @staticmethod
   142    def factory_from_options(options):
   143      # type: (...) -> Optional[Callable[..., Profile]]
   144      if options.profile_cpu or options.profile_memory:
   145  
   146        def create_profiler(profile_id, **kwargs):
   147          if random.random() < options.profile_sample_rate:
   148            return Profile(
   149                profile_id,
   150                options.profile_location,
   151                enable_cpu_profiling=options.profile_cpu,
   152                enable_memory_profiling=options.profile_memory,
   153                **kwargs)
   154  
   155        return create_profiler
   156      return None
   157  
   158    def _upload_profile_data(
   159        self, profile_location, dir, data, write_binary=True):
   160      # type: (...) -> str
   161      dump_location = os.path.join(
   162          profile_location,
   163          dir,
   164          time.strftime(self.time_prefix + self.profile_id))
   165      fd, filename = tempfile.mkstemp()
   166      try:
   167        os.close(fd)
   168        if write_binary:
   169          with open(filename, 'wb') as fb:
   170            import marshal
   171            marshal.dump(data, fb)
   172        else:
   173          with open(filename, 'w') as f:
   174            f.write(data)
   175        _LOGGER.info('Copying profiler data to: [%s]', dump_location)
   176        self.file_copy_fn(filename, dump_location)
   177      finally:
   178        os.remove(filename)
   179  
   180      return dump_location