github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/io/restriction_trackers.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  """`iobase.RestrictionTracker` implementations provided with Apache Beam."""
    19  # pytype: skip-file
    20  
    21  from typing import Tuple
    22  
    23  from apache_beam.io.iobase import RestrictionProgress
    24  from apache_beam.io.iobase import RestrictionTracker
    25  from apache_beam.io.range_trackers import OffsetRangeTracker
    26  
    27  
    28  class OffsetRange(object):
    29    def __init__(self, start, stop):
    30      if start > stop:
    31        raise ValueError(
    32            'Start offset must be not be larger than the stop offset. '
    33            'Received %d and %d respectively.' % (start, stop))
    34      self.start = start
    35      self.stop = stop
    36  
    37    def __eq__(self, other):
    38      if not isinstance(other, OffsetRange):
    39        return False
    40  
    41      return self.start == other.start and self.stop == other.stop
    42  
    43    def __hash__(self):
    44      return hash((type(self), self.start, self.stop))
    45  
    46    def __repr__(self):
    47      return 'OffsetRange(start=%s, stop=%s)' % (self.start, self.stop)
    48  
    49    def split(self, desired_num_offsets_per_split, min_num_offsets_per_split=1):
    50      current_split_start = self.start
    51      max_split_size = max(
    52          desired_num_offsets_per_split, min_num_offsets_per_split)
    53      while current_split_start < self.stop:
    54        current_split_stop = min(current_split_start + max_split_size, self.stop)
    55        remaining = self.stop - current_split_stop
    56  
    57        # Avoiding a small split at the end.
    58        if (remaining < desired_num_offsets_per_split // 4 or
    59            remaining < min_num_offsets_per_split):
    60          current_split_stop = self.stop
    61  
    62        yield OffsetRange(current_split_start, current_split_stop)
    63        current_split_start = current_split_stop
    64  
    65    def split_at(self, split_pos):
    66      # type: (...) -> Tuple[OffsetRange, OffsetRange]
    67      return OffsetRange(self.start, split_pos), OffsetRange(split_pos, self.stop)
    68  
    69    def new_tracker(self):
    70      return OffsetRangeTracker(self.start, self.stop)
    71  
    72    def size(self):
    73      return self.stop - self.start
    74  
    75  
    76  class OffsetRestrictionTracker(RestrictionTracker):
    77    """An `iobase.RestrictionTracker` implementations for an offset range.
    78  
    79    Offset range is represented as OffsetRange.
    80    """
    81    def __init__(self, offset_range):
    82      # type: (OffsetRange) -> None
    83      assert isinstance(offset_range, OffsetRange), offset_range
    84      self._range = offset_range
    85      self._current_position = None
    86      self._last_claim_attempt = None
    87      self._checkpointed = False
    88  
    89    def check_done(self):
    90      if (self._range.start != self._range.stop and
    91          (self._last_claim_attempt is None or
    92           self._last_claim_attempt < self._range.stop - 1)):
    93        raise ValueError(
    94            'OffsetRestrictionTracker is not done since work in range [%s, %s) '
    95            'has not been claimed.' % (
    96                self._last_claim_attempt
    97                if self._last_claim_attempt is not None else self._range.start,
    98                self._range.stop))
    99  
   100    def current_restriction(self):
   101      return self._range
   102  
   103    def current_progress(self):
   104      # type: () -> RestrictionProgress
   105      if self._current_position is None:
   106        fraction = 0.0
   107      elif self._range.stop == self._range.start:
   108        # If self._current_position is not None, we must be done.
   109        fraction = 1.0
   110      else:
   111        fraction = (
   112            float(self._current_position - self._range.start) /
   113            (self._range.stop - self._range.start))
   114      return RestrictionProgress(fraction=fraction)
   115  
   116    def start_position(self):
   117      return self._range.start
   118  
   119    def stop_position(self):
   120      return self._range.stop
   121  
   122    def try_claim(self, position):
   123      if (self._last_claim_attempt is not None and
   124          position <= self._last_claim_attempt):
   125        raise ValueError(
   126            'Positions claimed should strictly increase. Trying to claim '
   127            'position %d while last claim attempt was %d.' %
   128            (position, self._last_claim_attempt))
   129  
   130      self._last_claim_attempt = position
   131      if position < self._range.start:
   132        raise ValueError(
   133            'Position to be claimed cannot be smaller than the start position '
   134            'of the range. Tried to claim position %r for the range [%r, %r)' %
   135            (position, self._range.start, self._range.stop))
   136  
   137      if self._range.start <= position < self._range.stop:
   138        self._current_position = position
   139        return True
   140  
   141      return False
   142  
   143    def try_split(self, fraction_of_remainder):
   144      if not self._checkpointed:
   145        if self._last_claim_attempt is None:
   146          cur = self._range.start - 1
   147        else:
   148          cur = self._last_claim_attempt
   149        split_point = (
   150            cur + int(max(1, (self._range.stop - cur) * fraction_of_remainder)))
   151        if split_point < self._range.stop:
   152          if fraction_of_remainder == 0:
   153            self._checkpointed = True
   154          self._range, residual_range = self._range.split_at(split_point)
   155          return self._range, residual_range
   156  
   157    def is_bounded(self):
   158      return True
   159  
   160  
   161  class UnsplittableRestrictionTracker(RestrictionTracker):
   162    """An `iobase.RestrictionTracker` that wraps another but does not split."""
   163    def __init__(self, underling_tracker):
   164      self._underling_tracker = underling_tracker
   165  
   166    def try_split(self, fraction_of_remainder):
   167      return False
   168  
   169    # __getattribute__ is used rather than __getattr__ to override the
   170    # stubs in the baseclass.
   171    def __getattribute__(self, name):
   172      if name.startswith('_') or name in ('try_split', ):
   173        return super().__getattribute__(name)
   174      else:
   175        return getattr(self._underling_tracker, name)