github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/io/source_test_utils.py

github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/io/source_test_utils.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  """Helper functions and test harnesses for source implementations.
    19  
    20  This module contains helper functions and test harnesses for checking
    21  correctness of source (a subclass of ``iobase.BoundedSource``) and range
    22  tracker (a subclass of ``iobase.RangeTracker``) implementations.
    23  
    24  Contains a few lightweight utilities (e.g. reading items from a source with
    25  ``read_from_source()``), as well as heavyweight property testing and stress
    26  testing harnesses that help get a large amount of test coverage with little
    27  code.
    28  
    29  Most notable ones are:
    30  * ``assert_sources_equal_reference_source()`` helps testing that the data read
    31  by the union of sources produced by ``BoundedSource.split()`` is the same as
    32  data read by the original source.
    33  * If your source implements dynamic work rebalancing, use the
    34  ``assert_split_at_fraction_*()`` family of functions - they test behavior of
    35  ``RangeTracker.try_split()``, in particular, that various consistency
    36  properties are respected and the total set of data read by the source is
    37  preserved when splits happen. Use ``assert_split_at_fraction_behavior()`` to
    38  test individual cases of ``RangeTracker.try_split()`` and use
    39  ``assert_split_at_fraction_exhaustive()`` as a heavy-weight stress test
    40  including concurrency. We strongly recommend using both.
    41  
    42  For example usages, see the unit tests of modules such as
    43   * apache_beam.io.source_test_utils_test.py
    44   * apache_beam.io.avroio_test.py
    45  """
    46  # pytype: skip-file
    47  
    48  import logging
    49  import threading
    50  import weakref
    51  from collections import namedtuple
    52  from multiprocessing.pool import ThreadPool
    53  
    54  from apache_beam.io import iobase
    55  from apache_beam.testing.util import equal_to
    56  
# Names exported by ``from apache_beam.io.source_test_utils import *``.
__all__ = [
    'read_from_source',
    'assert_sources_equal_reference_source',
    'assert_reentrant_reads_succeed',
    'assert_split_at_fraction_behavior',
    'assert_split_at_fraction_binary',
    'assert_split_at_fraction_exhaustive',
    'assert_split_at_fraction_fails',
    'assert_split_at_fraction_succeeds_and_consistent'
]

# Module-level logger; used by the exhaustive splitting harness to report how
# many concurrent-splitting trials were run.
_LOGGER = logging.getLogger(__name__)
    69  
    70  
    71  class ExpectedSplitOutcome(object):
    72    MUST_SUCCEED_AND_BE_CONSISTENT = 1
    73    MUST_FAIL = 2
    74    MUST_BE_CONSISTENT_IF_SUCCEEDS = 3
    75  
    76  
    77  SplitAtFractionResult = namedtuple(
    78      'SplitAtFractionResult', 'num_primary_items num_residual_items')
    79  
    80  SplitFractionStatistics = namedtuple(
    81      'SplitFractionStatistics', 'successful_fractions non_trivial_fractions')
    82  
    83  
    84  def read_from_source(source, start_position=None, stop_position=None):
    85    """Reads elements from the given ```BoundedSource```.
    86  
    87    Only reads elements within the given position range.
    88    Args:
    89      source (~apache_beam.io.iobase.BoundedSource):
    90        :class:`~apache_beam.io.iobase.BoundedSource` implementation.
    91      start_position (int): start position for reading.
    92      stop_position (int): stop position for reading.
    93  
    94    Returns:
    95      List[str]: the set of values read from the sources.
    96    """
    97    values = []
    98    range_tracker = source.get_range_tracker(start_position, stop_position)
    99    assert isinstance(range_tracker, iobase.RangeTracker)
   100    reader = source.read(range_tracker)
   101    for value in reader:
   102      values.append(value)
   103  
   104    return values
   105  
   106  
   107  def _ThreadPool(threads):
   108    # ThreadPool crashes in old versions of Python (< 2.7.5) if created from a
   109    # child thread. (http://bugs.python.org/issue10015)
   110    if not hasattr(threading.current_thread(), '_children'):
   111      threading.current_thread()._children = weakref.WeakKeyDictionary()
   112    return ThreadPool(threads)
   113  
   114  
   115  def assert_sources_equal_reference_source(reference_source_info, sources_info):
   116    """Tests if a reference source is equal to a given set of sources.
   117  
   118    Given a reference source (a :class:`~apache_beam.io.iobase.BoundedSource`
   119    and a position range) and a list of sources, assert that the union of the
   120    records read from the list of sources is equal to the records read from the
   121    reference source.
   122  
   123    Args:
   124      reference_source_info\
   125          (Tuple[~apache_beam.io.iobase.BoundedSource, int, int]):
   126        a three-tuple that gives the reference
   127        :class:`~apache_beam.io.iobase.BoundedSource`, position to start
   128        reading at, and position to stop reading at.
   129      sources_info\
   130          (Iterable[Tuple[~apache_beam.io.iobase.BoundedSource, int, int]]):
   131        a set of sources. Each source is a three-tuple that is of the same
   132        format described above.
   133  
   134    Raises:
   135      ValueError: if the set of data produced by the reference source
   136        and the given set of sources are not equivalent.
   137  
   138    """
   139  
   140    if not (isinstance(reference_source_info, tuple) and
   141            len(reference_source_info) == 3 and
   142            isinstance(reference_source_info[0], iobase.BoundedSource)):
   143      raise ValueError(
   144          'reference_source_info must a three-tuple where first'
   145          'item of the tuple gives a '
   146          'iobase.BoundedSource. Received: %r' % reference_source_info)
   147    reference_records = read_from_source(*reference_source_info)
   148  
   149    source_records = []
   150    for source_info in sources_info:
   151      assert isinstance(source_info, tuple)
   152      assert len(source_info) == 3
   153      if not (isinstance(source_info, tuple) and len(source_info) == 3 and
   154              isinstance(source_info[0], iobase.BoundedSource)):
   155        raise ValueError(
   156            'source_info must a three tuple where first'
   157            'item of the tuple gives a '
   158            'iobase.BoundedSource. Received: %r' % source_info)
   159      if (type(reference_source_info[0].default_output_coder()) != type(
   160          source_info[0].default_output_coder())):
   161        raise ValueError(
   162            'Reference source %r and the source %r must use the same coder. '
   163            'They are using %r and %r respectively instead.' % (
   164                reference_source_info[0],
   165                source_info[0],
   166                type(reference_source_info[0].default_output_coder()),
   167                type(source_info[0].default_output_coder())))
   168      source_records.extend(read_from_source(*source_info))
   169  
   170    if len(reference_records) != len(source_records):
   171      raise ValueError(
   172          'Reference source must produce the same number of records as the '
   173          'list of sources. Number of records were %d and %d instead.' %
   174          (len(reference_records), len(source_records)))
   175  
   176    if equal_to(reference_records)(source_records):
   177      raise ValueError(
   178          'Reference source and provided list of sources must produce the '
   179          'same set of records.')
   180  
   181  
   182  def assert_reentrant_reads_succeed(source_info):
   183    """Tests if a given source can be read in a reentrant manner.
   184  
   185    Assume that given source produces the set of values ``{v1, v2, v3, ... vn}``.
   186    For ``i`` in range ``[1, n-1]`` this method performs a reentrant read after
   187    reading ``i`` elements and verifies that both the original and reentrant read
   188    produce the expected set of values.
   189  
   190    Args:
   191      source_info (Tuple[~apache_beam.io.iobase.BoundedSource, int, int]):
   192        a three-tuple that gives the reference
   193        :class:`~apache_beam.io.iobase.BoundedSource`, position to start reading
   194        at, and a position to stop reading at.
   195  
   196    Raises:
   197      ValueError: if source is too trivial or reentrant read result
   198        in an incorrect read.
   199    """
   200  
   201    source, start_position, stop_position = source_info
   202    assert isinstance(source, iobase.BoundedSource)
   203  
   204    expected_values = [
   205        val for val in source.read(
   206            source.get_range_tracker(start_position, stop_position))
   207    ]
   208    if len(expected_values) < 2:
   209      raise ValueError(
   210          'Source is too trivial since it produces only %d '
   211          'values. Please give a source that reads at least 2 '
   212          'values.' % len(expected_values))
   213  
   214    for i in range(1, len(expected_values) - 1):
   215      read_iter = source.read(
   216          source.get_range_tracker(start_position, stop_position))
   217      original_read = []
   218      for _ in range(i):
   219        original_read.append(next(read_iter))
   220  
   221      # Reentrant read
   222      reentrant_read = [
   223          val for val in source.read(
   224              source.get_range_tracker(start_position, stop_position))
   225      ]
   226  
   227      # Continuing original read.
   228      for val in read_iter:
   229        original_read.append(val)
   230  
   231      if equal_to(original_read)(expected_values):
   232        raise ValueError(
   233            'Source did not produce expected values when '
   234            'performing a reentrant read after reading %d values. '
   235            'Expected %r received %r.' % (i, expected_values, original_read))
   236  
   237      if equal_to(reentrant_read)(expected_values):
   238        raise ValueError(
   239            'A reentrant read of source after reading %d values '
   240            'did not produce expected values. Expected %r '
   241            'received %r.' % (i, expected_values, reentrant_read))
   242  
   243  
   244  def assert_split_at_fraction_behavior(
   245      source, num_items_to_read_before_split, split_fraction, expected_outcome):
   246    """Verifies the behaviour of splitting a source at a given fraction.
   247  
   248    Asserts that splitting a :class:`~apache_beam.io.iobase.BoundedSource` either
   249    fails after reading **num_items_to_read_before_split** items, or succeeds in
   250    a way that is consistent according to
   251    :func:`assert_split_at_fraction_succeeds_and_consistent()`.
   252  
   253    Args:
   254      source (~apache_beam.io.iobase.BoundedSource): the source to perform
   255        dynamic splitting on.
   256      num_items_to_read_before_split (int): number of items to read before
   257        splitting.
   258      split_fraction (float): fraction to split at.
   259      expected_outcome (int): a value from
   260        :class:`~apache_beam.io.source_test_utils.ExpectedSplitOutcome`.
   261  
   262    Returns:
   263      Tuple[int, int]: a tuple that gives the number of items produced by reading
   264      the two ranges produced after dynamic splitting. If splitting did not
   265      occur, the first value of the tuple will represent the full set of records
   266      read by the source while the second value of the tuple will be ``-1``.
   267    """
   268    assert isinstance(source, iobase.BoundedSource)
   269    expected_items = read_from_source(source, None, None)
   270    return _assert_split_at_fraction_behavior(
   271        source,
   272        expected_items,
   273        num_items_to_read_before_split,
   274        split_fraction,
   275        expected_outcome)
   276  
   277  
   278  def _assert_split_at_fraction_behavior(
   279      source,
   280      expected_items,
   281      num_items_to_read_before_split,
   282      split_fraction,
   283      expected_outcome,
   284      start_position=None,
   285      stop_position=None):
   286  
   287    range_tracker = source.get_range_tracker(start_position, stop_position)
   288    assert isinstance(range_tracker, iobase.RangeTracker)
   289    current_items = []
   290    reader = source.read(range_tracker)
   291    # Reading 'num_items_to_read_before_split' items.
   292    reader_iter = iter(reader)
   293    for _ in range(num_items_to_read_before_split):
   294      current_items.append(next(reader_iter))
   295  
   296    suggested_split_position = range_tracker.position_at_fraction(split_fraction)
   297  
   298    stop_position_before_split = range_tracker.stop_position()
   299    split_result = range_tracker.try_split(suggested_split_position)
   300  
   301    if split_result is not None:
   302      if len(split_result) != 2:
   303        raise ValueError(
   304            'Split result must be a tuple that contains split '
   305            'position and split fraction. Received: %r' % (split_result, ))
   306  
   307      if range_tracker.stop_position() != split_result[0]:
   308        raise ValueError(
   309            'After a successful split, the stop position of the '
   310            'RangeTracker must be the same as the returned split '
   311            'position. Observed %r and %r which are different.' %
   312            (range_tracker.stop_position() % (split_result[0], )))
   313  
   314      if split_fraction < 0 or split_fraction > 1:
   315        raise ValueError(
   316            'Split fraction must be within the range [0,1]',
   317            'Observed split fraction was %r.' % (split_result[1], ))
   318  
   319    stop_position_after_split = range_tracker.stop_position()
   320    if split_result and stop_position_after_split == stop_position_before_split:
   321      raise ValueError(
   322          'Stop position %r did not change after a successful '
   323          'split of source %r at fraction %r.' %
   324          (stop_position_before_split, source, split_fraction))
   325  
   326    if expected_outcome == ExpectedSplitOutcome.MUST_SUCCEED_AND_BE_CONSISTENT:
   327      if not split_result:
   328        raise ValueError(
   329            'Expected split of source %r at fraction %r to be '
   330            'successful after reading %d elements. But '
   331            'the split failed.' %
   332            (source, split_fraction, num_items_to_read_before_split))
   333    elif expected_outcome == ExpectedSplitOutcome.MUST_FAIL:
   334      if split_result:
   335        raise ValueError(
   336            'Expected split of source %r at fraction %r after '
   337            'reading %d elements to fail. But splitting '
   338            'succeeded with result %r.' % (
   339                source,
   340                split_fraction,
   341                num_items_to_read_before_split,
   342                split_result))
   343  
   344    elif (
   345        expected_outcome != ExpectedSplitOutcome.MUST_BE_CONSISTENT_IF_SUCCEEDS):
   346      raise ValueError('Unknown type of expected outcome: %r' % expected_outcome)
   347    current_items.extend([value for value in reader_iter])
   348  
   349    residual_range = (
   350        split_result[0], stop_position_before_split) if split_result else None
   351  
   352    return _verify_single_split_fraction_result(
   353        source,
   354        expected_items,
   355        current_items,
   356        split_result,
   357        (range_tracker.start_position(), range_tracker.stop_position()),
   358        residual_range,
   359        split_fraction)
   360  
   361  
   362  def _range_to_str(start, stop):
   363    return '[' + (str(start) + ',' + str(stop) + ')')
   364  
   365  
   366  def _verify_single_split_fraction_result(
   367      source,
   368      expected_items,
   369      current_items,
   370      split_successful,
   371      primary_range,
   372      residual_range,
   373      split_fraction):
   374  
   375    assert primary_range
   376    primary_items = read_from_source(source, *primary_range)
   377  
   378    if not split_successful:
   379      # For unsuccessful splits, residual_range should be None.
   380      assert not residual_range
   381  
   382    residual_items = (
   383        read_from_source(source, *residual_range) if split_successful else [])
   384  
   385    total_items = primary_items + residual_items
   386  
   387    if current_items != primary_items:
   388      raise ValueError(
   389          'Current source %r and a source created using the '
   390          'range of the primary source %r determined '
   391          'by performing dynamic work rebalancing at fraction '
   392          '%r produced different values. Expected '
   393          'these sources to produce the same list of values.' %
   394          (source, _range_to_str(*primary_range), split_fraction))
   395  
   396    if expected_items != total_items:
   397      raise ValueError(
   398          'Items obtained by reading the source %r for primary '
   399          'and residual ranges %s and %s did not produce the '
   400          'expected list of values.' %
   401          (source, _range_to_str(*primary_range), _range_to_str(*residual_range)))
   402  
   403    result = (len(primary_items), len(residual_items) if split_successful else -1)
   404    return result
   405  
   406  
   407  def assert_split_at_fraction_succeeds_and_consistent(
   408      source, num_items_to_read_before_split, split_fraction):
   409    """Verifies some consistency properties of dynamic work rebalancing.
   410  
   411    Equivalent to the following pseudocode:::
   412  
   413      original_range_tracker = source.getRangeTracker(None, None)
   414      original_reader = source.read(original_range_tracker)
   415      items_before_split = read N items from original_reader
   416      suggested_split_position = original_range_tracker.position_for_fraction(
   417        split_fraction)
   418      original_stop_position - original_range_tracker.stop_position()
   419      split_result = range_tracker.try_split()
   420      split_position, split_fraction = split_result
   421      primary_range_tracker = source.get_range_tracker(
   422        original_range_tracker.start_position(), split_position)
   423      residual_range_tracker = source.get_range_tracker(split_position,
   424        original_stop_position)
   425  
   426      assert that: items when reading source.read(primary_range_tracker) ==
   427        items_before_split + items from continuing to read 'original_reader'
   428      assert that: items when reading source.read(original_range_tracker) =
   429        items when reading source.read(primary_range_tracker) + items when reading
   430      source.read(residual_range_tracker)
   431  
   432    Args:
   433  
   434      source: source to perform dynamic work rebalancing on.
   435      num_items_to_read_before_split: number of items to read before splitting.
   436      split_fraction: fraction to split at.
   437    """
   438  
   439    assert_split_at_fraction_behavior(
   440        source,
   441        num_items_to_read_before_split,
   442        split_fraction,
   443        ExpectedSplitOutcome.MUST_SUCCEED_AND_BE_CONSISTENT)
   444  
   445  
   446  def assert_split_at_fraction_fails(
   447      source, num_items_to_read_before_split, split_fraction):
   448    """Asserts that dynamic work rebalancing at a given fraction fails.
   449  
   450    Asserts that trying to perform dynamic splitting after reading
   451    'num_items_to_read_before_split' items from the source fails.
   452  
   453    Args:
   454      source: source to perform dynamic splitting on.
   455      num_items_to_read_before_split: number of items to read before splitting.
   456      split_fraction: fraction to split at.
   457    """
   458  
   459    assert_split_at_fraction_behavior(
   460        source,
   461        num_items_to_read_before_split,
   462        split_fraction,
   463        ExpectedSplitOutcome.MUST_FAIL)
   464  
   465  
   466  def assert_split_at_fraction_binary(
   467      source,
   468      expected_items,
   469      num_items_to_read_before_split,
   470      left_fraction,
   471      left_result,
   472      right_fraction,
   473      right_result,
   474      stats,
   475      start_position=None,
   476      stop_position=None):
   477    """Performs dynamic work rebalancing for fractions within a given range.
   478  
   479    Asserts that given a start position, a source can be split at every
   480    interesting fraction (halfway between two fractions that differ by at
   481    least one item) and the results are consistent if a split succeeds.
   482  
   483    Args:
   484      source: source to perform dynamic splitting on.
   485      expected_items: total set of items expected when reading the source.
   486      num_items_to_read_before_split: number of items to read before splitting.
   487      left_fraction: left fraction for binary splitting.
   488      left_result: result received by splitting at left fraction.
   489      right_fraction: right fraction for binary splitting.
   490      right_result: result received by splitting at right fraction.
   491      stats: a ``SplitFractionStatistics`` for storing results.
   492    """
   493    assert right_fraction > left_fraction
   494  
   495    if right_fraction - left_fraction < 0.001:
   496      # This prevents infinite recursion.
   497      return
   498  
   499    middle_fraction = (left_fraction + right_fraction) / 2
   500  
   501    if left_result is None:
   502      left_result = _assert_split_at_fraction_behavior(
   503          source,
   504          expected_items,
   505          num_items_to_read_before_split,
   506          left_fraction,
   507          ExpectedSplitOutcome.MUST_BE_CONSISTENT_IF_SUCCEEDS)
   508  
   509    if right_result is None:
   510      right_result = _assert_split_at_fraction_behavior(
   511          source,
   512          expected_items,
   513          num_items_to_read_before_split,
   514          right_fraction,
   515          ExpectedSplitOutcome.MUST_BE_CONSISTENT_IF_SUCCEEDS)
   516  
   517    middle_result = _assert_split_at_fraction_behavior(
   518        source,
   519        expected_items,
   520        num_items_to_read_before_split,
   521        middle_fraction,
   522        ExpectedSplitOutcome.MUST_BE_CONSISTENT_IF_SUCCEEDS)
   523  
   524    if middle_result[1] != -1:
   525      stats.successful_fractions.append(middle_fraction)
   526    if middle_result[1] > 0:
   527      stats.non_trivial_fractions.append(middle_fraction)
   528  
   529    # Two split results are equivalent if primary and residual ranges of them
   530    # produce the same number of records (simply checking the size of primary
   531    # enough since the total number of records is constant).
   532  
   533    if left_result[0] != middle_result[0]:
   534      assert_split_at_fraction_binary(
   535          source,
   536          expected_items,
   537          num_items_to_read_before_split,
   538          left_fraction,
   539          left_result,
   540          middle_fraction,
   541          middle_result,
   542          stats)
   543  
   544    # We special case right_fraction=1.0 since that could fail due to being out
   545    # of range. (even if a dynamic split fails at 'middle_fraction' and at
   546    # fraction 1.0, there might be fractions in range ('middle_fraction', 1.0)
   547    # where dynamic splitting succeeds).
   548    if right_fraction == 1.0 or middle_result[0] != right_result[0]:
   549      assert_split_at_fraction_binary(
   550          source,
   551          expected_items,
   552          num_items_to_read_before_split,
   553          middle_fraction,
   554          middle_result,
   555          right_fraction,
   556          right_result,
   557          stats)
   558  
   559  
# Limits for the multi-threaded part of assert_split_at_fraction_exhaustive():
# the first bounds the concurrent split trials attempted for a single item
# index, the second bounds the trials summed over all item indices.
MAX_CONCURRENT_SPLITTING_TRIALS_PER_ITEM = 100
MAX_CONCURRENT_SPLITTING_TRIALS_TOTAL = 1000
   562  
   563  
   564  def assert_split_at_fraction_exhaustive(
   565      source,
   566      start_position=None,
   567      stop_position=None,
   568      perform_multi_threaded_test=True):
   569    """Performs and tests dynamic work rebalancing exhaustively.
   570  
   571    Asserts that for each possible start position, a source can be split at
   572    every interesting fraction (halfway between two fractions that differ by at
   573    least one item) and the results are consistent if a split succeeds.
   574    Verifies multi threaded splitting as well.
   575  
   576    Args:
   577      source (~apache_beam.io.iobase.BoundedSource): the source to perform
   578        dynamic splitting on.
   579      perform_multi_threaded_test (bool): if :data:`True` performs a
   580        multi-threaded test, otherwise this test is skipped.
   581  
   582    Raises:
   583      ValueError: if the exhaustive splitting test fails.
   584    """
   585  
   586    expected_items = read_from_source(source, start_position, stop_position)
   587    if not expected_items:
   588      raise ValueError('Source %r is empty.' % source)
   589  
   590    if len(expected_items) == 1:
   591      raise ValueError('Source %r only reads a single item.' % source)
   592  
   593    all_non_trivial_fractions = []
   594  
   595    any_successful_fractions = False
   596    any_non_trivial_fractions = False
   597  
   598    for i in range(len(expected_items)):
   599      stats = SplitFractionStatistics([], [])
   600  
   601      assert_split_at_fraction_binary(
   602          source, expected_items, i, 0.0, None, 1.0, None, stats)
   603  
   604      if stats.successful_fractions:
   605        any_successful_fractions = True
   606      if stats.non_trivial_fractions:
   607        any_non_trivial_fractions = True
   608  
   609      all_non_trivial_fractions.append(stats.non_trivial_fractions)
   610  
   611    if not any_successful_fractions:
   612      raise ValueError(
   613          'SplitAtFraction test completed vacuously: no '
   614          'successful split fractions found')
   615  
   616    if not any_non_trivial_fractions:
   617      raise ValueError(
   618          'SplitAtFraction test completed vacuously: no non-trivial split '
   619          'fractions found')
   620  
   621    if not perform_multi_threaded_test:
   622      return
   623  
   624    num_total_trials = 0
   625    for i in range(len(expected_items)):
   626      non_trivial_fractions = [2.0]  # 2.0 is larger than any valid fraction.
   627      non_trivial_fractions.extend(all_non_trivial_fractions[i])
   628      min_non_trivial_fraction = min(non_trivial_fractions)
   629  
   630      if min_non_trivial_fraction == 2.0:
   631        # This will not happen all the time. Otherwise previous test will fail
   632        # due to vacuousness.
   633        continue
   634  
   635      num_trials = 0
   636      have_success = False
   637      have_failure = False
   638  
   639      thread_pool = _ThreadPool(2)
   640      try:
   641        while True:
   642          num_trials += 1
   643          if num_trials > MAX_CONCURRENT_SPLITTING_TRIALS_PER_ITEM:
   644            _LOGGER.warning(
   645                'After %d concurrent splitting trials at item #%d, observed '
   646                'only %s, giving up on this item',
   647                num_trials,
   648                i,
   649                'success' if have_success else 'failure')
   650            break
   651  
   652          if _assert_split_at_fraction_concurrent(source,
   653                                                  expected_items,
   654                                                  i,
   655                                                  min_non_trivial_fraction,
   656                                                  thread_pool):
   657            have_success = True
   658          else:
   659            have_failure = True
   660  
   661          if have_success and have_failure:
   662            _LOGGER.info(
   663                '%d trials to observe both success and failure of '
   664                'concurrent splitting at item #%d',
   665                num_trials,
   666                i)
   667            break
   668      finally:
   669        thread_pool.close()
   670  
   671      num_total_trials += num_trials
   672  
   673      if num_total_trials > MAX_CONCURRENT_SPLITTING_TRIALS_TOTAL:
   674        _LOGGER.warning(
   675            'After %d total concurrent splitting trials, considered '
   676            'only %d items, giving up.',
   677            num_total_trials,
   678            i)
   679        break
   680  
   681    _LOGGER.info(
   682        '%d total concurrent splitting trials for %d items',
   683        num_total_trials,
   684        len(expected_items))
   685  
   686  
   687  def _assert_split_at_fraction_concurrent(
   688      source,
   689      expected_items,
   690      num_items_to_read_before_splitting,
   691      split_fraction,
   692      thread_pool=None):
   693  
   694    range_tracker = source.get_range_tracker(None, None)
   695    stop_position_before_split = range_tracker.stop_position()
   696    reader = source.read(range_tracker)
   697    reader_iter = iter(reader)
   698  
   699    current_items = []
   700    for _ in range(num_items_to_read_before_splitting):
   701      current_items.append(next(reader_iter))
   702  
   703    def read_or_split(test_params):
   704      if test_params[0]:
   705        return [val for val in test_params[1]]
   706      else:
   707        position = test_params[1].position_at_fraction(test_params[2])
   708        result = test_params[1].try_split(position)
   709        return result
   710  
   711    inputs = []
   712    pool = thread_pool if thread_pool else _ThreadPool(2)
   713    try:
   714      inputs.append([True, reader_iter])
   715      inputs.append([False, range_tracker, split_fraction])
   716  
   717      results = pool.map(read_or_split, inputs)
   718    finally:
   719      if not thread_pool:
   720        pool.close()
   721  
   722    current_items.extend(results[0])
   723    primary_range = (
   724        range_tracker.start_position(), range_tracker.stop_position())
   725  
   726    split_result = results[1]
   727    residual_range = (
   728        split_result[0], stop_position_before_split) if split_result else None
   729  
   730    res = _verify_single_split_fraction_result(
   731        source,
   732        expected_items,
   733        current_items,
   734        split_result,
   735        primary_range,
   736        residual_range,
   737        split_fraction)
   738  
   739    return res[1] > 0