github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/ml/inference/utils_test.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  # pytype: skip-file
    18  
    19  import unittest
    20  
    21  import apache_beam as beam
    22  from apache_beam.io.filesystem import FileMetadata
    23  from apache_beam.ml.inference import utils
    24  from apache_beam.testing.test_pipeline import TestPipeline
    25  from apache_beam.testing.util import assert_that
    26  from apache_beam.testing.util import equal_to
    27  
    28  
    29  class WatchFilePatternTest(unittest.TestCase):
    30    def test_latest_file_by_timestamp_default_value(self):
    31      # match continuously returns the files in sorted timestamp order.
    32      main_input_pcoll = [
    33          FileMetadata(
    34              'path1.py',
    35              10,
    36              last_updated_in_seconds=utils._START_TIME_STAMP - 20),
    37          FileMetadata(
    38              'path2.py',
    39              10,
    40              last_updated_in_seconds=utils._START_TIME_STAMP - 10)
    41      ]
    42      with TestPipeline() as p:
    43        files_pc = (
    44            p
    45            | beam.Create(main_input_pcoll)
    46            | beam.Map(lambda x: (x.path, x))
    47            | beam.ParDo(utils._GetLatestFileByTimeStamp())
    48            | beam.Map(lambda x: x[0]))
    49        assert_that(files_pc, equal_to(['', '']))
    50  
    51    def test_latest_file_with_timestamp_after_pipeline_construction_time(self):
    52      main_input_pcoll = [
    53          FileMetadata(
    54              'path1.py',
    55              10,
    56              last_updated_in_seconds=utils._START_TIME_STAMP + 10)
    57      ]
    58      with TestPipeline() as p:
    59        files_pc = (
    60            p
    61            | beam.Create(main_input_pcoll)
    62            | beam.Map(lambda x: (x.path, x))
    63            | beam.ParDo(utils._GetLatestFileByTimeStamp())
    64            | beam.Map(lambda x: x[0]))
    65        assert_that(files_pc, equal_to(['path1.py']))
    66  
    67    def test_emitting_singleton_output(self):
    68      # match continuously returns the files in sorted timestamp order.
    69      main_input_pcoll = [
    70          FileMetadata(
    71              'path1.py',
    72              10,
    73              last_updated_in_seconds=utils._START_TIME_STAMP - 20),
    74          # returns default
    75          FileMetadata(
    76              'path2.py',
    77              10,
    78              last_updated_in_seconds=utils._START_TIME_STAMP - 10),
    79          # returns default
    80          FileMetadata(
    81              'path3.py',
    82              10,
    83              last_updated_in_seconds=utils._START_TIME_STAMP + 10),
    84          FileMetadata(
    85              'path4.py',
    86              10,
    87              last_updated_in_seconds=utils._START_TIME_STAMP + 20)
    88      ]
    89      # returns path3.py
    90  
    91      with TestPipeline() as p:
    92        files_pc = (
    93            p
    94            | beam.Create(main_input_pcoll)
    95            | beam.Map(lambda x: (x.path, x))
    96            | beam.ParDo(utils._GetLatestFileByTimeStamp())
    97            | beam.ParDo(utils._ConvertIterToSingleton())
    98            | beam.Map(lambda x: x[0]))
    99        assert_that(files_pc, equal_to(['', 'path3.py', 'path4.py']))
   100  
   101  
   102  if __name__ == '__main__':
   103    unittest.main()