github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/ml/inference/tensorflow_inference_test.py

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# pytype: skip-file

import os
import shutil
import tempfile
import unittest
import unittest.mock  # used to stub out model loading in TFRunInferenceTestWithMocks
from typing import Any
from typing import Dict
from typing import Iterable
from typing import Optional
from typing import Sequence
from typing import Union

import numpy
import pytest

import apache_beam as beam
from apache_beam.ml.inference import utils
from apache_beam.ml.inference.base import KeyedModelHandler
from apache_beam.ml.inference.base import PredictionResult
from apache_beam.ml.inference.base import RunInference
from apache_beam.testing.test_pipeline import TestPipeline
from apache_beam.testing.util import assert_that
from apache_beam.testing.util import equal_to

# pylint: disable=ungrouped-imports
try:
  import tensorflow as tf
  from apache_beam.ml.inference.sklearn_inference_test import _compare_prediction_result
  from apache_beam.ml.inference.tensorflow_inference import TFModelHandlerNumpy, TFModelHandlerTensor
  from apache_beam.ml.inference import tensorflow_inference
except ImportError:
  raise unittest.SkipTest(
      'TensorFlow dependencies are not installed. '
      'Make sure you have tensorflow installed to run these tests.')


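# Fake model that stands in for a TF/Keras model with a numpy-based `predict`
# method; it multiplies its input by 10 so results can be checked without
# loading a real model.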
class FakeTFNumpyModel:
  def predict(self, input: numpy.ndarray):
    return numpy.multiply(input, 10)


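# Fake model operating on tf.Tensor inputs; multiplies by 10 and optionally
# adds 10 when the `add` keyword inference arg is passed.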
class FakeTFTensorModel:
  def predict(self, input: tf.Tensor, add=False):
    if add:
      return tf.math.add(tf.math.multiply(input, 10), 10)
    return tf.math.multiply(input, 10)


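# Builds a tiny Keras model that doubles a length-3 float32 input; it is saved
# to a temp dir and loaded through the model handlers in the batching tests.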
def _create_mult2_model():
  inputs = tf.keras.Input(shape=(3,))
  outputs = tf.keras.layers.Lambda(lambda x: x * 2, dtype='float32')(inputs)
  return tf.keras.Model(inputs=inputs, outputs=outputs)


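# Returns True when two PredictionResults hold element-wise equal inference
# tensors.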
def _compare_tensor_prediction_result(x, y):
  return tf.reduce_all(tf.math.equal(x.inference, y.inference))


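# Inference function injected into the model handlers under test; it delegates
# to the fake model's `predict` and wraps the outputs into PredictionResults.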
def fake_inference_fn(
    model: tf.Module,
    batch: Union[Sequence[numpy.ndarray], Sequence[tf.Tensor]],
    inference_args: Dict[str, Any],
    model_id: Optional[str] = None) -> Iterable[PredictionResult]:
  predictions = model.predict(batch, **inference_args)
  return utils._convert_to_result(batch, predictions, model_id)


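# Tests that exercise TFModelHandlerNumpy / TFModelHandlerTensor directly and
# through RunInference, using the fakes above or the small mult2 Keras model.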
@pytest.mark.uses_tf
class TFRunInferenceTest(unittest.TestCase):
  def setUp(self):
    self.tmpdir = tempfile.mkdtemp()

  def tearDown(self):
    shutil.rmtree(self.tmpdir)

  def test_predict_numpy(self):
    fake_model = FakeTFNumpyModel()
    inference_runner = TFModelHandlerNumpy(
        model_uri='unused', inference_fn=fake_inference_fn)
    batched_examples = [numpy.array([1]), numpy.array([10]), numpy.array([100])]
    expected_predictions = [
        PredictionResult(numpy.array([1]), 10),
        PredictionResult(numpy.array([10]), 100),
        PredictionResult(numpy.array([100]), 1000)
    ]
    inferences = inference_runner.run_inference(batched_examples, fake_model)
    for actual, expected in zip(inferences, expected_predictions):
      self.assertTrue(_compare_prediction_result(actual, expected))

  def test_predict_tensor(self):
    fake_model = FakeTFTensorModel()
    inference_runner = TFModelHandlerTensor(
        model_uri='unused', inference_fn=fake_inference_fn)
    batched_examples = [
        tf.convert_to_tensor(numpy.array([1])),
        tf.convert_to_tensor(numpy.array([10])),
        tf.convert_to_tensor(numpy.array([100])),
    ]
    expected_predictions = [
        PredictionResult(ex, pred) for ex,
        pred in zip(
            batched_examples,
            [tf.math.multiply(n, 10) for n in batched_examples])
    ]

    inferences = inference_runner.run_inference(batched_examples, fake_model)
    for actual, expected in zip(inferences, expected_predictions):
      self.assertTrue(_compare_tensor_prediction_result(actual, expected))

  def test_predict_tensor_with_batch_size(self):
    model = _create_mult2_model()
    model_path = os.path.join(self.tmpdir, 'mult2')
    tf.keras.models.save_model(model, model_path)
    with TestPipeline() as pipeline:

      def fake_batching_inference_fn(
          model: tf.Module,
          batch: Union[Sequence[numpy.ndarray], Sequence[tf.Tensor]],
          inference_args: Dict[str, Any],
          model_id: Optional[str] = None) -> Iterable[PredictionResult]:
        if len(batch) != 2:
          raise Exception(
              f'Expected batch of size 2, received batch of size {len(batch)}')
        batch = tf.stack(batch, axis=0)
        predictions = model(batch)
        return utils._convert_to_result(batch, predictions, model_id)

      model_handler = TFModelHandlerTensor(
          model_uri=model_path,
          inference_fn=fake_batching_inference_fn,
          min_batch_size=2,
          max_batch_size=2)
      examples = [
          tf.convert_to_tensor(numpy.array([1.1, 2.2, 3.3], dtype='float32')),
          tf.convert_to_tensor(
              numpy.array([10.1, 20.2, 30.3], dtype='float32')),
          tf.convert_to_tensor(
              numpy.array([100.1, 200.2, 300.3], dtype='float32')),
          tf.convert_to_tensor(
              numpy.array([200.1, 300.2, 400.3], dtype='float32')),
      ]
      expected_predictions = [
          PredictionResult(ex, pred) for ex,
          pred in zip(examples, [tf.math.multiply(n, 2) for n in examples])
      ]

      pcoll = pipeline | 'start' >> beam.Create(examples)
      predictions = pcoll | RunInference(model_handler)
      assert_that(
          predictions,
          equal_to(
              expected_predictions,
              equals_fn=_compare_tensor_prediction_result))

  def test_predict_numpy_with_batch_size(self):
    model = _create_mult2_model()
    model_path = os.path.join(self.tmpdir, 'mult2_numpy')
    tf.keras.models.save_model(model, model_path)
    with TestPipeline() as pipeline:

      def fake_batching_inference_fn(
          model: tf.Module,
          batch: Sequence[numpy.ndarray],
          inference_args: Dict[str, Any],
          model_id: Optional[str] = None) -> Iterable[PredictionResult]:
        if len(batch) != 2:
          raise Exception(
              f'Expected batch of size 2, received batch of size {len(batch)}')
        vectorized_batch = numpy.stack(batch, axis=0)
        predictions = model.predict(vectorized_batch, **inference_args)
        return utils._convert_to_result(batch, predictions, model_id)

      model_handler = TFModelHandlerNumpy(
          model_uri=model_path,
          inference_fn=fake_batching_inference_fn,
          min_batch_size=2,
          max_batch_size=2)
      examples = [
          numpy.array([1.1, 2.2, 3.3], dtype='float32'),
          numpy.array([10.1, 20.2, 30.3], dtype='float32'),
          numpy.array([100.1, 200.2, 300.3], dtype='float32'),
          numpy.array([200.1, 300.2, 400.3], dtype='float32'),
      ]
      expected_predictions = [
          PredictionResult(ex, pred) for ex,
          pred in zip(examples, [numpy.multiply(n, 2) for n in examples])
      ]

      pcoll = pipeline | 'start' >> beam.Create(examples)
      predictions = pcoll | RunInference(model_handler)
      assert_that(
          predictions,
          equal_to(
              expected_predictions,
              equals_fn=_compare_tensor_prediction_result))

  def test_predict_tensor_with_args(self):
    fake_model = FakeTFTensorModel()
    inference_runner = TFModelHandlerTensor(
        model_uri='unused', inference_fn=fake_inference_fn)
    batched_examples = [
        tf.convert_to_tensor(numpy.array([1])),
        tf.convert_to_tensor(numpy.array([10])),
        tf.convert_to_tensor(numpy.array([100])),
    ]
    expected_predictions = [
        PredictionResult(ex, pred) for ex,
        pred in zip(
            batched_examples, [
                tf.math.add(tf.math.multiply(n, 10), 10)
                for n in batched_examples
            ])
    ]

    inferences = inference_runner.run_inference(
        batched_examples, fake_model, inference_args={"add": True})
    for actual, expected in zip(inferences, expected_predictions):
      self.assertTrue(_compare_tensor_prediction_result(actual, expected))

  def test_predict_keyed_numpy(self):
    fake_model = FakeTFNumpyModel()
    inference_runner = KeyedModelHandler(
        TFModelHandlerNumpy(model_uri='unused', inference_fn=fake_inference_fn))
    batched_examples = [
        ('k1', numpy.array([1], dtype=numpy.int64)),
        ('k2', numpy.array([10], dtype=numpy.int64)),
        ('k3', numpy.array([100], dtype=numpy.int64)),
    ]
    expected_predictions = [
        (ex[0], PredictionResult(ex[1], pred)) for ex,
        pred in zip(
            batched_examples,
            [numpy.multiply(n[1], 10) for n in batched_examples])
    ]
    inferences = inference_runner.run_inference(batched_examples, fake_model)
    for actual, expected in zip(inferences, expected_predictions):
      self.assertTrue(_compare_prediction_result(actual[1], expected[1]))

  def test_predict_keyed_tensor(self):
    fake_model = FakeTFTensorModel()
    inference_runner = KeyedModelHandler(
        TFModelHandlerTensor(
            model_uri='unused', inference_fn=fake_inference_fn))
    batched_examples = [
        ('k1', tf.convert_to_tensor(numpy.array([1]))),
        ('k2', tf.convert_to_tensor(numpy.array([10]))),
        ('k3', tf.convert_to_tensor(numpy.array([100]))),
    ]
    expected_predictions = [
        (ex[0], PredictionResult(ex[1], pred)) for ex,
        pred in zip(
            batched_examples,
            [tf.math.multiply(n[1], 10) for n in batched_examples])
    ]
    inferences = inference_runner.run_inference(batched_examples, fake_model)
    for actual, expected in zip(inferences, expected_predictions):
      self.assertTrue(_compare_tensor_prediction_result(actual[1], expected[1]))


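# These tests replace tensorflow_inference._load_model with a MagicMock so the
# arguments passed to model loading can be asserted without reading a real
# saved model from disk.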
@pytest.mark.uses_tf
class TFRunInferenceTestWithMocks(unittest.TestCase):
  def setUp(self):
    self._load_model = tensorflow_inference._load_model
    tensorflow_inference._load_model = unittest.mock.MagicMock()

  def tearDown(self):
    tensorflow_inference._load_model = self._load_model

  def test_load_model_args(self):
    load_model_args = {'compile': False, 'custom_objects': {'optimizer': 1}}
    model_handler = TFModelHandlerNumpy(
        "dummy_model", load_model_args=load_model_args)
    model_handler.load_model()
    tensorflow_inference._load_model.assert_called_with(
        "dummy_model", "", load_model_args)

  def test_load_model_with_args_and_custom_weights(self):
    load_model_args = {'compile': False, 'custom_objects': {'optimizer': 1}}
    model_handler = TFModelHandlerNumpy(
        "dummy_model",
        custom_weights="dummy_weights",
        load_model_args=load_model_args)
    model_handler.load_model()
    tensorflow_inference._load_model.assert_called_with(
        "dummy_model", "dummy_weights", load_model_args)

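# env_vars passed to a model handler should be exported into os.environ on the
# worker before inference runs.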
  def test_env_vars_set_correctly_tensor(self):
    handler_with_vars = TFModelHandlerTensor(
        env_vars={'FOO': 'bar'},
        model_uri='unused',
        inference_fn=fake_inference_fn)
    os.environ.pop('FOO', None)
    self.assertNotIn('FOO', os.environ)
    batched_examples = [
        tf.convert_to_tensor(numpy.array([1])),
        tf.convert_to_tensor(numpy.array([10])),
        tf.convert_to_tensor(numpy.array([100])),
    ]
    with TestPipeline() as pipeline:
      _ = (
          pipeline
          | 'start' >> beam.Create(batched_examples)
          | RunInference(handler_with_vars))
      pipeline.run()
      self.assertIn('FOO', os.environ)
      self.assertEqual(os.environ['FOO'], 'bar')

  def test_env_vars_set_correctly_numpy(self):
    handler_with_vars = TFModelHandlerNumpy(
        env_vars={'FOO': 'bar'},
        model_uri="unused",
        inference_fn=fake_inference_fn)
    os.environ.pop('FOO', None)
    self.assertNotIn('FOO', os.environ)
    batched_examples = [numpy.array([1]), numpy.array([10]), numpy.array([100])]
    tensorflow_inference._load_model = unittest.mock.MagicMock()
    with TestPipeline() as pipeline:
      _ = (
          pipeline
          | 'start' >> beam.Create(batched_examples)
          | RunInference(handler_with_vars))
      pipeline.run()
      self.assertIn('FOO', os.environ)
      self.assertEqual(os.environ['FOO'], 'bar')


if __name__ == '__main__':
  unittest.main()