github.com/kubeflow/training-operator@v1.7.0/examples/xgboost/xgboost-dist/local_test.py (about)

     1  # Licensed under the Apache License, Version 2.0 (the "License");
     2  # you may not use this file except in compliance with the License.
     3  # You may obtain a copy of the License at
     4  #
     5  #     http://www.apache.org/licenses/LICENSE-2.0
     6  #
     7  # Unless required by applicable law or agreed to in writing, software
     8  # distributed under the License is distributed on an "AS IS" BASIS,
     9  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    10  # See the License for the specific language governing permissions and
    11  # limitations under the License.
    12  
    13  """
This file contains tests for local XGBoost training and prediction on a single machine.
Note: it does not test distributed training or prediction.
    16  """
    17  from utils import dump_model, read_model, read_train_data, read_predict_data
    18  import xgboost as xgb
    19  import logging
    20  import numpy as np
    21  from sklearn.metrics import precision_score
    22  
    23  logger = logging.getLogger(__name__)
    24  
    25  
    26  def test_train_model():
    27      """
    28      test xgboost train in a single machine
    29      :return: trained model
    30      """
    31      rank = 1
    32      world_size = 10
    33      place = "/tmp/data"
    34      dmatrix = read_train_data(rank, world_size, place)
    35  
    36      param_xgboost_default = {'max_depth': 2, 'eta': 1, 'silent': 1,
    37                               'objective': 'multi:softprob', 'num_class': 3}
    38  
    39      booster = xgb.train(param_xgboost_default, dtrain=dmatrix)
    40  
    41      assert booster is not None
    42  
    43      return booster
    44  
    45  
    46  def test_model_predict(booster):
    47      """
    48      test xgboost train in the single node
    49      :return: true if pass the test
    50      """
    51      rank = 1
    52      world_size = 10
    53      place = "/tmp/data"
    54      dmatrix, y_test = read_predict_data(rank, world_size, place)
    55  
    56      preds = booster.predict(dmatrix)
    57      best_preds = np.asarray([np.argmax(line) for line in preds])
    58      score = precision_score(y_test, best_preds, average='macro')
    59  
    60      assert score > 0.99
    61  
    62      logging.info("Predict accuracy: %f", score)
    63  
    64      return True
    65  
    66  
    67  def test_upload_model(model, model_path, args):
    68  
    69      return dump_model(model, type="local", model_path=model_path, args=args)
    70  
    71  
    72  def test_download_model(model_path, args):
    73  
    74      return read_model(type="local", model_path=model_path, args=args)
    75  
    76  
    77  def run_test():
    78      args = {}
    79      model_path = "/tmp/xgboost"
    80  
    81      logging.info("Start the local test")
    82  
    83      booster = test_train_model()
    84      test_upload_model(booster, model_path, args)
    85      booster_new = test_download_model(model_path, args)
    86      test_model_predict(booster_new)
    87  
    88      logging.info("Finish the local test")
    89  
    90  
    91  if __name__ == '__main__':
    92  
    93      logging.basicConfig(format='%(message)s')
    94      logging.getLogger().setLevel(logging.INFO)
    95  
    96      run_test()