github.com/pachyderm/pachyderm@v1.13.4/examples/ml/hyperparameter/train/pytrain.py (about)

     1  import pandas as pd
     2  from sklearn import svm
     3  from sklearn.externals import joblib
     4  import argparse
     5  import os
     6  
     7  # command line arguments
     8  parser = argparse.ArgumentParser(description='Train a model for iris classification.')
     9  parser.add_argument('indir', type=str, help='Input directory containing the training set')
    10  parser.add_argument('outdir', type=str, help='Output directory for the trained model')
    11  parser.add_argument('cparam', type=float, help='Parameter C for SVM')
    12  parser.add_argument('gammaparam', type=float, help='Parameter Gamma for SVM')
    13  args = parser.parse_args()
    14  
    15  # training set column names
    16  cols = [
    17      "Sepal_Length",
    18      "Sepal_Width",
    19      "Petal_Length",
    20      "Petal_Width",
    21      "Species"
    22  ]
    23  
    24  features = [
    25      "Sepal_Length",
    26      "Sepal_Width",
    27      "Petal_Length",
    28      "Petal_Width"
    29  ]
    30  
    31  # import the iris training set
    32  irisDF = pd.read_csv(os.path.join(args.indir, "iris.csv"), names=cols)
    33  
    34  # fit the model
    35  svc = svm.SVC(kernel='linear', C=args.cparam, gamma=args.gammaparam).fit(irisDF[features], irisDF["Species"])
    36  
    37  # persist the model
    38  joblib.dump(svc, os.path.join(args.outdir, 'model_C' + str(args.cparam) + '_G' + str(args.gammaparam) + '.pkl'))