# Source: github.com/pachyderm/pachyderm@v1.13.4/examples/ml/iris/python/iris-train-python-lda/pytrain.py

     1  import pandas as pd
     2  from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
     3  from sklearn.externals import joblib
     4  import argparse
     5  import os
     6  
     7  # command line arguments
     8  parser = argparse.ArgumentParser(description='Train a model for iris classification.')
     9  parser.add_argument('indir', type=str, help='Input directory containing the training set')
    10  parser.add_argument('outdir', type=str, help='Output directory for the trained model')
    11  args = parser.parse_args()
    12  
    13  # training set column names
    14  cols = [
    15      "Sepal_Length",
    16      "Sepal_Width",
    17      "Petal_Length",
    18      "Petal_Width",
    19      "Species"
    20  ]
    21  
    22  features = [
    23      "Sepal_Length",
    24      "Sepal_Width",
    25      "Petal_Length",
    26      "Petal_Width"
    27  ]
    28  
    29  # import the iris training set
    30  irisDF = pd.read_csv(os.path.join(args.indir, "iris.csv"), names=cols)
    31  
    32  # fit the model
    33  lda = LinearDiscriminantAnalysis().fit(irisDF[features], irisDF["Species"])
    34  
    35  # output a text description of the model
    36  f = open(os.path.join(args.outdir, 'model.txt'), 'w')
    37  f.write(str(lda))
    38  f.close()
    39  
    40  # persist the model
    41  joblib.dump(lda, os.path.join(args.outdir, 'model.pkl'))