{
  "pipeline": {
    "name": "split"
  },
  "description": "A pipeline that splits the `iris` data set into the `training` and `test` data sets.",
  "transform": {
    "cmd": ["/bin/bash"],
    "stdin": [
      "shuf /pfs/raw_data/iris.csv > /tmp/iris_shuffled.csv",
      "head -n80 /tmp/iris_shuffled.csv > /pfs/out/train.csv",
      "tail -n20 /tmp/iris_shuffled.csv > /pfs/out/test.csv"
    ]
  },
  "parallelism_spec": {
    "constant": 1
  },
  "input": {
    "pfs": {
      "repo": "raw_data",
      "glob": "/"
    }
  }
}