github.com/pachyderm/pachyderm@v1.13.4/examples/ml/tensorflow/training_and_export_gpu.json (about)

     1  {
     2    "pipeline": {
     3      "name": "checkpoint"
     4    },
     5    "description": "A pipeline that trains an image-to-image translation model in `train` mode; the downstream `model` pipeline then runs `export` mode on the resulting checkpoint.",
     6    "transform": {
     7      "image": "dwhitena/pix2pix",
     8      "cmd": [ "/bin/bash" ],
     9      "stdin": [
    10        "python pix2pix.py --mode train --output_dir /pfs/out --max_epochs 1 --input_dir /pfs/training --which_direction BtoA",
    11        "sed -i 's/out/checkpoint/g' /pfs/out/checkpoint"
    12      ],
    13      "env": {
    14        "LD_LIBRARY_PATH": "/usr/lib/nvidia:/usr/local/cuda/lib64:/rootfs/usr/lib/x86_64-linux-gnu"
    15      }
    16    },
    17    "resource_limits": {
    18      "memory": "4.0G",
    19      "cpu": 2,
    20      "gpu": 1
    21    },
    22    "parallelism_spec": {
    23      "constant": "1"
    24    },
    25    "input": {
    26      "pfs": {
    27        "repo": "training",
    28        "glob": "/"
    29      }
    30    }
    31  }
    32  {
    33    "pipeline": {
    34      "name": "model"
    35    },
    36    "transform": {
    37      "image": "dwhitena/pix2pix",
    38      "cmd": [ "/bin/bash" ],
    39      "stdin": [
    40        "python pix2pix.py --mode export --output_dir /pfs/out --checkpoint /pfs/checkpoint",
    41        "sed -i 's/out/model/g' /pfs/out/checkpoint"
    42      ]
    43    },
    44    "parallelism_spec": {
    45      "constant": "1"
    46    },
    47    "input": {
    48      "pfs": {
    49        "repo": "checkpoint",
    50        "glob": "/"
    51      }
    52    }
    53  }