github.com/pachyderm/pachyderm@v1.13.4/examples/word_count/pipelines/scraper.json (about)

     1  {
     2    "pipeline": {
     3      "name": "scraper"
     4    },
     5    "description": "A pipeline that pulls content from a specified Internet source.",
     6    "transform": {
     7      "cmd": [ "bash" ],
     8      "stdin": [
     9        "apt-get update -yq && apt-get install -yq --no-install-recommends ca-certificates wget",
    10        "FILES=/pfs/urls/*",
    11        "for f in $FILES",
    12        "do",
    13        "wget -e robots=off --adjust-extension --no-check-certificate --no-directories --directory-prefix \"/pfs/out/$(basename \"$f\")\" $(cat \"$f\")",
    14        "done"
    15      ],
    16      "acceptReturnCode": [4,5,6,7,8]
    17    },
    18    "input": {
    19      "pfs": {
    20        "repo": "urls",
    21        "glob": "/*"
    22      }
    23    }
    24  }