{
  "pipeline": {
    "name": "scraper"
  },
  "description": "A pipeline that pulls content from a specified Internet source.",
  "transform": {
    "cmd": [ "bash" ],
    "stdin": [
      "apt-get update -yq && apt-get install -yq --no-install-recommends ca-certificates wget",
      "FILES=/pfs/urls/*",
      "for f in $FILES",
      "do",
      "wget -e robots=off --adjust-extension --no-check-certificate --no-directories --directory-prefix \"/pfs/out/$(basename \"$f\")\" $(cat \"$f\")",
      "done"
    ],
    "acceptReturnCode": [4, 5, 6, 7, 8]
  },
  "input": {
    "pfs": {
      "repo": "urls",
      "glob": "/*"
    }
  }
}