{
  "pipeline": {
    "name": "scraper"
  },
  "transform": {
    "cmd": ["sh"],
    "stdin": [
      "apt-get update -yq && apt-get install -yq --no-install-recommends ca-certificates wget",
      "wget --recursive --level 1 --accept jpg,jpeg,png,gif,bmp --page-requisites --adjust-extension --span-hosts --no-check-certificate --timestamping --directory-prefix /pfs/out --input-file /pfs/urls/urls"
    ],
    "accept_return_code": [4, 5, 6, 7, 8]
  },
  "parallelism_spec": {
    "constant": 1
  },
  "input": {
    "cross": [
      {
        "pfs": {
          "glob": "urls/*",
          "repo": "urls"
        }
      },
      {
        "cron": {
          "name": "tick",
          "spec": "@every 1m"
        }
      }
    ]
  }
}