github.com/pachyderm/pachyderm@v1.13.4/examples/scraper/scraper.json (about)

     1  {
     2    "pipeline": {
     3      "name": "scraper"
     4    },
     5    "transform": {
     6      "cmd": [ "sh" ],
     7      "stdin": [
     8          "apt-get update -yq && apt-get install -yq --no-install-recommends ca-certificates wget",
     9          "wget --recursive --level 1 --accept jpg,jpeg,png,gif,bmp --page-requisites --adjust-extension --span-hosts --no-check-certificate --timestamping --directory-prefix /pfs/out --input-file /pfs/urls/urls"
    10      ],
    11      "acceptReturnCode": [4,5,6,7,8]
    12    },
    13    "parallelism_spec": {
    14      "constant": 1
    15    },
    16    "input": {
    17      "cross": [ {
    18        "pfs": {
    19          "glob": "urls/*",
    20          "repo": "urls"
    21        }
    22      },
    23      {
    24        "cron": {
    25          "name": "tick",
    26          "spec": "@every 1m"
    27        }
    28      } ]
    29    }
    30  }