github.com/pachyderm/pachyderm@v1.13.4/etc/testing/examples.sh

github.com/pachyderm/pachyderm@v1.13.4/etc/testing/examples.sh (about)

     1  #!/bin/bash
     2  
     3  set -ex
     4  
     5  if [ -z "$RUN_BAD_TESTS" ]; then
     6      echo "Skipping because RUN_BAD_TESTS is empty"
     7      exit 0
     8  fi
     9  
    10  # Runs various examples to ensure they don't break. Some examples were
    11  # designed for older versions of pachyderm and are not used here.
    12  
    13  # NOTE: this script is run periodically in hub as a coarse-grained end-to-end
    14  # test. Be careful to ensure changes here work fine on hub. See hub's
    15  # examples-runner for details.
    16  
    17  pushd examples/opencv
    18      pachctl create repo images
    19      pachctl create pipeline -f edges.json
    20      pachctl create pipeline -f montage.json
    21      pachctl put file images@master -i images.txt
    22      pachctl put file images@master -i images2.txt
    23  
    24      # wait for everything to finish
    25      commit_id=$(pachctl list commit images -n 1 --raw | jq .commit.id -r)
    26      pachctl flush job "images@$commit_id"
    27  
    28      # ensure the montage image was generated
    29      pachctl inspect file montage@master:montage.png
    30  popd
    31  
    32  pachctl delete pipeline --all
    33  pachctl delete repo --all
    34  
    35  pushd examples/shuffle
    36      pachctl create repo fruits
    37      pachctl create repo pricing
    38      pachctl create pipeline -f shuffle.json
    39      pachctl put file fruits@master -f mango.jpeg
    40      pachctl put file fruits@master -f apple.jpeg
    41      pachctl put file pricing@master -f mango.json
    42      pachctl put file pricing@master -f apple.json
    43  
    44      # wait for everything to finish
    45      commit_id=$(pachctl list commit fruits -n 1 --raw | jq .commit.id -r)
    46      pachctl flush job "fruits@$commit_id"
    47      pachctl flush commit "fruits@$commit_id"
    48  
    49      # check downloaded and uploaded bytes
    50      downloaded_bytes=$(pachctl list job -p shuffle --raw | jq '.stats.download_bytes | values')
    51      if [ "$downloaded_bytes" != "" ]; then
    52          echo "Unexpected downloaded bytes in shuffle repo: $downloaded_bytes"
    53          exit 1
    54      fi
    55  
    56      uploaded_bytes=$(pachctl list job -p shuffle --raw | jq '.stats.upload_bytes | values')
    57      if [ "$uploaded_bytes" != "" ]; then
    58          echo "Unexpected downloaded bytes in shuffle repo: $uploaded_bytes"
    59          exit 1
    60      fi
    61  
    62      # check that the files were made
    63      files=$(pachctl list file "shuffle@master:*" --raw | jq '.file.path' -r)
    64      expected_files=$(echo -e "/apple\n/apple/cost.json\n/apple/img.jpeg\n/mango\n/mango/cost.json\n/mango/img.jpeg")
    65      if [ "$files" != "$expected_files" ]; then
    66          echo "Unexpected output files in shuffle repo: $files"
    67          exit 1
    68      fi
    69  popd
    70  
    71  pachctl delete pipeline --all
    72  pachctl delete repo --all
    73  
    74  pushd examples/word_count
    75      # note: we do not test reducing because it's slower
    76      pachctl create repo urls
    77      (cd data && pachctl put file urls@master -f Wikipedia)
    78      pachctl create pipeline -f pipelines/scraper.json
    79      pachctl create pipeline -f pipelines/map.json
    80  
    81      # wait for everything to finish
    82      commit_id=$(pachctl list commit urls -n 1 --raw | jq .commit.id -r)
    83      pachctl flush commit "urls@$commit_id"
    84  
    85      # just make sure the count for the word 'wikipedia' is a valid and
    86      # positive int, since the specific count may vary over time
    87      wikipedia_count=$(pachctl get file map@master:wikipedia)
    88      if [ "$wikipedia_count" -le 0 ]; then
    89          echo "Unexpected count for the word 'wikipedia': $wikipedia_count"
    90          exit 1
    91      fi
    92  popd
    93  
    94  pachctl delete pipeline --all
    95  pachctl delete repo --all
    96  
    97  pushd examples/ml/hyperparameter
    98      pachctl create repo raw_data
    99      pachctl create repo parameters
   100      pachctl list repo
   101  
   102      pushd data
   103          pachctl put file raw_data@master:iris.csv -f noisy_iris.csv
   104  
   105          pushd parameters
   106              pachctl put file parameters@master -f c_parameters.txt --split line --target-file-datums 1 
   107              pachctl put file parameters@master -f gamma_parameters.txt --split line --target-file-datums 1
   108          popd
   109      popd
   110  
   111      pachctl create pipeline -f split.json 
   112      pachctl create pipeline -f model.json
   113      pachctl create pipeline -f test.json 
   114      pachctl create pipeline -f select.json
   115  
   116      commit_id=$(pachctl list commit raw_data -n 1 --raw | jq .commit.id -r)
   117      pachctl flush job "raw_data@$commit_id"
   118  
   119      # just make sure we outputted some files
   120      selected_file_count=$(pachctl list file select@master | wc -l)
   121      if [ "$selected_file_count" -le 2 ]; then
   122          echo "Expected some files to be outputted in the select repo"
   123          exit 1
   124      fi
   125  popd
   126  
   127  pachctl delete pipeline --all
   128  pachctl delete repo --all
   129  
   130  pushd examples/ml/iris
   131      pachctl create repo training
   132      pachctl create repo attributes
   133  
   134      pushd data
   135          pachctl put file training@master -f iris.csv
   136      popd
   137  
   138      pachctl create pipeline -f julia_train.json
   139  
   140      pushd data/test
   141          pachctl put file attributes@master -r -f .
   142      popd
   143  
   144      pachctl list file attributes@master
   145      pachctl create pipeline -f julia_infer.json
   146  
   147      commit_id=$(pachctl list commit training -n 1 --raw | jq .commit.id -r)
   148      pachctl flush job "training@$commit_id"
   149  
   150      # just make sure we outputted some files
   151      inference_file_count=$(pachctl list file inference@master | wc -l)
   152      if [ "$inference_file_count" -ne 3 ]; then
   153          echo "Unexpected file count in inference repo"
   154          exit 1
   155      fi
   156  popd