#!/bin/bash
# sort.sh creates a collection of pachyderm pipelines that look like:
#   left─┐
#        ├─copy─sort
#   right┘
#
# inputs:
#   Each input file is named for a digit and contains the ten 2-digit numbers
#   ending in that digit. E.g. '0' contains '00\n10\n20...', '1' contains
#   '01\n11\n21\n...'
#     left:  0,...,4
#     right: 5,...,9
#
# pipelines:
#   copy: copies every file of both input repos into its output repo
#   sort: numerically sorts everything 'copy' produced into one file, 'nums'
#
# output:
#   The cluster metadata is dumped with 'extract' into sort.metadata, next to
#   this script.

HERE="$(dirname "${0}")"
# pachctl_1_7 is not defined in this file; presumably deploy.sh provides it
# (and brings up the 1.7 cluster) -- confirm against deploy.sh.
# shellcheck source=./etc/testing/migration/v1_7/deploy.sh
source "${HERE}/deploy.sh"

set -x

pachctl_1_7 create-repo left
pachctl_1_7 create-repo right

# The delimiter is quoted so the JSON specs reach pachctl verbatim, with no
# shell expansion applied to them.
pachctl_1_7 create-pipeline -f - <<'EOF'
{
  "pipeline": {
    "name": "copy"
  },
  "transform": {
    "cmd": [ "/bin/bash" ],
    "stdin": [
      "cp /pfs/*/* /pfs/out"
    ]
  },
  "parallelism_spec": {
    "constant": 1
  },
  "input": {
    "union": [
      { "atom": { "repo": "left", "glob": "/*" } },
      { "atom": { "repo": "right", "glob": "/*" } }
    ]
  },
  "enable_stats": true
}
{
  "pipeline": {
    "name": "sort"
  },
  "transform": {
    "cmd": [ "/bin/bash" ],
    "stdin": [
      "sort -n /pfs/copy/* >/pfs/out/nums"
    ]
  },
  "parallelism_spec": {
    "constant": 1
  },
  "input": { "atom": { "repo": "copy", "glob": "/" } },
  "enable_stats": true
}
EOF

# Scratch file (created in the current directory) that holds the contents of
# each input file before it is uploaded.
tmpfile="$(mktemp -p .)" || {
  echo "could not create a temporary file in the current directory" >&2
  exit 1
}

for _i in {0..9}; do
  # roughly alternate between 'left' and 'right' by committing in a funny order
  # (this writes the files left/0, right/7, left/4, left/1, right/8, etc...)
  i=$(( _i * 7 % 10 ))
  if (( i >= 5 )); then
    repo=right
  else
    repo=left
  fi

  # Clear tmpfile, write all two-digit numbers with ones place=$i to tmpfile
  : >"${tmpfile}"
  for j in {0..9}; do
    echo "${j}${i}" >>"${tmpfile}"
  done

  # Write to pachd
  pachctl_1_7 put-file "${repo}" master "/${i}" <"${tmpfile}"
done

# The scratch file is no longer needed; remove it so repeated runs do not
# litter the working directory with tmp.* files.
rm -f -- "${tmpfile}"

# Wait for pipelines to process all commits
pachctl_1_7 flush-commit left/master

# Delete a few commits, as that has caused migration bugs in the past
# TODO(msteffen): Split this test up into tests of distinct bugs (stats,
# delete-commit, multiple pipelines)
pachctl_1_7 delete-commit left master~4
pachctl_1_7 delete-commit left master~3
pachctl_1_7 delete-commit right master~4
pachctl_1_7 delete-commit right master~3

echo "Extracting metadata from Pachyderm. Note that this step occasionally"
echo "fails due to transient encoding issues, and you may need to re-run it"

# Tracing was already enabled above; this re-asserts it in case a sourced
# helper switched it off in the meantime.
set -x

pachctl_1_7 extract >"${HERE}/sort.metadata"