github.com/pachyderm/pachyderm@v1.13.4/etc/testing/migration/v1_7/sort.sh (about)

     1  #!/bin/bash
     2  # sort.sh creates a collection of pachyderm pipelines that look like:
     3  #    left─┐
     4  #         ├─copy─sort
     5  #    right┘
     6  #
     7  # inputs:
     8  # each input file is named for a digit and contains 2-digit numbers ending in
     9  # that digit. E.g. '0' contains '00\n10\n20...', '1' contains '01\n11\n21\n...'
    10  # left:  0,...,4 -> copy -> sort -> 00, 01, 02, 03, 04, ...
    11  # right: 5,...,9
    12  
    13  HERE="$(dirname "${0}")"
    14  # shellcheck source=./etc/testing/migration/v1_7/deploy.sh
    15  source "${HERE}/deploy.sh"
    16  
    17  set -x
    18  
    19  pachctl_1_7 create-repo left
    20  pachctl_1_7 create-repo right
    21  
    22  pachctl_1_7 create-pipeline -f - <<EOF
    23  {
    24    "pipeline": {
    25      "name": "copy"
    26    },
    27    "transform": {
    28      "cmd": [ "/bin/bash" ],
    29      "stdin": [
    30        "cp /pfs/*/* /pfs/out"
    31      ]
    32    },
    33    "parallelism_spec": {
    34      "constant": 1
    35    },
    36    "input": {
    37      "union": [
    38        { "atom": { "repo": "left",  "glob": "/*" } },
    39        { "atom": { "repo": "right", "glob": "/*" } }
    40      ]
    41    },
    42    "enable_stats": true
    43  }
    44  {
    45    "pipeline": {
    46      "name": "sort"
    47    },
    48    "transform": {
    49      "cmd": [ "/bin/bash" ],
    50      "stdin": [
    51        "sort -n /pfs/copy/* >/pfs/out/nums"
    52      ]
    53    },
    54    "parallelism_spec": {
    55      "constant": 1
    56    },
    57    "input": { "atom": { "repo": "copy", "glob": "/" } },
    58    "enable_stats": true
    59  }
    60  EOF
    61  
    62  tmpfile=$(mktemp -p.)
    63  for _i in $(seq 0 9); do
    64    # roughly alternate between 'left' and 'right' by committing in a funny order
    65    # (this writes the files left/0, right/7, left/4, left/1, right/8, etc...)
    66    i=$(( _i*7 % 10 ))
    67    [[ "${i}" -ge 5 ]] && repo=right || repo=left
    68  
    69    # Clear tmpfile, write all two-digit numbers with ones place=$i to tmpfile
    70    echo -n "" >"${tmpfile}"
    71    for j in $(seq 0 9); do
    72      echo "${j}${i}" >>"${tmpfile}"
    73    done
    74  
    75    # Write to pachd
    76    pachctl_1_7 put-file "${repo}" master "/${i}" <"${tmpfile}"
    77  done
    78  
    79  # Wait for pipelines to process all commits
    80  pachctl_1_7 flush-commit left/master
    81  
    82  # Delete a few commits, as that has caused migration bugs in the past
    83  # TODO(msteffen): Split this test up into tests of distinct bugs (stats,
    84  # delete-commit, multiple pipelines)
    85  pachctl_1_7 delete-commit left master~4
    86  pachctl_1_7 delete-commit left master~3
    87  pachctl_1_7 delete-commit right master~4
    88  pachctl_1_7 delete-commit right master~3
    89  
    90  echo "Extracting metadata from Pachyderm. Note that this step occasionally"
    91  echo "fails due to transient encoding issues, and you may need to re-run it"
    92  
    93  set -x
    94  
    95  pachctl_1_7 extract >"${HERE}/sort.metadata"