#!/bin/bash

# Stress-test s3cmd multipart upload (MPT) against an aistore bucket:
# repeatedly PUT large files with randomized part sizes, list, and delete them.
#
# Prerequisites: ########################################################################################
# - aistore cluster
# - s3cmd
# - locally accessible (source) directory that MAY contain large files (see examples below)
# - any aistore bucket
#
## Examples:
## 1. use existing large files and run 8 iterations
##    s3-mpt-large-files.sh /tmp/largefiles s3://abc 8
#
## 2. generate (the default number of) large files, run 16 iterations
##    s3-mpt-large-files.sh /tmp/largefiles s3://abc 16 true
#
## 3. same as above w/ 10 generated large files
##    s3-mpt-large-files.sh /tmp/largefiles s3://abc 16 true 10
#
# #######################################################################################################

# command line
srcdir=$1
bucket=$2

# runtime defaults
iterations=4
generate="false"
numfiles=3

# unique destination prefix so concurrent runs don't collide
dstdir="mpt-$RANDOM"

# NOTE: to have s3cmd working directly with ais:// bucket
# overriding AWS defaults (incl. "--host=s3.amazonaws.com")
s3endpoint="localhost:8080/s3"
host="--host=$s3endpoint"
host_bucket="--host-bucket=$s3endpoint/%(bucket)"

if ! command -v s3cmd >/dev/null 2>&1; then
  echo "Error: s3cmd not installed" >&2
  exit 1
fi

if [[ $# -le 1 ]]; then
  echo "Usage: $0 DIR BUCKET [NUM-ITERATIONS [GENERATE [NUM-FILES]]], where:"
  echo "  DIR: source directory containing (large) files"
  echo "  BUCKET: destination aistore bucket, e.g., s3://abc (where 's3://abc' may, in fact, be 'ais://abc' etc.)"
  echo "and optionally:"
  echo "  ITERATIONS: number of iterations to run (default: 4)"
  echo "  GENERATE:   generate large files (default: false)"
  echo "  NUM-FILES:  number of large files to generate (default: 3)"
  exit 1
fi

## command line
if [[ ! -d "$srcdir" ]]; then
  echo "Error: directory '$srcdir' does not exist" >&2
  exit 1
fi
if [[ -n "$3" ]]; then
  iterations=$3
fi
if [[ -n "$4" ]]; then
  generate=$4
fi
if [[ -n "$5" ]]; then
  numfiles=$5
fi

## uncomment for verbose output
## set -x

# Sanity check: the bucket must exist and be reachable.
# BUG FIX: original used 's3cmd info ... | exit $?' — 'exit' in a pipeline runs
# in a subshell and never terminates the script; '||' aborts on failure as intended.
s3cmd info "$bucket" $host $host_bucket --no-ssl 1>/dev/null || exit $?

# Remove generated source files (if any) and all uploaded objects; preserve
# the exit status of whatever triggered the trap.
cleanup() {
  rc=$?
  if [[ "$generate" == "true" ]]; then
    rm -f "$srcdir"/mpt-*
  fi
  s3cmd del "$bucket/$dstdir/mpt*" $host $host_bucket --no-ssl 2>/dev/null
  exit $rc
}

trap cleanup EXIT INT TERM

if [[ "$generate" == "true" ]]; then ## generate
  echo "Generating large files ..."
  count=499
  for ((i = 1; i <= numfiles; i++)); do
    # BUG FIX: original used 'dd ... | exit $?' (a no-op subshell); abort on dd failure.
    dd if=/dev/random of="$srcdir/mpt-$count" bs=4024k count=$count || exit $?
    count=$((count + 500)) # growing sizes: ~2GB, ~4GB, ...
  done
fi

for ((i = 1; i <= iterations; i++)); do
  echo "Iteration #$i -------------------------------------------------------------------------"
  # iterate source files via glob (not 'ls' output, which breaks on unusual names)
  for path in "$srcdir"/*; do
    [[ -e "$path" ]] || continue # empty directory: glob expands to literal pattern
    f=${path##*/}
    filesize=$(stat --printf="%s" "$path")
    mbs=$((filesize / 1000 / 1000))
    # pick a random part count, then derive the chunk size (MB) from it
    parts=$(shuf -i 2-100 -n 1)
    partsize=$((mbs / parts))

    # s3cmd requires multipart chunks >= 5MB
    if [[ $partsize -le 5 ]]; then
      partsize=$(shuf -i 5-10 -n 1)
    fi
    cmd="s3cmd put $srcdir/$f $bucket/$dstdir/$f --multipart-chunk-size-mb=$partsize $host $host_bucket --no-ssl"
    echo "Running '$cmd' ..."
    $cmd || exit $?
  done
  # verify the uploads are listable, then clean up this iteration's objects
  s3cmd ls "$bucket/$dstdir/*" $host $host_bucket --no-ssl || exit $?
  echo "Cleaning up ..."
  for path in "$srcdir"/*; do
    [[ -e "$path" ]] || continue
    s3cmd del "$bucket/$dstdir/${path##*/}" $host $host_bucket --no-ssl
  done
done