#!/bin/bash
#
# split_gz.sh (dgraph/cmd/bulk/split_gz.sh)
#
# Re-chunks every gzip-compressed RDF file (*.rdf.gz) found directly inside
# <input_dir> into pieces of at most LINES_PER_CHUNK lines and writes the
# pieces, gzip-compressed again, into <output_dir>. Each piece is named
# <base>_<suffix>.rdf.gz, where <base> is the input file name up to its first
# '.' and <suffix> is the suffix generated by split(1).
#
# WARNING: <output_dir> is deleted and recreated on every run.
#
# NOTE: two inputs that share the same <base> (e.g. a.1.rdf.gz and a.2.rdf.gz)
# write to the same chunk names, so the later one overwrites the earlier one.
#
# Usage:    split_gz.sh <input_dir> <output_dir>
# Requires: GNU split (for --lines), gunzip, gzip.
# Exit:     0 on success, 1 on usage error or if any step fails.

# -e: stop on unhandled errors; -u: reject unset variables;
# pipefail: a failing gunzip must fail the gunzip|split pipeline.
set -euo pipefail

# Maximum number of lines written to each output chunk.
readonly LINES_PER_CHUNK=10000000

# Prints its arguments to stderr and terminates the script with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Splits all inputs into chunk files, then compresses the chunks in parallel.
# Arguments: $1 - input directory, $2 - output directory (recreated).
main() {
  if [[ $# -ne 2 ]]; then
    die "Usage: $0 <input_dir> <output_dir>"
  fi

  local in_dir=$1
  local out_dir=$2

  # Validate everything BEFORE the destructive rm -rf below.
  [[ -n "$out_dir" ]] || die "Error: <output_dir> must not be empty"
  [[ -d "$in_dir" ]] || die "Error: input directory not found: $in_dir"
  if [[ "$in_dir" -ef "$out_dir" ]]; then
    # Same directory (by device/inode): recreating it would delete the inputs.
    die "Error: <output_dir> must differ from <input_dir>"
  fi

  # With nullglob an unmatched pattern expands to nothing instead of being
  # passed on as a literal file name.
  shopt -s nullglob
  local -a inputs=("$in_dir"/*.rdf.gz)
  if (( ${#inputs[@]} == 0 )); then
    die "Error: no *.rdf.gz files found in $in_dir"
  fi

  rm -rf -- "${out_dir:?}"
  mkdir -p -- "$out_dir"

  local input name base
  for input in "${inputs[@]}"; do
    printf 'Processing: %s\n' "$input"
    name=${input##*/}   # strip the directory part
    base=${name%%.*}    # keep everything before the first '.'
    gunzip < "$input" \
      | split --lines="$LINES_PER_CHUNK" -- - "${out_dir}/${base}_"
  done

  # Compress all chunks concurrently, remembering each job's PID so that
  # individual failures can be detected (a bare `wait` always returns 0).
  local chunk
  local -a pids=()
  for chunk in "$out_dir"/*; do
    printf 'Zipping: %s\n' "$chunk"
    gzip -S .rdf.gz -- "$chunk" &
    pids+=("$!")
  done

  local pid failed=0
  # The ${arr[@]+...} form keeps `set -u` happy on older bash when no chunks
  # were produced (e.g. every input was empty).
  for pid in ${pids[@]+"${pids[@]}"}; do
    wait "$pid" || failed=1
  done
  if (( failed != 0 )); then
    die "Error: one or more gzip jobs failed"
  fi
}

main "$@"