github.com/dshekhar95/sub_dgraph@v0.0.0-20230424164411-6be28e40bbf1/dgraph/cmd/bulk/split_gz.sh (about)

     1  #!/bin/bash
     2  
     3  set -e
     4  
     5  if [[ $# != 2 ]]; then
     6  		echo "Usage: $0 <input_dir> <output_dir>"
     7  		exit 1
     8  fi
     9  
    10  inDir=$1
    11  outDir=$2
    12  
    13  rm -rf $outDir
    14  mkdir $outDir
    15  for inputFile in $inDir/*.rdf.gz; do
    16  		echo Processing: $inputFile
    17  		base=$(basename $inputFile | cut -f 1 -d '.')
    18  		gunzip < $inputFile | split --lines=10000000 - $outDir/$base$(echo _)
    19  done
    20  for chunkedFile in $outDir/*; do
    21  	echo "Zipping: $chunkedFile"
    22  	gzip -S .rdf.gz $chunkedFile &
    23  done
    24  wait