github.com/pdfcpu/pdfcpu@v0.11.1/_scripts/extractContentDir.sh (about)

     1  #!/bin/sh
     2  
     3  # Copyright 2018 The pdfcpu Authors.
     4  #
     5  # Licensed under the Apache License, Version 2.0 (the "License");
     6  # you may not use this file except in compliance with the License.
     7  # You may obtain a copy of the License at
     8  #
     9  #	http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  
    17  # eg: ./extractContentDir.sh ~/pdf/big ~/pdf/out
    18  
    19  if [ $# -ne 2 ]; then
    20      echo "usage: ./extractContentDir.sh inDir outDir"
    21      exit 1
    22  fi
    23  
    24  out=$2
    25  
    26  #rm -drf $out/*
    27  
    28  #set -e
    29  
    30  for pdf in $1/*.pdf
    31  do
    32  	
    33  	f=${pdf##*/}
    34  	#echo f = $f
    35  	
    36  	f1=${f%.*}
    37  	#echo f1 = $f1
    38  	
    39      mkdir $out/$f1
    40      cp $pdf $out/$f1
    41  
    42      pdfcpu extract -verbose -mode=content $out/$f1/$f $out/$f1 &> $out/$f1/$f1.log
    43      if [ $? -eq 1 ]; then
    44          echo "extraction error: $pdf -> $out/$f1"
    45          echo
    46  		continue
    47      else
    48          echo "extraction success: $pdf -> $out/$f1"
    49      fi
    50  
    51  done