github.com/benoitkugler/goacve@v0.0.0-20201217100549-151ce6e55dc8/server/migration/v2_to_v3/cmd/compression_docs/main.go (about)

     1  // Télécharge, compresse et sauve les images JPEG et les documents PDF de la base.
     2  // Ce script est à lancer après avoir décompressé les documents (v3)
     3  // une seule fois
     4  package main
     5  
     6  import (
     7  	"flag"
     8  	"fmt"
     9  	"log"
    10  	"mime"
    11  	"path/filepath"
    12  	"strings"
    13  
    14  	"github.com/benoitkugler/goACVE/server/core/rawdata"
    15  	"github.com/benoitkugler/goACVE/logs"
    16  	"github.com/benoitkugler/goACVE/server/documents"
    17  )
    18  
    19  func main() {
    20  	dev := flag.Bool("dev", true, "which DB to use")
    21  	flag.Parse()
    22  
    23  	logsDB := logs.DBProd
    24  	if *dev {
    25  		logsDB = logs.DBDev
    26  	}
    27  
    28  	fmt.Println("Connecting on", logsDB.Host, logsDB.Name)
    29  
    30  	db, err := rawdata.ConnectDB(logsDB)
    31  	if err != nil {
    32  		log.Fatal(err)
    33  	}
    34  	defer db.Close()
    35  
    36  	tx, err := db.Begin()
    37  	if err != nil {
    38  		log.Fatal(err)
    39  	}
    40  
    41  	fmt.Println("Selecting JPEG...")
    42  
    43  	rds, err := rawdata.SelectAllDocuments(tx)
    44  	if err != nil {
    45  		log.Fatal(err)
    46  	}
    47  	// on ignore les documents liés aux camps ou aux contraintes
    48  	docPers, err := rawdata.SelectAllDocumentPersonnes(tx)
    49  	if err != nil {
    50  		log.Fatal(err)
    51  	}
    52  	docAides, err := rawdata.SelectAllDocumentAides(tx)
    53  	if err != nil {
    54  		log.Fatal(err)
    55  	}
    56  	mapPers, mapAides := docPers.ByIdDocument(), docAides.ByIdDocument()
    57  
    58  	var idsJPEG, idsPDF rawdata.Ids
    59  	for _, doc := range rds {
    60  		_, linkedPers := mapPers[doc.Id]
    61  		_, linkedAide := mapAides[doc.Id]
    62  		if !(linkedPers || linkedAide) {
    63  			continue
    64  		}
    65  
    66  		type_ := mime.TypeByExtension(filepath.Ext(doc.NomClient.String()))
    67  		if type_ == "image/jpeg" {
    68  			idsJPEG = append(idsJPEG, doc.Id)
    69  		} else if type_ == "application/pdf" {
    70  			idsPDF = append(idsPDF, doc.Id)
    71  		}
    72  	}
    73  
    74  	fmt.Printf("Downloading %d JPEG images... \n", len(idsJPEG))
    75  	contenus, err := rawdata.SelectContenuDocumentsByIdDocuments(tx, idsJPEG...)
    76  	if err != nil {
    77  		log.Fatal(err)
    78  	}
    79  
    80  	var beforeCompression, afterCompression int
    81  	for i, contenu := range contenus {
    82  		if i%5 == 0 {
    83  			fmt.Printf("\rCompressing document %d / %d (%d %%) ...", i+1, len(contenus), 100*(i+1)/len(contenus))
    84  		}
    85  
    86  		compressed, err := documents.CompressJPEG(contenu.Contenu)
    87  		if err != nil {
    88  			log.Println("invalid JPEG :", err)
    89  			continue
    90  		}
    91  
    92  		beforeCompression += len(contenu.Contenu)
    93  		afterCompression += len(compressed)
    94  
    95  		_, err = tx.Exec("UPDATE contenu_documents SET contenu = $1 WHERE id_document = $2", compressed, contenu.IdDocument)
    96  		if err != nil {
    97  			log.Fatal(err)
    98  		}
    99  	}
   100  	fmt.Printf("Compression rate for JPEG : %.3f : %d KB -> %d KB\n",
   101  		float64(beforeCompression)/float64(afterCompression), beforeCompression/1000, afterCompression/1000)
   102  
   103  	fmt.Printf("Downloading %d PDF documents... \n", len(idsPDF))
   104  	contenus, err = rawdata.SelectContenuDocumentsByIdDocuments(tx, idsPDF...)
   105  	if err != nil {
   106  		log.Fatal(err)
   107  	}
   108  
   109  	beforeCompression, afterCompression = 0, 0
   110  	for i, contenu := range contenus {
   111  		if i%5 == 0 {
   112  			fmt.Printf("\rCompressing document %d / %d (%d %%) ...", i+1, len(contenus), 100*(i+1)/len(contenus))
   113  		}
   114  
   115  		compressed, err := documents.CompressPdf(contenu.Contenu)
   116  		if err != nil {
   117  			log.Println("invalid PDF :", err)
   118  			continue
   119  		}
   120  		if len(compressed) >= len(contenu.Contenu) {
   121  			// pas intéressant, on met seulement à jour les stats
   122  			continue
   123  		}
   124  
   125  		beforeCompression += len(contenu.Contenu)
   126  		afterCompression += len(compressed)
   127  
   128  		filename := rds[contenu.IdDocument].NomClient.String()
   129  		ext := filepath.Ext(filename)
   130  		filename = strings.TrimSuffix(filename, ext) + ".JPG"
   131  		_, err = tx.Exec("UPDATE documents SET nom_client = $1 WHERE id = $2", filename, contenu.IdDocument)
   132  		if err != nil {
   133  			log.Fatal(err)
   134  		}
   135  		_, err = tx.Exec("UPDATE contenu_documents SET contenu = $1 WHERE id_document = $2", compressed, contenu.IdDocument)
   136  		if err != nil {
   137  			log.Fatal(err)
   138  		}
   139  	}
   140  
   141  	err = tx.Commit()
   142  	if err != nil {
   143  		log.Fatal(err)
   144  	}
   145  
   146  	fmt.Printf("Compression rate for PDF : %.3f : %d KB -> %d KB\n",
   147  		float64(beforeCompression)/float64(afterCompression), beforeCompression/1000, afterCompression/1000)
   148  }