github.com/readium/readium-lcp-server@v0.0.0-20240101192032-6e95190e99f1/encrypt/process_encrypt.go (about)

     1  // Copyright 2021 Readium Foundation. All rights reserved.
     2  // Use of this source code is governed by a BSD-style license
     3  // that can be found in the LICENSE file exposed on Github (readium) in the project repository.
     4  
     5  package encrypt
     6  
     7  import (
     8  	"archive/zip"
     9  	"crypto/sha256"
    10  	"encoding/hex"
    11  	"errors"
    12  	"fmt"
    13  	"io"
    14  	"log"
    15  	"net/http"
    16  	"net/url"
    17  	"os"
    18  	"path"
    19  	"path/filepath"
    20  	"strings"
    21  
    22  	"github.com/readium/readium-lcp-server/crypto"
    23  	"github.com/readium/readium-lcp-server/epub"
    24  	apilcp "github.com/readium/readium-lcp-server/lcpserver/api"
    25  	"github.com/readium/readium-lcp-server/pack"
    26  	uuid "github.com/satori/go.uuid"
    27  )
    28  
    29  // Publication aggregates information during the process
    30  type Publication struct {
    31  	UUID          string
    32  	Title         string
    33  	Date          string
    34  	Description   string
    35  	Language      []string
    36  	Publisher     []string
    37  	Author        []string
    38  	Subject       []string
    39  	CoverUrl      string
    40  	StorageMode   int
    41  	FileName      string
    42  	EncryptionKey []byte
    43  	Location      string
    44  	ContentType   string
    45  	Size          uint32
    46  	Checksum      string
    47  }
    48  
    49  // ProcessEncryption encrypts a publication
    50  // inputPath must contain a processable file extension (EPUB, PDF, LPF or RPF)
    51  func ProcessEncryption(contentID, contentKey, inputPath, tempRepo, outputRepo, storageRepo, storageURL, storageFilename string, extractCover bool) (*Publication, error) {
    52  
    53  	if inputPath == "" {
    54  		return nil, errors.New("ProcessEncryption, parameter error")
    55  	}
    56  
    57  	var pub Publication
    58  
    59  	// if contentID is not set, generate a random UUID
    60  	if contentID == "" {
    61  		uid, err := uuid.NewV4()
    62  		if err != nil {
    63  			return nil, err
    64  		}
    65  		contentID = uid.String()
    66  	}
    67  	pub.UUID = contentID
    68  
    69  	// create a temp folder if declared, or use the current dir
    70  	if tempRepo != "" {
    71  		err := os.MkdirAll(tempRepo, os.ModePerm)
    72  		if err != nil && !os.IsExist(err) {
    73  			return nil, err
    74  		}
    75  	} else {
    76  		tempRepo, _ = os.Getwd()
    77  	}
    78  
    79  	// if the input file is stored on a remote server, fetch it and store it into a temp folder
    80  	tempPath, err := fetchInputFile(inputPath, tempRepo, contentID)
    81  	if err != nil {
    82  		return nil, err
    83  	}
    84  	deleteTemp := false
    85  	// if a temp file has been fetched, it will be deleted later
    86  	if tempPath != "" {
    87  		deleteTemp = true
    88  		inputPath = tempPath
    89  	}
    90  
    91  	// select a storage mode
    92  	pub.StorageMode = apilcp.Storage_none
    93  	// if the storage repo is set, set storage mode and output repository
    94  	// note: the -storage parameter takes precedence over -output
    95  	if storageRepo != "" {
    96  		// S3 storage is specified by the presence of "s3:" at the start of the -storage param
    97  		if strings.HasPrefix(storageRepo, "s3:") {
    98  			pub.StorageMode = apilcp.Storage_s3
    99  			outputRepo = tempRepo // before move to s3
   100  			// file system storage
   101  		} else {
   102  			pub.StorageMode = apilcp.Storage_fs
   103  			// create the storage folder when necessary
   104  			err := os.MkdirAll(storageRepo, os.ModePerm)
   105  			if err != nil && !os.IsExist(err) {
   106  				return nil, err
   107  			}
   108  			// the encrypted file will be directly generated inside the storage path
   109  			outputRepo = storageRepo
   110  		}
   111  	}
   112  	// if the output repo is still not set, use the temp directory.
   113  	if outputRepo == "" {
   114  		outputRepo = tempRepo
   115  	}
   116  
   117  	// set target file info
   118  	targetFileInfo(&pub, inputPath, storageFilename)
   119  
   120  	// set the target file name; use the content id by default
   121  	if storageFilename == "" {
   122  		storageFilename = pub.UUID
   123  	}
   124  
   125  	// set the output path
   126  	outputPath := filepath.Join(outputRepo, storageFilename)
   127  	fmt.Println("Output path:", outputPath)
   128  
   129  	// define an AES encrypter
   130  	encrypter := crypto.NewAESEncrypter_PUBLICATION_RESOURCES()
   131  
   132  	// select the encryption process from the input file extension
   133  	err = nil
   134  
   135  	inputExt := filepath.Ext(inputPath)
   136  
   137  	// the cover can be extracted if lcpencrypt stores the file and the file is an EPUB
   138  	if storageRepo == "" {
   139  		extractCover = false
   140  	}
   141  
   142  	switch inputExt {
   143  	case ".epub":
   144  		err = processEPUB(&pub, inputPath, outputPath, encrypter, contentKey, extractCover)
   145  	case ".pdf":
   146  		extractCover = false
   147  		err = processPDF(&pub, inputPath, outputPath, encrypter, contentKey)
   148  	case ".lpf":
   149  		extractCover = false
   150  		err = processLPF(&pub, inputPath, outputPath, encrypter, contentKey)
   151  	case ".audiobook", ".divina", ".webpub", ".rpf":
   152  		extractCover = false
   153  		err = processRPF(&pub, inputPath, outputPath, encrypter, contentKey)
   154  	default:
   155  		return nil, errors.New("unprocessable extension " + inputExt)
   156  	}
   157  	if err != nil {
   158  		return nil, err
   159  	}
   160  
   161  	if deleteTemp {
   162  		err = os.Remove(inputPath)
   163  		if err != nil {
   164  			return nil, err
   165  		}
   166  	}
   167  
   168  	// store the publication if required, and set pub.Location
   169  	switch pub.StorageMode {
   170  	// the license server will have to store the encrypted publication
   171  	// warning: the license server must have read access to the output repo.
   172  	case apilcp.Storage_none:
   173  		// location indicates to the license server the path to the encrypted publication
   174  		pub.Location = outputPath
   175  	// the encryption tools stores the encrypted publication in a file system
   176  	case apilcp.Storage_fs:
   177  		// location indicates the url of the publication
   178  		pub.Location, err = url.JoinPath(storageURL, storageFilename)
   179  		// the encryption tools stores the encrypted publication in an S3 storage
   180  	case apilcp.Storage_s3:
   181  		// store the encrypted file in its definitive S3 storage, delete the temp file
   182  		err = StoreS3Publication(outputPath, storageRepo, storageFilename)
   183  		if err != nil {
   184  			return nil, err
   185  		}
   186  		// location indicates the url of the publication on S3
   187  		pub.Location, err = url.JoinPath(storageURL, storageFilename)
   188  	}
   189  	if err != nil {
   190  		return nil, err
   191  	}
   192  	if extractCover {
   193  		coverExt := path.Ext(pub.CoverUrl)
   194  		pub.CoverUrl, _ = url.JoinPath(storageURL, storageFilename+coverExt)
   195  	}
   196  
   197  	return &pub, nil
   198  }
   199  
   200  // fetchInputFile fetches the input file from a remote server
   201  func fetchInputFile(inputPath, tempRepo, contentID string) (string, error) {
   202  
   203  	if inputPath == "" || tempRepo == "" || contentID == "" {
   204  		return "", errors.New("fetchInputFile, parameter error")
   205  	}
   206  
   207  	url, err := url.Parse(inputPath)
   208  	if err != nil {
   209  		// this is not a valid URL
   210  		return "", nil
   211  	}
   212  
   213  	// no need to fetch the file, which is in a file system
   214  	if url.Scheme != "http" && url.Scheme != "https" && url.Scheme != "ftp" {
   215  		return "", nil
   216  	}
   217  
   218  	// the temp file has the same extension as the remote file
   219  	inputExt := filepath.Ext(inputPath)
   220  	tempPath := filepath.Join(tempRepo, contentID+inputExt)
   221  	// create the temp file
   222  	out, err := os.Create(tempPath)
   223  	if err != nil {
   224  		return "", err
   225  	}
   226  	defer out.Close()
   227  
   228  	// fetch the file
   229  	if url.Scheme == "http" || url.Scheme == "https" {
   230  		res, err := http.Get(inputPath)
   231  		if err != nil {
   232  			return "", err
   233  		}
   234  		defer res.Body.Close()
   235  		defer out.Close()
   236  		_, err = io.Copy(out, res.Body)
   237  		if err != nil {
   238  			return "", err
   239  		}
   240  	} else if url.Scheme == "ftp" {
   241  		// we'll use https://github.com/jlaffaye/ftp when requested
   242  		return "", errors.New("ftp not supported yet")
   243  	}
   244  	return tempPath, nil
   245  }
   246  
   247  // targetFileInfo sets the file name and content type
   248  // which will be used during future downloads
   249  func targetFileInfo(pub *Publication, inputPath, storageFilename string) error {
   250  
   251  	// if the storage filename was imposed, use it
   252  	if storageFilename != "" {
   253  		pub.FileName = storageFilename
   254  	} else {
   255  		//  generate a filename from the input filename and a target extension
   256  		inputFile := filepath.Base(inputPath)
   257  		inputExt := filepath.Ext(inputPath)
   258  		fileNameNoExt := inputFile[:len(inputFile)-len(inputExt)]
   259  
   260  		var ext string
   261  		switch inputExt {
   262  		case ".epub":
   263  			ext = inputExt
   264  		case ".pdf":
   265  			ext = ".lcpdf"
   266  		case ".audiobook", ".rpf":
   267  			ext = ".lcpau"
   268  		case ".divina":
   269  			ext = ".lcpdi"
   270  		case ".lpf":
   271  			// short term solution. We'll need to inspect the W3C manifest and check conformsTo,
   272  			// to be certain this is an audiobook (vs another profile of Web Publication)
   273  			ext = ".lcpau"
   274  		case ".webpub":
   275  			// short term solution. We'll need to inspect the RWP manifest and check conformsTo,
   276  			// to be certain this package contains a pdf
   277  			ext = ".lcpdf"
   278  		}
   279  		pub.FileName = fileNameNoExt + ext
   280  	}
   281  
   282  	// find the target mime type
   283  	outputExt := filepath.Ext(pub.FileName)
   284  	switch outputExt {
   285  	case ".epub":
   286  		pub.ContentType = epub.ContentType_EPUB
   287  	case ".lcpdf":
   288  		pub.ContentType = "application/pdf+lcp"
   289  	case ".lcpau":
   290  		pub.ContentType = "application/audiobook+lcp"
   291  	case ".lcpdi":
   292  		pub.ContentType = "application/divina+lcp"
   293  	}
   294  	return nil
   295  }
   296  
   297  // checksum calculates the checksum of a file
   298  func checksum(file *os.File) string {
   299  
   300  	hasher := sha256.New()
   301  	file.Seek(0, 0)
   302  	if _, err := io.Copy(hasher, file); err != nil {
   303  		return ""
   304  	}
   305  	return hex.EncodeToString(hasher.Sum(nil))
   306  }
   307  
   308  // processEPUB encrypts resources in an EPUB
   309  func processEPUB(pub *Publication, inputPath string, outputPath string, encrypter crypto.Encrypter, contentKey string, extractCover bool) error {
   310  
   311  	// create a zip reader from the input path
   312  	zr, err := zip.OpenReader(inputPath)
   313  	if err != nil {
   314  		return err
   315  	}
   316  	defer zr.Close()
   317  
   318  	// generate an EPUB object
   319  	epub, err := epub.Read(&zr.Reader)
   320  	if err != nil {
   321  		return err
   322  	}
   323  
   324  	// init metadata
   325  	pub.Title = epub.Package[0].Metadata.Title[0]
   326  	pub.Date = epub.Package[0].Metadata.Date
   327  	pub.Description = epub.Package[0].Metadata.Description
   328  	pub.Language = epub.Package[0].Metadata.Language
   329  	pub.Publisher = epub.Package[0].Metadata.Publisher
   330  	pub.Author = epub.Package[0].Metadata.Author
   331  	pub.Subject = epub.Package[0].Metadata.Subject
   332  
   333  	// look for the cover image
   334  	coverImageID := "cover-image"
   335  	for _, meta := range epub.Package[0].Metadata.Metas {
   336  		if meta.Name == "cover" {
   337  			coverImageID = meta.Content
   338  		}
   339  	}
   340  	var coverPath string
   341  	for _, item := range epub.Package[0].Manifest.Items {
   342  		if strings.Contains(item.Properties, "cover-image") ||
   343  			item.ID == coverImageID {
   344  			// re-construct a path, avoid insertion of backslashes as separator on Windows
   345  			coverPath = filepath.ToSlash(filepath.Join(epub.Package[0].BasePath, item.Href))
   346  		}
   347  	}
   348  
   349  	// create the output file
   350  	outputFile, err := os.Create(outputPath)
   351  	if err != nil {
   352  		return err
   353  	}
   354  	// will close the output file
   355  	defer outputFile.Close()
   356  
   357  	// encrypt the content of the publication,
   358  	// write  into the output file
   359  	_, encryptionKey, err := pack.Do(encrypter, contentKey, epub, outputFile)
   360  	if err != nil {
   361  		return err
   362  	}
   363  	pub.EncryptionKey = encryptionKey
   364  	// calculate the output file size and checksum
   365  	stats, err := outputFile.Stat()
   366  	if err == nil && (stats.Size() > 0) {
   367  		filesize := stats.Size()
   368  		pub.Size = uint32(filesize)
   369  		cs := checksum(outputFile)
   370  		pub.Checksum = cs
   371  	}
   372  	if stats.Size() == 0 {
   373  		return errors.New("empty output file")
   374  	}
   375  
   376  	if extractCover {
   377  		// extract the cover image and store it at the target location
   378  		for _, f := range zr.File {
   379  			if f.Name == coverPath {
   380  				epubCover, err := f.Open()
   381  				if err != nil {
   382  					log.Printf("Error opening the cover in %s, %s", coverPath, err.Error())
   383  					break // move out of the loop
   384  				}
   385  				defer epubCover.Close()
   386  				// create the output cover
   387  				coverExt := path.Ext(coverPath)
   388  				coverFile, err := os.Create(outputPath + coverExt)
   389  				if err != nil {
   390  					return err
   391  				}
   392  				defer coverFile.Close()
   393  				_, err = io.Copy(coverFile, epubCover)
   394  				if err != nil {
   395  					// we do not consider it as an error
   396  					log.Printf("Error copying cover data, %s", err.Error())
   397  				}
   398  				// set temporarily, will be updated later
   399  				pub.CoverUrl = coverPath
   400  				break
   401  			}
   402  		}
   403  	}
   404  
   405  	return nil
   406  }
   407  
   408  // processPDF wraps a PDF file inside a Readium Package and encrypts its resources
   409  func processPDF(pub *Publication, inputPath string, outputPath string, encrypter crypto.Encrypter, contentKey string) error {
   410  
   411  	// generate a temp Readium Package (rwpp) which embeds the PDF file; its title is the PDF file name
   412  	tmpPackagePath := outputPath + ".tmp"
   413  	err := pack.BuildRPFFromPDF(filepath.Base(inputPath), inputPath, tmpPackagePath)
   414  	// will remove the tmp file even if an error is returned
   415  	defer os.Remove(tmpPackagePath)
   416  	// process error
   417  	if err != nil {
   418  		return err
   419  	}
   420  
   421  	// build an encrypted package
   422  	return buildEncryptedRPF(pub, tmpPackagePath, outputPath, encrypter, contentKey)
   423  }
   424  
   425  // processLPF transforms a W3C LPF file into a Readium Package and encrypts its resources
   426  func processLPF(pub *Publication, inputPath string, outputPath string, encrypter crypto.Encrypter, contentKey string) error {
   427  
   428  	// generate a tmp Readium Package (rwpp) out of a W3C Package (lpf)
   429  	tmpPackagePath := outputPath + ".tmp"
   430  	err := pack.BuildRPFFromLPF(inputPath, tmpPackagePath)
   431  	// will remove the tmp file even if an error is returned
   432  	defer os.Remove(tmpPackagePath)
   433  	// process error
   434  	if err != nil {
   435  		return err
   436  	}
   437  
   438  	// build an encrypted package
   439  	return buildEncryptedRPF(pub, tmpPackagePath, outputPath, encrypter, contentKey)
   440  }
   441  
   442  // processRPF encrypts the source Readium Package
   443  func processRPF(pub *Publication, inputPath string, outputPath string, encrypter crypto.Encrypter, contentKey string) error {
   444  
   445  	// build an encrypted package
   446  	return buildEncryptedRPF(pub, inputPath, outputPath, encrypter, contentKey)
   447  }
   448  
   449  // buildEncryptedRPF builds an encrypted Readium package out of an un-encrypted one
   450  // FIXME: it cannot be used for EPUB as long as Do() and Process() are not merged
   451  func buildEncryptedRPF(pub *Publication, inputPath string, outputPath string, encrypter crypto.Encrypter, contentKey string) error {
   452  
   453  	// create a reader on the un-encrypted readium package
   454  	reader, err := pack.OpenRPF(inputPath)
   455  	if err != nil {
   456  		return err
   457  	}
   458  	defer reader.Close()
   459  	// create the encrypted package file
   460  	outputFile, err := os.Create(outputPath)
   461  	if err != nil {
   462  		return err
   463  	}
   464  	defer outputFile.Close()
   465  	// create a writer on the encrypted package
   466  	writer, err := reader.NewWriter(outputFile)
   467  	if err != nil {
   468  		return err
   469  	}
   470  	// encrypt resources from the input package, return the encryption key
   471  	encryptionKey, err := pack.Process(encrypter, contentKey, reader, writer)
   472  	if err != nil {
   473  		return err
   474  	}
   475  	pub.EncryptionKey = encryptionKey
   476  
   477  	err = writer.Close()
   478  	if err != nil {
   479  		return err
   480  	}
   481  
   482  	// calculate the output file size and checksum
   483  	stats, err := outputFile.Stat()
   484  	if err == nil && (stats.Size() > 0) {
   485  		filesize := stats.Size()
   486  		pub.Size = uint32(filesize)
   487  		cs := checksum(outputFile)
   488  		pub.Checksum = cs
   489  	}
   490  	if stats.Size() == 0 {
   491  		return errors.New("empty output file")
   492  	}
   493  	return nil
   494  }