github.com/readium/readium-lcp-server@v0.0.0-20240509124024-799e77a0bbd6/pack/rwppackage.go (about)

     1  // Copyright 2020 Readium Foundation. All rights reserved.
     2  // Use of this source code is governed by a BSD-style license
     3  // that can be found in the LICENSE file exposed on Github (readium) in the project repository.
     4  
     5  package pack
     6  
     7  import (
     8  	"archive/zip"
     9  	"encoding/json"
    10  	"errors"
    11  	"io"
    12  	"log"
    13  	"net/url"
    14  	"os"
    15  	"text/template"
    16  
    17  	"github.com/readium/readium-lcp-server/rwpm"
    18  )
    19  
    20  // RPFReader is a Readium Package reader
    21  type RPFReader struct {
    22  	manifest   rwpm.Publication
    23  	zipArchive *zip.ReadCloser
    24  }
    25  
    26  // RPFWriter is a Readium Package writer
    27  type RPFWriter struct {
    28  	manifest  rwpm.Publication
    29  	zipWriter *zip.Writer
    30  }
    31  
    32  // NopWriteCloser object
    33  type NopWriteCloser struct {
    34  	io.Writer
    35  }
    36  
    37  // NewWriter returns a new PackageWriter writing a RPF file to the output file
    38  func (reader *RPFReader) NewWriter(writer io.Writer) (PackageWriter, error) {
    39  
    40  	zipWriter := zip.NewWriter(writer)
    41  
    42  	files := map[string]*zip.File{}
    43  	for _, file := range reader.zipArchive.File {
    44  		files[file.Name] = file
    45  	}
    46  
    47  	// copy immediately the W3C manifest if it exists in the source package
    48  	if w3cmanFile, ok := files[W3CManifestName]; ok {
    49  		fw, err := zipWriter.Create(W3CManifestName)
    50  		if err != nil {
    51  			return nil, err
    52  		}
    53  		file, err := w3cmanFile.Open()
    54  		if err != nil {
    55  			return nil, err
    56  		}
    57  		_, err = io.Copy(fw, file)
    58  		if err != nil {
    59  			return nil, err
    60  		}
    61  		file.Close()
    62  	}
    63  
    64  	// copy immediately all ancilliary resources from the source manifest
    65  	// as they will not be encrypted in the current implementation
    66  	// FIXME: work on the encryption of ancilliary resources (except the W3C Entry Page?).
    67  	for _, manifestResource := range reader.manifest.Resources {
    68  		sourceFile := files[manifestResource.Href]
    69  		fw, err := zipWriter.Create(sourceFile.Name)
    70  		if err != nil {
    71  			return nil, err
    72  		}
    73  		file, err := sourceFile.Open()
    74  		if err != nil {
    75  			return nil, err
    76  		}
    77  		_, err = io.Copy(fw, file)
    78  		if err != nil {
    79  			return nil, err
    80  		}
    81  		file.Close()
    82  	}
    83  
    84  	// copy immediately all linked resources, except the manifest itself (self link),
    85  	// from the source manifest as they should not be encrypted.
    86  	for _, manifestLink := range reader.manifest.Links {
    87  		if manifestLink.Href == ManifestLocation {
    88  			continue
    89  		}
    90  		isSelf := false
    91  		for _, rel := range manifestLink.Rel {
    92  			if rel == "self" {
    93  				isSelf = true
    94  				continue
    95  			}
    96  		}
    97  		if isSelf {
    98  			continue
    99  		}
   100  		sourceFile := files[manifestLink.Href]
   101  		if sourceFile == nil {
   102  			continue
   103  		}
   104  		fw, err := zipWriter.Create(sourceFile.Name)
   105  		if err != nil {
   106  			return nil, err
   107  		}
   108  		file, err := sourceFile.Open()
   109  		if err != nil {
   110  			return nil, err
   111  		}
   112  		_, err = io.Copy(fw, file)
   113  		if err != nil {
   114  			return nil, err
   115  		}
   116  		file.Close()
   117  	}
   118  
   119  	manifest := reader.manifest
   120  
   121  	return &RPFWriter{
   122  		zipWriter: zipWriter,
   123  		manifest:  manifest,
   124  	}, nil
   125  }
   126  
   127  // Resources returns a list of all resources which may be encrypted
   128  // It is part of the PackageReader interface.
   129  // Note: the current design choice is to leave ancillary resources (in "resources" and "alternates") unencrypted
   130  // FIXME: add "resources" and "alternates" to the slice
   131  func (reader *RPFReader) Resources() []Resource {
   132  	// index files by name to avoid multiple linear searches
   133  	files := map[string]*zip.File{}
   134  	for _, file := range reader.zipArchive.File {
   135  		files[file.Name] = file
   136  	}
   137  
   138  	// list files from the reading order; keep their type and encryption status
   139  	var resources []Resource
   140  	for _, manifestResource := range reader.manifest.ReadingOrder {
   141  		isEncrypted := manifestResource.Properties != nil && manifestResource.Properties.Encrypted != nil
   142  		name, err := url.QueryUnescape(manifestResource.Href)
   143  		if err != nil {
   144  			log.Printf("Error unescaping %s in manifest", manifestResource.Href)
   145  		}
   146  		if files[name] != nil {
   147  			resources = append(resources, &rwpResource{file: files[name], isEncrypted: isEncrypted, contentType: manifestResource.Type})
   148  		} else {
   149  			log.Printf("No file found in the archive for href %s in manifest", manifestResource.Href)
   150  		}
   151  	}
   152  
   153  	return resources
   154  }
   155  
   156  func (reader *RPFReader) Close() error {
   157  	return reader.zipArchive.Close()
   158  }
   159  
   160  type rwpResource struct {
   161  	isEncrypted bool
   162  	contentType string
   163  	file        *zip.File
   164  }
   165  
   166  // rwpResource supports the Resource interface
   167  func (resource *rwpResource) Path() string                   { return resource.file.Name }
   168  func (resource *rwpResource) ContentType() string            { return resource.contentType }
   169  func (resource *rwpResource) Size() int64                    { return int64(resource.file.UncompressedSize64) }
   170  func (resource *rwpResource) Encrypted() bool                { return resource.isEncrypted }
   171  func (resource *rwpResource) Open() (io.ReadCloser, error)   { return resource.file.Open() }
   172  func (resource *rwpResource) CompressBeforeEncryption() bool { return false }
   173  func (resource *rwpResource) CanBeEncrypted() bool           { return true }
   174  
   175  func (resource *rwpResource) CopyTo(packageWriter PackageWriter) error {
   176  
   177  	wc, err := packageWriter.NewFile(resource.Path(), resource.contentType, resource.file.Method)
   178  	if err != nil {
   179  		return err
   180  	}
   181  
   182  	rc, err := resource.file.Open()
   183  	if err != nil {
   184  		return err
   185  	}
   186  	defer rc.Close()
   187  
   188  	_, err = io.Copy(wc, rc)
   189  
   190  	rCloseError := rc.Close()
   191  	wCloseError := wc.Close()
   192  
   193  	if err != nil {
   194  		return err
   195  	}
   196  
   197  	if rCloseError != nil {
   198  		return rCloseError
   199  	}
   200  
   201  	return wCloseError
   202  }
   203  
   204  // Close closes a NopWriteCloser
   205  func (nc *NopWriteCloser) Close() error {
   206  	return nil
   207  }
   208  
   209  // NewFile creates a header in the zip archive and adds an entry to the writer reading order if missing.
   210  // This function is called in two main cases:
   211  // - one is the creation of a Readium Package for a PDF file (no existing entry in the manifest)
   212  // - another in the encryption of an existing Readium Package (there is already an entry in the manifest)
   213  // FIXME: the PackageWriter interface is obscure; let's make it better.
   214  func (writer *RPFWriter) NewFile(path string, contentType string, storageMethod uint16) (io.WriteCloser, error) {
   215  
   216  	w, err := writer.zipWriter.CreateHeader(&zip.FileHeader{
   217  		Name:   path,
   218  		Method: storageMethod,
   219  	})
   220  
   221  	// add an entry to the writer reading order if missing
   222  	found := false
   223  	for _, resource := range writer.manifest.ReadingOrder {
   224  		if path == resource.Href {
   225  			found = true
   226  			break
   227  		}
   228  	}
   229  	if !found {
   230  		writer.manifest.ReadingOrder = append(writer.manifest.ReadingOrder, rwpm.Link{Href: path, Type: contentType})
   231  	}
   232  
   233  	return &NopWriteCloser{w}, err
   234  }
   235  
   236  // MarkAsEncrypted marks a resource as encrypted (with an algorithm), in the writer manifest
   237  // FIXME: currently only looks into the reading order. Add "alternates", think about adding "resources"
   238  // FIXME: process resources which are compressed before encryption -> add Compression and OriginalLength properties in this case
   239  func (writer *RPFWriter) MarkAsEncrypted(path string, originalSize int64, algorithm string) {
   240  
   241  	for i, resource := range writer.manifest.ReadingOrder {
   242  		if path == resource.Href {
   243  			// add encryption properties
   244  			if resource.Properties == nil {
   245  				writer.manifest.ReadingOrder[i].Properties = new(rwpm.Properties)
   246  			}
   247  			writer.manifest.ReadingOrder[i].Properties.Encrypted = &rwpm.Encrypted{
   248  				Scheme: "http://readium.org/2014/01/lcp",
   249  				// profile data is not useful and even misleading: the same encryption algorithm applies to basic and 1.0 profiles.
   250  				//Profile:   profile.String(),
   251  				Algorithm: algorithm,
   252  			}
   253  
   254  			break
   255  		}
   256  	}
   257  }
   258  
   259  // ManifestLocation is the path if the Readium manifest in a package
   260  const ManifestLocation = "manifest.json"
   261  
   262  func (writer *RPFWriter) writeManifest() error {
   263  	w, err := writer.zipWriter.Create(ManifestLocation)
   264  	if err != nil {
   265  		return err
   266  	}
   267  
   268  	encoder := json.NewEncoder(w)
   269  	return encoder.Encode(writer.manifest)
   270  }
   271  
   272  // Close closes a Readium Package Writer
   273  // Writes the updated manifest in the zip archive.
   274  func (writer *RPFWriter) Close() error {
   275  	err := writer.writeManifest()
   276  	if err != nil {
   277  		return err
   278  	}
   279  
   280  	return writer.zipWriter.Close()
   281  }
   282  
   283  // OpenRPF opens a Readium Package and returns a zip reader + a manifest
   284  func OpenRPF(name string) (*RPFReader, error) {
   285  
   286  	zipArchive, err := zip.OpenReader(name)
   287  	if err != nil {
   288  		return nil, err
   289  	}
   290  
   291  	// find and parse the manifest
   292  	var manifest rwpm.Publication
   293  	var found bool
   294  	for _, file := range zipArchive.File {
   295  		if file.Name == ManifestLocation {
   296  			found = true
   297  
   298  			fileReader, err := file.Open()
   299  			if err != nil {
   300  				return nil, err
   301  			}
   302  			decoder := json.NewDecoder(fileReader)
   303  
   304  			err = decoder.Decode(&manifest)
   305  			fileReader.Close()
   306  			if err != nil {
   307  				return nil, err
   308  			}
   309  			break
   310  		}
   311  	}
   312  
   313  	if !found {
   314  		return nil, errors.New("could not find manifest")
   315  	}
   316  
   317  	return &RPFReader{zipArchive: zipArchive, manifest: manifest}, nil
   318  }
   319  
   320  // BuildRPFFromPDF builds a Readium Package (rwpp) which embeds a PDF file
   321  func BuildRPFFromPDF(title string, inputPath string, outputPath string) error {
   322  
   323  	// create the rwpp
   324  	f, err := os.Create(outputPath)
   325  	if err != nil {
   326  		return err
   327  	}
   328  	defer f.Close()
   329  
   330  	// copy the content of the pdf input file into the zip output, as 'publication.pdf'.
   331  	// the pdf content is stored compressed so that the encryption performance on Windows is better (!).
   332  	zipWriter := zip.NewWriter(f)
   333  	writer, err := zipWriter.CreateHeader(&zip.FileHeader{
   334  		Name:   "publication.pdf",
   335  		Method: zip.Deflate,
   336  	})
   337  	if err != nil {
   338  		return err
   339  	}
   340  	inputFile, err := os.Open(inputPath)
   341  	if err != nil {
   342  		zipWriter.Close()
   343  		return err
   344  	}
   345  	defer inputFile.Close()
   346  
   347  	_, err = io.Copy(writer, inputFile)
   348  	if err != nil {
   349  		zipWriter.Close()
   350  		return err
   351  	}
   352  
   353  	// inject a Readium manifest into the zip output
   354  	manifest := `
   355  	{
   356  		"@context": [
   357  			"https://readium.org/webpub-manifest/context.jsonld"
   358  		],
   359  		"metadata": {
   360  			"title": "{{.Title}}"
   361  		},
   362  		"readingOrder": [
   363  			{
   364  				"href": "publication.pdf",
   365  				"type": "application/pdf"
   366  			}
   367  		]
   368  	}
   369  	`
   370  
   371  	manifestWriter, err := zipWriter.Create(ManifestLocation)
   372  	if err != nil {
   373  		return err
   374  	}
   375  
   376  	tmpl, err := template.New("manifest").Parse(manifest)
   377  	if err != nil {
   378  		zipWriter.Close()
   379  		return err
   380  	}
   381  
   382  	err = tmpl.Execute(manifestWriter, struct{ Title string }{title})
   383  	if err != nil {
   384  		zipWriter.Close()
   385  		return err
   386  	}
   387  
   388  	return zipWriter.Close()
   389  }