github.com/readium/readium-lcp-server@v0.0.0-20240101192032-6e95190e99f1/pack/rwppackage.go (about) 1 // Copyright 2020 Readium Foundation. All rights reserved. 2 // Use of this source code is governed by a BSD-style license 3 // that can be found in the LICENSE file exposed on Github (readium) in the project repository. 4 5 package pack 6 7 import ( 8 "archive/zip" 9 "encoding/json" 10 "errors" 11 "io" 12 "log" 13 "net/url" 14 "os" 15 "text/template" 16 17 "github.com/readium/readium-lcp-server/rwpm" 18 ) 19 20 // RPFReader is a Readium Package reader 21 type RPFReader struct { 22 manifest rwpm.Publication 23 zipArchive *zip.ReadCloser 24 } 25 26 // RPFWriter is a Readium Package writer 27 type RPFWriter struct { 28 manifest rwpm.Publication 29 zipWriter *zip.Writer 30 } 31 32 // NopWriteCloser object 33 type NopWriteCloser struct { 34 io.Writer 35 } 36 37 // NewWriter returns a new PackageWriter writing a RPF file to the output file 38 func (reader *RPFReader) NewWriter(writer io.Writer) (PackageWriter, error) { 39 40 zipWriter := zip.NewWriter(writer) 41 42 files := map[string]*zip.File{} 43 for _, file := range reader.zipArchive.File { 44 files[file.Name] = file 45 } 46 47 // copy immediately the W3C manifest if it exists in the source package 48 if w3cmanFile, ok := files[W3CManifestName]; ok { 49 fw, err := zipWriter.Create(W3CManifestName) 50 if err != nil { 51 return nil, err 52 } 53 file, err := w3cmanFile.Open() 54 if err != nil { 55 return nil, err 56 } 57 _, err = io.Copy(fw, file) 58 if err != nil { 59 return nil, err 60 } 61 file.Close() 62 } 63 64 // copy immediately all ancilliary resources from the source manifest 65 // as they will not be encrypted in the current implementation 66 // FIXME: work on the encryption of ancilliary resources (except the W3C Entry Page?). 67 for _, manifestResource := range reader.manifest.Resources { 68 sourceFile := files[manifestResource.Href] 69 fw, err := zipWriter.Create(sourceFile.Name) 70 if err != nil { 71 return nil, err 72 } 73 file, err := sourceFile.Open() 74 if err != nil { 75 return nil, err 76 } 77 _, err = io.Copy(fw, file) 78 if err != nil { 79 return nil, err 80 } 81 file.Close() 82 } 83 84 // copy immediately all linked resources, except the manifest itself (self link), 85 // from the source manifest as they should not be encrypted. 86 for _, manifestLink := range reader.manifest.Links { 87 if manifestLink.Href == ManifestLocation { 88 continue 89 } 90 isSelf := false 91 for _, rel := range manifestLink.Rel { 92 if rel == "self" { 93 isSelf = true 94 continue 95 } 96 } 97 if isSelf { 98 continue 99 } 100 sourceFile := files[manifestLink.Href] 101 if sourceFile == nil { 102 continue 103 } 104 fw, err := zipWriter.Create(sourceFile.Name) 105 if err != nil { 106 return nil, err 107 } 108 file, err := sourceFile.Open() 109 if err != nil { 110 return nil, err 111 } 112 _, err = io.Copy(fw, file) 113 if err != nil { 114 return nil, err 115 } 116 file.Close() 117 } 118 119 manifest := reader.manifest 120 121 return &RPFWriter{ 122 zipWriter: zipWriter, 123 manifest: manifest, 124 }, nil 125 } 126 127 // Resources returns a list of all resources which may be encrypted 128 // It is part of the PackageReader interface. 129 // Note: the current design choice is to leave ancillary resources (in "resources" and "alternates") unencrypted 130 // FIXME: add "resources" and "alternates" to the slice 131 func (reader *RPFReader) Resources() []Resource { 132 // index files by name to avoid multiple linear searches 133 files := map[string]*zip.File{} 134 for _, file := range reader.zipArchive.File { 135 files[file.Name] = file 136 } 137 138 // list files from the reading order; keep their type and encryption status 139 var resources []Resource 140 for _, manifestResource := range reader.manifest.ReadingOrder { 141 isEncrypted := manifestResource.Properties != nil && manifestResource.Properties.Encrypted != nil 142 name, err := url.QueryUnescape(manifestResource.Href) 143 if err != nil { 144 log.Printf("Error unescaping %s in manifest", manifestResource.Href) 145 } 146 if files[name] != nil { 147 resources = append(resources, &rwpResource{file: files[name], isEncrypted: isEncrypted, contentType: manifestResource.Type}) 148 } else { 149 log.Printf("No file found in the archive for href %s in manifest", manifestResource.Href) 150 } 151 } 152 153 return resources 154 } 155 156 func (reader *RPFReader) Close() error { 157 return reader.zipArchive.Close() 158 } 159 160 type rwpResource struct { 161 isEncrypted bool 162 contentType string 163 file *zip.File 164 } 165 166 // rwpResource supports the Resource interface 167 func (resource *rwpResource) Path() string { return resource.file.Name } 168 func (resource *rwpResource) ContentType() string { return resource.contentType } 169 func (resource *rwpResource) Size() int64 { return int64(resource.file.UncompressedSize64) } 170 func (resource *rwpResource) Encrypted() bool { return resource.isEncrypted } 171 func (resource *rwpResource) Open() (io.ReadCloser, error) { return resource.file.Open() } 172 func (resource *rwpResource) CompressBeforeEncryption() bool { return false } 173 func (resource *rwpResource) CanBeEncrypted() bool { return true } 174 175 func (resource *rwpResource) CopyTo(packageWriter PackageWriter) error { 176 177 wc, err := packageWriter.NewFile(resource.Path(), resource.contentType, resource.file.Method) 178 if err != nil { 179 return err 180 } 181 182 rc, err := resource.file.Open() 183 if err != nil { 184 return err 185 } 186 defer rc.Close() 187 188 _, err = io.Copy(wc, rc) 189 190 rCloseError := rc.Close() 191 wCloseError := wc.Close() 192 193 if err != nil { 194 return err 195 } 196 197 if rCloseError != nil { 198 return rCloseError 199 } 200 201 return wCloseError 202 } 203 204 // Close closes a NopWriteCloser 205 func (nc *NopWriteCloser) Close() error { 206 return nil 207 } 208 209 // NewFile creates a header in the zip archive and adds an entry to the writer reading order if missing. 210 // This function is called in two main cases: 211 // - one is the creation of a Readium Package for a PDF file (no existing entry in the manifest) 212 // - another in the encryption of an existing Readium Package (there is already an entry in the manifest) 213 // FIXME: the PackageWriter interface is obscure; let's make it better. 214 func (writer *RPFWriter) NewFile(path string, contentType string, storageMethod uint16) (io.WriteCloser, error) { 215 216 w, err := writer.zipWriter.CreateHeader(&zip.FileHeader{ 217 Name: path, 218 Method: storageMethod, 219 }) 220 221 // add an entry to the writer reading order if missing 222 found := false 223 for _, resource := range writer.manifest.ReadingOrder { 224 if path == resource.Href { 225 found = true 226 break 227 } 228 } 229 if !found { 230 writer.manifest.ReadingOrder = append(writer.manifest.ReadingOrder, rwpm.Link{Href: path, Type: contentType}) 231 } 232 233 return &NopWriteCloser{w}, err 234 } 235 236 // MarkAsEncrypted marks a resource as encrypted (with an algorithm), in the writer manifest 237 // FIXME: currently only looks into the reading order. Add "alternates", think about adding "resources" 238 // FIXME: process resources which are compressed before encryption -> add Compression and OriginalLength properties in this case 239 func (writer *RPFWriter) MarkAsEncrypted(path string, originalSize int64, algorithm string) { 240 241 for i, resource := range writer.manifest.ReadingOrder { 242 if path == resource.Href { 243 // add encryption properties 244 if resource.Properties == nil { 245 writer.manifest.ReadingOrder[i].Properties = new(rwpm.Properties) 246 } 247 writer.manifest.ReadingOrder[i].Properties.Encrypted = &rwpm.Encrypted{ 248 Scheme: "http://readium.org/2014/01/lcp", 249 // profile data is not useful and even misleading: the same encryption algorithm applies to basic and 1.0 profiles. 250 //Profile: profile.String(), 251 Algorithm: algorithm, 252 } 253 254 break 255 } 256 } 257 } 258 259 // ManifestLocation is the path if the Readium manifest in a package 260 const ManifestLocation = "manifest.json" 261 262 func (writer *RPFWriter) writeManifest() error { 263 w, err := writer.zipWriter.Create(ManifestLocation) 264 if err != nil { 265 return err 266 } 267 268 encoder := json.NewEncoder(w) 269 return encoder.Encode(writer.manifest) 270 } 271 272 // Close closes a Readium Package Writer 273 // Writes the updated manifest in the zip archive. 274 func (writer *RPFWriter) Close() error { 275 err := writer.writeManifest() 276 if err != nil { 277 return err 278 } 279 280 return writer.zipWriter.Close() 281 } 282 283 // OpenRPF opens a Readium Package and returns a zip reader + a manifest 284 func OpenRPF(name string) (*RPFReader, error) { 285 286 zipArchive, err := zip.OpenReader(name) 287 if err != nil { 288 return nil, err 289 } 290 291 // find and parse the manifest 292 var manifest rwpm.Publication 293 var found bool 294 for _, file := range zipArchive.File { 295 if file.Name == ManifestLocation { 296 found = true 297 298 fileReader, err := file.Open() 299 if err != nil { 300 return nil, err 301 } 302 decoder := json.NewDecoder(fileReader) 303 304 err = decoder.Decode(&manifest) 305 fileReader.Close() 306 if err != nil { 307 return nil, err 308 } 309 break 310 } 311 } 312 313 if !found { 314 return nil, errors.New("could not find manifest") 315 } 316 317 return &RPFReader{zipArchive: zipArchive, manifest: manifest}, nil 318 } 319 320 // BuildRPFFromPDF builds a Readium Package (rwpp) which embeds a PDF file 321 func BuildRPFFromPDF(title string, inputPath string, outputPath string) error { 322 323 // create the rwpp 324 f, err := os.Create(outputPath) 325 if err != nil { 326 return err 327 } 328 defer f.Close() 329 330 // copy the content of the pdf input file into the zip output, as 'publication.pdf'. 331 // the pdf content is stored compressed so that the encryption performance on Windows is better (!). 332 zipWriter := zip.NewWriter(f) 333 writer, err := zipWriter.CreateHeader(&zip.FileHeader{ 334 Name: "publication.pdf", 335 Method: zip.Deflate, 336 }) 337 if err != nil { 338 return err 339 } 340 inputFile, err := os.Open(inputPath) 341 if err != nil { 342 zipWriter.Close() 343 return err 344 } 345 defer inputFile.Close() 346 347 _, err = io.Copy(writer, inputFile) 348 if err != nil { 349 zipWriter.Close() 350 return err 351 } 352 353 // inject a Readium manifest into the zip output 354 manifest := ` 355 { 356 "@context": [ 357 "https://readium.org/webpub-manifest/context.jsonld" 358 ], 359 "metadata": { 360 "title": "{{.Title}}" 361 }, 362 "readingOrder": [ 363 { 364 "href": "publication.pdf", 365 "type": "application/pdf" 366 } 367 ] 368 } 369 ` 370 371 manifestWriter, err := zipWriter.Create(ManifestLocation) 372 if err != nil { 373 return err 374 } 375 376 tmpl, err := template.New("manifest").Parse(manifest) 377 if err != nil { 378 zipWriter.Close() 379 return err 380 } 381 382 err = tmpl.Execute(manifestWriter, struct{ Title string }{title}) 383 if err != nil { 384 zipWriter.Close() 385 return err 386 } 387 388 return zipWriter.Close() 389 }