github.com/readium/readium-lcp-server@v0.0.0-20240101192032-6e95190e99f1/encrypt/process_encrypt.go (about) 1 // Copyright 2021 Readium Foundation. All rights reserved. 2 // Use of this source code is governed by a BSD-style license 3 // that can be found in the LICENSE file exposed on Github (readium) in the project repository. 4 5 package encrypt 6 7 import ( 8 "archive/zip" 9 "crypto/sha256" 10 "encoding/hex" 11 "errors" 12 "fmt" 13 "io" 14 "log" 15 "net/http" 16 "net/url" 17 "os" 18 "path" 19 "path/filepath" 20 "strings" 21 22 "github.com/readium/readium-lcp-server/crypto" 23 "github.com/readium/readium-lcp-server/epub" 24 apilcp "github.com/readium/readium-lcp-server/lcpserver/api" 25 "github.com/readium/readium-lcp-server/pack" 26 uuid "github.com/satori/go.uuid" 27 ) 28 29 // Publication aggregates information during the process 30 type Publication struct { 31 UUID string 32 Title string 33 Date string 34 Description string 35 Language []string 36 Publisher []string 37 Author []string 38 Subject []string 39 CoverUrl string 40 StorageMode int 41 FileName string 42 EncryptionKey []byte 43 Location string 44 ContentType string 45 Size uint32 46 Checksum string 47 } 48 49 // ProcessEncryption encrypts a publication 50 // inputPath must contain a processable file extension (EPUB, PDF, LPF or RPF) 51 func ProcessEncryption(contentID, contentKey, inputPath, tempRepo, outputRepo, storageRepo, storageURL, storageFilename string, extractCover bool) (*Publication, error) { 52 53 if inputPath == "" { 54 return nil, errors.New("ProcessEncryption, parameter error") 55 } 56 57 var pub Publication 58 59 // if contentID is not set, generate a random UUID 60 if contentID == "" { 61 uid, err := uuid.NewV4() 62 if err != nil { 63 return nil, err 64 } 65 contentID = uid.String() 66 } 67 pub.UUID = contentID 68 69 // create a temp folder if declared, or use the current dir 70 if tempRepo != "" { 71 err := os.MkdirAll(tempRepo, os.ModePerm) 72 if err != nil && !os.IsExist(err) { 73 return nil, err 74 } 75 } else { 76 tempRepo, _ = os.Getwd() 77 } 78 79 // if the input file is stored on a remote server, fetch it and store it into a temp folder 80 tempPath, err := fetchInputFile(inputPath, tempRepo, contentID) 81 if err != nil { 82 return nil, err 83 } 84 deleteTemp := false 85 // if a temp file has been fetched, it will be deleted later 86 if tempPath != "" { 87 deleteTemp = true 88 inputPath = tempPath 89 } 90 91 // select a storage mode 92 pub.StorageMode = apilcp.Storage_none 93 // if the storage repo is set, set storage mode and output repository 94 // note: the -storage parameter takes precedence over -output 95 if storageRepo != "" { 96 // S3 storage is specified by the presence of "s3:" at the start of the -storage param 97 if strings.HasPrefix(storageRepo, "s3:") { 98 pub.StorageMode = apilcp.Storage_s3 99 outputRepo = tempRepo // before move to s3 100 // file system storage 101 } else { 102 pub.StorageMode = apilcp.Storage_fs 103 // create the storage folder when necessary 104 err := os.MkdirAll(storageRepo, os.ModePerm) 105 if err != nil && !os.IsExist(err) { 106 return nil, err 107 } 108 // the encrypted file will be directly generated inside the storage path 109 outputRepo = storageRepo 110 } 111 } 112 // if the output repo is still not set, use the temp directory. 113 if outputRepo == "" { 114 outputRepo = tempRepo 115 } 116 117 // set target file info 118 targetFileInfo(&pub, inputPath, storageFilename) 119 120 // set the target file name; use the content id by default 121 if storageFilename == "" { 122 storageFilename = pub.UUID 123 } 124 125 // set the output path 126 outputPath := filepath.Join(outputRepo, storageFilename) 127 fmt.Println("Output path:", outputPath) 128 129 // define an AES encrypter 130 encrypter := crypto.NewAESEncrypter_PUBLICATION_RESOURCES() 131 132 // select the encryption process from the input file extension 133 err = nil 134 135 inputExt := filepath.Ext(inputPath) 136 137 // the cover can be extracted if lcpencrypt stores the file and the file is an EPUB 138 if storageRepo == "" { 139 extractCover = false 140 } 141 142 switch inputExt { 143 case ".epub": 144 err = processEPUB(&pub, inputPath, outputPath, encrypter, contentKey, extractCover) 145 case ".pdf": 146 extractCover = false 147 err = processPDF(&pub, inputPath, outputPath, encrypter, contentKey) 148 case ".lpf": 149 extractCover = false 150 err = processLPF(&pub, inputPath, outputPath, encrypter, contentKey) 151 case ".audiobook", ".divina", ".webpub", ".rpf": 152 extractCover = false 153 err = processRPF(&pub, inputPath, outputPath, encrypter, contentKey) 154 default: 155 return nil, errors.New("unprocessable extension " + inputExt) 156 } 157 if err != nil { 158 return nil, err 159 } 160 161 if deleteTemp { 162 err = os.Remove(inputPath) 163 if err != nil { 164 return nil, err 165 } 166 } 167 168 // store the publication if required, and set pub.Location 169 switch pub.StorageMode { 170 // the license server will have to store the encrypted publication 171 // warning: the license server must have read access to the output repo. 172 case apilcp.Storage_none: 173 // location indicates to the license server the path to the encrypted publication 174 pub.Location = outputPath 175 // the encryption tools stores the encrypted publication in a file system 176 case apilcp.Storage_fs: 177 // location indicates the url of the publication 178 pub.Location, err = url.JoinPath(storageURL, storageFilename) 179 // the encryption tools stores the encrypted publication in an S3 storage 180 case apilcp.Storage_s3: 181 // store the encrypted file in its definitive S3 storage, delete the temp file 182 err = StoreS3Publication(outputPath, storageRepo, storageFilename) 183 if err != nil { 184 return nil, err 185 } 186 // location indicates the url of the publication on S3 187 pub.Location, err = url.JoinPath(storageURL, storageFilename) 188 } 189 if err != nil { 190 return nil, err 191 } 192 if extractCover { 193 coverExt := path.Ext(pub.CoverUrl) 194 pub.CoverUrl, _ = url.JoinPath(storageURL, storageFilename+coverExt) 195 } 196 197 return &pub, nil 198 } 199 200 // fetchInputFile fetches the input file from a remote server 201 func fetchInputFile(inputPath, tempRepo, contentID string) (string, error) { 202 203 if inputPath == "" || tempRepo == "" || contentID == "" { 204 return "", errors.New("fetchInputFile, parameter error") 205 } 206 207 url, err := url.Parse(inputPath) 208 if err != nil { 209 // this is not a valid URL 210 return "", nil 211 } 212 213 // no need to fetch the file, which is in a file system 214 if url.Scheme != "http" && url.Scheme != "https" && url.Scheme != "ftp" { 215 return "", nil 216 } 217 218 // the temp file has the same extension as the remote file 219 inputExt := filepath.Ext(inputPath) 220 tempPath := filepath.Join(tempRepo, contentID+inputExt) 221 // create the temp file 222 out, err := os.Create(tempPath) 223 if err != nil { 224 return "", err 225 } 226 defer out.Close() 227 228 // fetch the file 229 if url.Scheme == "http" || url.Scheme == "https" { 230 res, err := http.Get(inputPath) 231 if err != nil { 232 return "", err 233 } 234 defer res.Body.Close() 235 defer out.Close() 236 _, err = io.Copy(out, res.Body) 237 if err != nil { 238 return "", err 239 } 240 } else if url.Scheme == "ftp" { 241 // we'll use https://github.com/jlaffaye/ftp when requested 242 return "", errors.New("ftp not supported yet") 243 } 244 return tempPath, nil 245 } 246 247 // targetFileInfo sets the file name and content type 248 // which will be used during future downloads 249 func targetFileInfo(pub *Publication, inputPath, storageFilename string) error { 250 251 // if the storage filename was imposed, use it 252 if storageFilename != "" { 253 pub.FileName = storageFilename 254 } else { 255 // generate a filename from the input filename and a target extension 256 inputFile := filepath.Base(inputPath) 257 inputExt := filepath.Ext(inputPath) 258 fileNameNoExt := inputFile[:len(inputFile)-len(inputExt)] 259 260 var ext string 261 switch inputExt { 262 case ".epub": 263 ext = inputExt 264 case ".pdf": 265 ext = ".lcpdf" 266 case ".audiobook", ".rpf": 267 ext = ".lcpau" 268 case ".divina": 269 ext = ".lcpdi" 270 case ".lpf": 271 // short term solution. We'll need to inspect the W3C manifest and check conformsTo, 272 // to be certain this is an audiobook (vs another profile of Web Publication) 273 ext = ".lcpau" 274 case ".webpub": 275 // short term solution. We'll need to inspect the RWP manifest and check conformsTo, 276 // to be certain this package contains a pdf 277 ext = ".lcpdf" 278 } 279 pub.FileName = fileNameNoExt + ext 280 } 281 282 // find the target mime type 283 outputExt := filepath.Ext(pub.FileName) 284 switch outputExt { 285 case ".epub": 286 pub.ContentType = epub.ContentType_EPUB 287 case ".lcpdf": 288 pub.ContentType = "application/pdf+lcp" 289 case ".lcpau": 290 pub.ContentType = "application/audiobook+lcp" 291 case ".lcpdi": 292 pub.ContentType = "application/divina+lcp" 293 } 294 return nil 295 } 296 297 // checksum calculates the checksum of a file 298 func checksum(file *os.File) string { 299 300 hasher := sha256.New() 301 file.Seek(0, 0) 302 if _, err := io.Copy(hasher, file); err != nil { 303 return "" 304 } 305 return hex.EncodeToString(hasher.Sum(nil)) 306 } 307 308 // processEPUB encrypts resources in an EPUB 309 func processEPUB(pub *Publication, inputPath string, outputPath string, encrypter crypto.Encrypter, contentKey string, extractCover bool) error { 310 311 // create a zip reader from the input path 312 zr, err := zip.OpenReader(inputPath) 313 if err != nil { 314 return err 315 } 316 defer zr.Close() 317 318 // generate an EPUB object 319 epub, err := epub.Read(&zr.Reader) 320 if err != nil { 321 return err 322 } 323 324 // init metadata 325 pub.Title = epub.Package[0].Metadata.Title[0] 326 pub.Date = epub.Package[0].Metadata.Date 327 pub.Description = epub.Package[0].Metadata.Description 328 pub.Language = epub.Package[0].Metadata.Language 329 pub.Publisher = epub.Package[0].Metadata.Publisher 330 pub.Author = epub.Package[0].Metadata.Author 331 pub.Subject = epub.Package[0].Metadata.Subject 332 333 // look for the cover image 334 coverImageID := "cover-image" 335 for _, meta := range epub.Package[0].Metadata.Metas { 336 if meta.Name == "cover" { 337 coverImageID = meta.Content 338 } 339 } 340 var coverPath string 341 for _, item := range epub.Package[0].Manifest.Items { 342 if strings.Contains(item.Properties, "cover-image") || 343 item.ID == coverImageID { 344 // re-construct a path, avoid insertion of backslashes as separator on Windows 345 coverPath = filepath.ToSlash(filepath.Join(epub.Package[0].BasePath, item.Href)) 346 } 347 } 348 349 // create the output file 350 outputFile, err := os.Create(outputPath) 351 if err != nil { 352 return err 353 } 354 // will close the output file 355 defer outputFile.Close() 356 357 // encrypt the content of the publication, 358 // write into the output file 359 _, encryptionKey, err := pack.Do(encrypter, contentKey, epub, outputFile) 360 if err != nil { 361 return err 362 } 363 pub.EncryptionKey = encryptionKey 364 // calculate the output file size and checksum 365 stats, err := outputFile.Stat() 366 if err == nil && (stats.Size() > 0) { 367 filesize := stats.Size() 368 pub.Size = uint32(filesize) 369 cs := checksum(outputFile) 370 pub.Checksum = cs 371 } 372 if stats.Size() == 0 { 373 return errors.New("empty output file") 374 } 375 376 if extractCover { 377 // extract the cover image and store it at the target location 378 for _, f := range zr.File { 379 if f.Name == coverPath { 380 epubCover, err := f.Open() 381 if err != nil { 382 log.Printf("Error opening the cover in %s, %s", coverPath, err.Error()) 383 break // move out of the loop 384 } 385 defer epubCover.Close() 386 // create the output cover 387 coverExt := path.Ext(coverPath) 388 coverFile, err := os.Create(outputPath + coverExt) 389 if err != nil { 390 return err 391 } 392 defer coverFile.Close() 393 _, err = io.Copy(coverFile, epubCover) 394 if err != nil { 395 // we do not consider it as an error 396 log.Printf("Error copying cover data, %s", err.Error()) 397 } 398 // set temporarily, will be updated later 399 pub.CoverUrl = coverPath 400 break 401 } 402 } 403 } 404 405 return nil 406 } 407 408 // processPDF wraps a PDF file inside a Readium Package and encrypts its resources 409 func processPDF(pub *Publication, inputPath string, outputPath string, encrypter crypto.Encrypter, contentKey string) error { 410 411 // generate a temp Readium Package (rwpp) which embeds the PDF file; its title is the PDF file name 412 tmpPackagePath := outputPath + ".tmp" 413 err := pack.BuildRPFFromPDF(filepath.Base(inputPath), inputPath, tmpPackagePath) 414 // will remove the tmp file even if an error is returned 415 defer os.Remove(tmpPackagePath) 416 // process error 417 if err != nil { 418 return err 419 } 420 421 // build an encrypted package 422 return buildEncryptedRPF(pub, tmpPackagePath, outputPath, encrypter, contentKey) 423 } 424 425 // processLPF transforms a W3C LPF file into a Readium Package and encrypts its resources 426 func processLPF(pub *Publication, inputPath string, outputPath string, encrypter crypto.Encrypter, contentKey string) error { 427 428 // generate a tmp Readium Package (rwpp) out of a W3C Package (lpf) 429 tmpPackagePath := outputPath + ".tmp" 430 err := pack.BuildRPFFromLPF(inputPath, tmpPackagePath) 431 // will remove the tmp file even if an error is returned 432 defer os.Remove(tmpPackagePath) 433 // process error 434 if err != nil { 435 return err 436 } 437 438 // build an encrypted package 439 return buildEncryptedRPF(pub, tmpPackagePath, outputPath, encrypter, contentKey) 440 } 441 442 // processRPF encrypts the source Readium Package 443 func processRPF(pub *Publication, inputPath string, outputPath string, encrypter crypto.Encrypter, contentKey string) error { 444 445 // build an encrypted package 446 return buildEncryptedRPF(pub, inputPath, outputPath, encrypter, contentKey) 447 } 448 449 // buildEncryptedRPF builds an encrypted Readium package out of an un-encrypted one 450 // FIXME: it cannot be used for EPUB as long as Do() and Process() are not merged 451 func buildEncryptedRPF(pub *Publication, inputPath string, outputPath string, encrypter crypto.Encrypter, contentKey string) error { 452 453 // create a reader on the un-encrypted readium package 454 reader, err := pack.OpenRPF(inputPath) 455 if err != nil { 456 return err 457 } 458 defer reader.Close() 459 // create the encrypted package file 460 outputFile, err := os.Create(outputPath) 461 if err != nil { 462 return err 463 } 464 defer outputFile.Close() 465 // create a writer on the encrypted package 466 writer, err := reader.NewWriter(outputFile) 467 if err != nil { 468 return err 469 } 470 // encrypt resources from the input package, return the encryption key 471 encryptionKey, err := pack.Process(encrypter, contentKey, reader, writer) 472 if err != nil { 473 return err 474 } 475 pub.EncryptionKey = encryptionKey 476 477 err = writer.Close() 478 if err != nil { 479 return err 480 } 481 482 // calculate the output file size and checksum 483 stats, err := outputFile.Stat() 484 if err == nil && (stats.Size() > 0) { 485 filesize := stats.Size() 486 pub.Size = uint32(filesize) 487 cs := checksum(outputFile) 488 pub.Checksum = cs 489 } 490 if stats.Size() == 0 { 491 return errors.New("empty output file") 492 } 493 return nil 494 }