github.com/readium/readium-lcp-server@v0.0.0-20240101192032-6e95190e99f1/pack/w3cpackage.go (about)

     1  // Copyright 2020 Readium Foundation. All rights reserved.
     2  // Use of this source code is governed by a BSD-style license
     3  // that can be found in the LICENSE file exposed on Github (readium) in the project repository.
     4  
     5  package pack
     6  
     7  import (
     8  	"archive/zip"
     9  	"crypto/rand"
    10  	"encoding/json"
    11  	"fmt"
    12  	"io"
    13  	"os"
    14  	"path/filepath"
    15  	"strings"
    16  	"time"
    17  
    18  	"github.com/readium/readium-lcp-server/rwpm"
    19  	"github.com/rickb777/date/period"
    20  )
    21  
    22  // W3CManifestName is the name of the W3C manifest in an LPF package
    23  const W3CManifestName = "publication.json"
    24  
    25  // W3CEntryPageName is the name of the W3C entry page in an LPF package
    26  const W3CEntryPageName = "index.html"
    27  
    28  // RWPManifestName is the name of the Readium Manifest in a package
    29  const RWPManifestName = "manifest.json"
    30  
    31  // displayW3CMan displays a serialized W3C Manifest (debug purposes only)
    32  /*
    33  func displayW3CMan(w3cman rwpm.W3CPublication) error {
    34  
    35  	json, err := json.MarshalIndent(w3cman, "", " ")
    36  	if err != nil {
    37  		return err
    38  	}
    39  	fmt.Println(string(json))
    40  	return nil
    41  }
    42  */
    43  
    44  // mapXlanglProperty maps a multilingual property (e.g. name)
    45  // from a W3C manifest to a Readium manifest.
    46  // Note: 'direction' cannot be mapped.
    47  func mapXlanglProperty(w3clp rwpm.W3CMultiLanguage) (ml rwpm.MultiLanguage) {
    48  
    49  	ml = make(map[string]string)
    50  	for _, p := range w3clp {
    51  		ml[p.Language] = p.Value
    52  	}
    53  	return
    54  }
    55  
    56  // mapContributor maps a Contributors property (e.g. author)
    57  // from a W3C manifest to a Readium manifest.
    58  // Note: ID is mapped to Identifier
    59  func mapContributor(w3cctors rwpm.W3CContributors) (ctors rwpm.Contributors) {
    60  
    61  	ctors = make(rwpm.Contributors, len(w3cctors))
    62  	for i, c := range w3cctors {
    63  		ctors[i].Name = mapXlanglProperty(c.Name)
    64  		ctors[i].Identifier = c.ID
    65  	}
    66  	return
    67  }
    68  
    69  // getMediaType infers a media type from a file extension
    70  // the media type is mandatory in the Readium manifest;
    71  // if a media type is missing in the input link,
    72  // try to infer it from the file extension.
    73  // Note: a test on the magic number of the input file could be added.
    74  func getMediaType(ext string) (mt string) {
    75  
    76  	switch ext {
    77  	case ".mp3":
    78  		mt = "audio/mpeg"
    79  	case ".aac":
    80  		mt = "audio/aac"
    81  	case ".opus":
    82  		mt = "audio/ogg"
    83  	case ".wav":
    84  		mt = "audio/wav"
    85  	case ".jpeg":
    86  		mt = "image/jpeg"
    87  	case ".jpg":
    88  		mt = "image/jpeg"
    89  	case ".png":
    90  		mt = "image/png"
    91  	case ".gif":
    92  		mt = "image/gif"
    93  	case ".webp":
    94  		mt = "image/webp"
    95  	case ".json":
    96  		mt = "application/json"
    97  	case ".html":
    98  		mt = "text/html"
    99  	case ".css":
   100  		mt = "text/css"
   101  	case ".js":
   102  		mt = "application/javascript"
   103  	case ".epub":
   104  		mt = "application/epub+zip"
   105  	case ".pdf":
   106  		mt = "application/pdf"
   107  	}
   108  	return
   109  }
   110  
   111  // mapLinks copies a collection of links (reading order, resources ...)
   112  // from a W3C manifest to a Readium manifest
   113  func mapLinks(w3clinks []rwpm.W3CLink) (rwpmLinks []rwpm.Link) {
   114  
   115  	for _, w3cl := range w3clinks {
   116  		var rwpml rwpm.Link
   117  		rwpml.Href = w3cl.URL
   118  		if w3cl.EncodingFormat != "" {
   119  			rwpml.Type = w3cl.EncodingFormat
   120  		} else {
   121  			rwpml.Type = getMediaType(filepath.Ext(w3cl.URL))
   122  		}
   123  		rwpml.Rel = w3cl.Rel
   124  		// a multilingual name is lost during mapping
   125  		if w3cl.Name != nil {
   126  			rwpml.Title = w3cl.Name.Text()
   127  		}
   128  		rwpml.Duration, _ = isoDurationToSc(w3cl.Duration)
   129  
   130  		rwpml.Alternate = mapLinks(w3cl.Alternate)
   131  
   132  		rwpmLinks = append(rwpmLinks, rwpml)
   133  	}
   134  	return
   135  }
   136  
   137  // generateRWPManifest generates a json Readium manifest (as []byte) out of a W3C Manifest
   138  func generateRWPManifest(w3cman rwpm.W3CPublication) (manifest rwpm.Publication) {
   139  
   140  	// debug
   141  	//displayW3CMan(w3cman)
   142  
   143  	manifest.Context = []string{"https://readium.org/webpub-manifest/context.jsonld"}
   144  
   145  	if w3cman.ConformsTo == "https://www.w3.org/TR/audiobooks/" {
   146  		manifest.Metadata.Type = "https://schema.org/Audiobook"
   147  		manifest.Metadata.ConformsTo = "https://readium.org/webpub-manifest/profiles/audiobook"
   148  	} else {
   149  		manifest.Metadata.Type = "https://schema.org/CreativeWork"
   150  	}
   151  
   152  	var identifier string
   153  	if w3cman.ID != "" {
   154  		identifier = w3cman.ID
   155  	} else if w3cman.URL != "" {
   156  		identifier = w3cman.URL
   157  	} else {
   158  		identifier, _ = newUUID()
   159  	}
   160  	manifest.Metadata.Identifier = identifier
   161  	manifest.Metadata.Title = mapXlanglProperty(w3cman.Name)
   162  	manifest.Metadata.Description = w3cman.Description
   163  	manifest.Metadata.Subject = w3cman.Subject
   164  	manifest.Metadata.Language = w3cman.InLanguage
   165  	// W3C manifest: published and modified are date-or-datetime,
   166  	// Readium manifest: published is a date; modified is a datetime
   167  	// The use of pointer helps dealing with nil values
   168  	if w3cman.DatePublished != nil {
   169  		published := rwpm.Date(time.Time(*w3cman.DatePublished))
   170  		manifest.Metadata.Published = &published
   171  	}
   172  	if w3cman.DateModified != nil {
   173  		modified := time.Time(*w3cman.DateModified)
   174  		manifest.Metadata.Modified = &modified
   175  	}
   176  	manifest.Metadata.Duration, _ = isoDurationToSc(w3cman.Duration)
   177  	manifest.Metadata.ReadingProgression = w3cman.ReadingProgression
   178  
   179  	manifest.Metadata.Publisher = mapContributor(w3cman.Publisher)
   180  	manifest.Metadata.Artist = mapContributor(w3cman.Artist)
   181  	manifest.Metadata.Author = mapContributor(w3cman.Author)
   182  	manifest.Metadata.Colorist = mapContributor(w3cman.Colorist)
   183  	manifest.Metadata.Contributor = mapContributor(w3cman.Contributor)
   184  	manifest.Metadata.Editor = mapContributor(w3cman.Editor)
   185  	manifest.Metadata.Illustrator = mapContributor(w3cman.Illustrator)
   186  	manifest.Metadata.Inker = mapContributor(w3cman.Inker)
   187  	manifest.Metadata.Letterer = mapContributor(w3cman.Letterer)
   188  	manifest.Metadata.Penciler = mapContributor(w3cman.Penciler)
   189  	manifest.Metadata.Narrator = mapContributor(w3cman.ReadBy)
   190  	manifest.Metadata.Translator = mapContributor(w3cman.Translator)
   191  
   192  	manifest.Links = mapLinks(w3cman.Links)
   193  	manifest.ReadingOrder = mapLinks(w3cman.ReadingOrder)
   194  	manifest.Resources = mapLinks(w3cman.Resources)
   195  
   196  	// FIXME: add to the Readium manifest the ToC from index.html
   197  
   198  	return
   199  }
   200  
   201  // BuildRPFFromLPF builds a Readium package (rwpp) from a W3C LPF file (lpfPath)
   202  func BuildRPFFromLPF(lpfPath string, rwppPath string) error {
   203  
   204  	// open the lpf file
   205  	lpfFile, err := zip.OpenReader(lpfPath)
   206  	if err != nil {
   207  		return err
   208  	}
   209  	defer lpfFile.Close()
   210  
   211  	// extract the W3C manifest from the LPF
   212  	var w3cManifest rwpm.W3CPublication
   213  	found := false
   214  	for _, file := range lpfFile.File {
   215  		if file.Name == W3CManifestName {
   216  			m, err := file.Open()
   217  			if err != nil {
   218  				return err
   219  			}
   220  			defer m.Close()
   221  			decoder := json.NewDecoder(m)
   222  			err = decoder.Decode(&w3cManifest)
   223  			if err != nil {
   224  				return err
   225  			}
   226  			found = true
   227  			break
   228  		}
   229  	}
   230  	// return an error if the W3C manifest is missing
   231  	if !found {
   232  		return fmt.Errorf("W3C LPF %s: missing publication.json", lpfPath)
   233  	}
   234  
   235  	// extract the primary entry page from the LPF
   236  	// FIXME: extract the primary entry page from the LPF
   237  
   238  	// generate a Readium manifest out of the W3C manifest
   239  	// and primary entry page
   240  	rwpManifest := generateRWPManifest(w3cManifest)
   241  
   242  	// marshal the Readium manifest
   243  	rwpJSON, err := json.MarshalIndent(rwpManifest, "", " ")
   244  	if err != nil {
   245  		return err
   246  	}
   247  	// debug
   248  	//println(string(rwpJSON))
   249  
   250  	// create the rwpp file
   251  	rwppFile, err := os.Create(rwppPath)
   252  	if err != nil {
   253  		return err
   254  	}
   255  
   256  	defer rwppFile.Close()
   257  
   258  	// create a zip writer on the rwpp
   259  	zipWriter := zip.NewWriter(rwppFile)
   260  	defer zipWriter.Close()
   261  
   262  	// Add the Readium manifest to the rwpp
   263  	man, err := zipWriter.Create(RWPManifestName)
   264  	if err != nil {
   265  		return err
   266  	}
   267  	_, err = man.Write(rwpJSON)
   268  	if err != nil {
   269  		return err
   270  	}
   271  
   272  	// Append every lpf resource to the rwpp
   273  	for _, file := range lpfFile.File {
   274  		// filter MacOS specific files (present if a standard zipper has been used)
   275  		runes := []rune(file.Name)
   276  		if string(runes[:8]) == "__MACOSX" {
   277  			continue
   278  		}
   279  		// keep the original compression value (store vs deflate)
   280  		writer, err := zipWriter.CreateHeader(&file.FileHeader)
   281  		if err != nil {
   282  			return err
   283  		}
   284  		reader, err := file.Open()
   285  		if err != nil {
   286  			return err
   287  		}
   288  		defer reader.Close()
   289  		_, err = io.Copy(writer, reader)
   290  		if err != nil {
   291  			return err
   292  		}
   293  	}
   294  	return nil
   295  }
   296  
   297  // newUUID generates a random UUID according to RFC 4122
   298  // note: this small function is copied from license.go
   299  func newUUID() (string, error) {
   300  	uuid := make([]byte, 16)
   301  	n, err := io.ReadFull(rand.Reader, uuid)
   302  	if n != len(uuid) || err != nil {
   303  		return "", err
   304  	}
   305  	// variant bits; see section 4.1.1
   306  	uuid[8] = uuid[8]&^0xc0 | 0x80
   307  	// version 4 (pseudo-random); see section 4.1.3
   308  	uuid[6] = uuid[6]&^0xf0 | 0x40
   309  	return fmt.Sprintf("%x-%x-%x-%x-%x", uuid[0:4], uuid[4:6], uuid[6:8], uuid[8:10], uuid[10:]), nil
   310  }
   311  
   312  // isoDurationToSc transforms an ISO duration to a number of seconds, as a float
   313  func isoDurationToSc(iso string) (seconds float32, err error) {
   314  	period, err := period.Parse(iso)
   315  	seconds = float32(period.Hours()*3600 + period.Minutes()*60 + period.Seconds())
   316  	return
   317  }
   318  
   319  // ------------------------- unused
   320  
   321  // UnzipToFolder fills a folder (dest) with the content of a zip file (src)
   322  // returns an array of unzipped file names
   323  func UnzipToFolder(src string, dest string) ([]string, error) {
   324  
   325  	var filepaths []string
   326  
   327  	r, err := zip.OpenReader(src)
   328  	if err != nil {
   329  		return filepaths, err
   330  	}
   331  	defer r.Close()
   332  
   333  	for _, f := range r.File {
   334  
   335  		// store filename/path for returning and using later on
   336  		fpath := filepath.Join(dest, f.Name)
   337  
   338  		// check for ZipSlip. More Info: http://bit.ly/2MsjAWE
   339  		if !strings.HasPrefix(fpath, filepath.Clean(dest)+string(os.PathSeparator)) {
   340  			return filepaths, fmt.Errorf("%s: illegal file path", fpath)
   341  		}
   342  
   343  		filepaths = append(filepaths, fpath)
   344  
   345  		if f.FileInfo().IsDir() {
   346  			// make Folder
   347  			os.MkdirAll(fpath, os.ModePerm)
   348  			continue
   349  		}
   350  
   351  		// make File
   352  		if err = os.MkdirAll(filepath.Dir(fpath), os.ModePerm); err != nil {
   353  			return filepaths, err
   354  		}
   355  
   356  		outFile, err := os.OpenFile(fpath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode())
   357  		if err != nil {
   358  			return filepaths, err
   359  		}
   360  
   361  		rc, err := f.Open()
   362  		if err != nil {
   363  			return filepaths, err
   364  		}
   365  
   366  		_, err = io.Copy(outFile, rc)
   367  
   368  		// close the file without defer to close before next iteration of loop
   369  		outFile.Close()
   370  		rc.Close()
   371  
   372  		if err != nil {
   373  			return filepaths, err
   374  		}
   375  	}
   376  	return filepaths, nil
   377  }