github.com/readium/readium-lcp-server@v0.0.0-20240101192032-6e95190e99f1/pack/w3cpackage.go (about) 1 // Copyright 2020 Readium Foundation. All rights reserved. 2 // Use of this source code is governed by a BSD-style license 3 // that can be found in the LICENSE file exposed on Github (readium) in the project repository. 4 5 package pack 6 7 import ( 8 "archive/zip" 9 "crypto/rand" 10 "encoding/json" 11 "fmt" 12 "io" 13 "os" 14 "path/filepath" 15 "strings" 16 "time" 17 18 "github.com/readium/readium-lcp-server/rwpm" 19 "github.com/rickb777/date/period" 20 ) 21 22 // W3CManifestName is the name of the W3C manifest in an LPF package 23 const W3CManifestName = "publication.json" 24 25 // W3CEntryPageName is the name of the W3C entry page in an LPF package 26 const W3CEntryPageName = "index.html" 27 28 // RWPManifestName is the name of the Readium Manifest in a package 29 const RWPManifestName = "manifest.json" 30 31 // displayW3CMan displays a serialized W3C Manifest (debug purposes only) 32 /* 33 func displayW3CMan(w3cman rwpm.W3CPublication) error { 34 35 json, err := json.MarshalIndent(w3cman, "", " ") 36 if err != nil { 37 return err 38 } 39 fmt.Println(string(json)) 40 return nil 41 } 42 */ 43 44 // mapXlanglProperty maps a multilingual property (e.g. name) 45 // from a W3C manifest to a Readium manifest. 46 // Note: 'direction' cannot be mapped. 47 func mapXlanglProperty(w3clp rwpm.W3CMultiLanguage) (ml rwpm.MultiLanguage) { 48 49 ml = make(map[string]string) 50 for _, p := range w3clp { 51 ml[p.Language] = p.Value 52 } 53 return 54 } 55 56 // mapContributor maps a Contributors property (e.g. author) 57 // from a W3C manifest to a Readium manifest. 58 // Note: ID is mapped to Identifier 59 func mapContributor(w3cctors rwpm.W3CContributors) (ctors rwpm.Contributors) { 60 61 ctors = make(rwpm.Contributors, len(w3cctors)) 62 for i, c := range w3cctors { 63 ctors[i].Name = mapXlanglProperty(c.Name) 64 ctors[i].Identifier = c.ID 65 } 66 return 67 } 68 69 // getMediaType infers a media type from a file extension 70 // the media type is mandatory in the Readium manifest; 71 // if a media type is missing in the input link, 72 // try to infer it from the file extension. 73 // Note: a test on the magic number of the input file could be added. 74 func getMediaType(ext string) (mt string) { 75 76 switch ext { 77 case ".mp3": 78 mt = "audio/mpeg" 79 case ".aac": 80 mt = "audio/aac" 81 case ".opus": 82 mt = "audio/ogg" 83 case ".wav": 84 mt = "audio/wav" 85 case ".jpeg": 86 mt = "image/jpeg" 87 case ".jpg": 88 mt = "image/jpeg" 89 case ".png": 90 mt = "image/png" 91 case ".gif": 92 mt = "image/gif" 93 case ".webp": 94 mt = "image/webp" 95 case ".json": 96 mt = "application/json" 97 case ".html": 98 mt = "text/html" 99 case ".css": 100 mt = "text/css" 101 case ".js": 102 mt = "application/javascript" 103 case ".epub": 104 mt = "application/epub+zip" 105 case ".pdf": 106 mt = "application/pdf" 107 } 108 return 109 } 110 111 // mapLinks copies a collection of links (reading order, resources ...) 112 // from a W3C manifest to a Readium manifest 113 func mapLinks(w3clinks []rwpm.W3CLink) (rwpmLinks []rwpm.Link) { 114 115 for _, w3cl := range w3clinks { 116 var rwpml rwpm.Link 117 rwpml.Href = w3cl.URL 118 if w3cl.EncodingFormat != "" { 119 rwpml.Type = w3cl.EncodingFormat 120 } else { 121 rwpml.Type = getMediaType(filepath.Ext(w3cl.URL)) 122 } 123 rwpml.Rel = w3cl.Rel 124 // a multilingual name is lost during mapping 125 if w3cl.Name != nil { 126 rwpml.Title = w3cl.Name.Text() 127 } 128 rwpml.Duration, _ = isoDurationToSc(w3cl.Duration) 129 130 rwpml.Alternate = mapLinks(w3cl.Alternate) 131 132 rwpmLinks = append(rwpmLinks, rwpml) 133 } 134 return 135 } 136 137 // generateRWPManifest generates a json Readium manifest (as []byte) out of a W3C Manifest 138 func generateRWPManifest(w3cman rwpm.W3CPublication) (manifest rwpm.Publication) { 139 140 // debug 141 //displayW3CMan(w3cman) 142 143 manifest.Context = []string{"https://readium.org/webpub-manifest/context.jsonld"} 144 145 if w3cman.ConformsTo == "https://www.w3.org/TR/audiobooks/" { 146 manifest.Metadata.Type = "https://schema.org/Audiobook" 147 manifest.Metadata.ConformsTo = "https://readium.org/webpub-manifest/profiles/audiobook" 148 } else { 149 manifest.Metadata.Type = "https://schema.org/CreativeWork" 150 } 151 152 var identifier string 153 if w3cman.ID != "" { 154 identifier = w3cman.ID 155 } else if w3cman.URL != "" { 156 identifier = w3cman.URL 157 } else { 158 identifier, _ = newUUID() 159 } 160 manifest.Metadata.Identifier = identifier 161 manifest.Metadata.Title = mapXlanglProperty(w3cman.Name) 162 manifest.Metadata.Description = w3cman.Description 163 manifest.Metadata.Subject = w3cman.Subject 164 manifest.Metadata.Language = w3cman.InLanguage 165 // W3C manifest: published and modified are date-or-datetime, 166 // Readium manifest: published is a date; modified is a datetime 167 // The use of pointer helps dealing with nil values 168 if w3cman.DatePublished != nil { 169 published := rwpm.Date(time.Time(*w3cman.DatePublished)) 170 manifest.Metadata.Published = &published 171 } 172 if w3cman.DateModified != nil { 173 modified := time.Time(*w3cman.DateModified) 174 manifest.Metadata.Modified = &modified 175 } 176 manifest.Metadata.Duration, _ = isoDurationToSc(w3cman.Duration) 177 manifest.Metadata.ReadingProgression = w3cman.ReadingProgression 178 179 manifest.Metadata.Publisher = mapContributor(w3cman.Publisher) 180 manifest.Metadata.Artist = mapContributor(w3cman.Artist) 181 manifest.Metadata.Author = mapContributor(w3cman.Author) 182 manifest.Metadata.Colorist = mapContributor(w3cman.Colorist) 183 manifest.Metadata.Contributor = mapContributor(w3cman.Contributor) 184 manifest.Metadata.Editor = mapContributor(w3cman.Editor) 185 manifest.Metadata.Illustrator = mapContributor(w3cman.Illustrator) 186 manifest.Metadata.Inker = mapContributor(w3cman.Inker) 187 manifest.Metadata.Letterer = mapContributor(w3cman.Letterer) 188 manifest.Metadata.Penciler = mapContributor(w3cman.Penciler) 189 manifest.Metadata.Narrator = mapContributor(w3cman.ReadBy) 190 manifest.Metadata.Translator = mapContributor(w3cman.Translator) 191 192 manifest.Links = mapLinks(w3cman.Links) 193 manifest.ReadingOrder = mapLinks(w3cman.ReadingOrder) 194 manifest.Resources = mapLinks(w3cman.Resources) 195 196 // FIXME: add to the Readium manifest the ToC from index.html 197 198 return 199 } 200 201 // BuildRPFFromLPF builds a Readium package (rwpp) from a W3C LPF file (lpfPath) 202 func BuildRPFFromLPF(lpfPath string, rwppPath string) error { 203 204 // open the lpf file 205 lpfFile, err := zip.OpenReader(lpfPath) 206 if err != nil { 207 return err 208 } 209 defer lpfFile.Close() 210 211 // extract the W3C manifest from the LPF 212 var w3cManifest rwpm.W3CPublication 213 found := false 214 for _, file := range lpfFile.File { 215 if file.Name == W3CManifestName { 216 m, err := file.Open() 217 if err != nil { 218 return err 219 } 220 defer m.Close() 221 decoder := json.NewDecoder(m) 222 err = decoder.Decode(&w3cManifest) 223 if err != nil { 224 return err 225 } 226 found = true 227 break 228 } 229 } 230 // return an error if the W3C manifest is missing 231 if !found { 232 return fmt.Errorf("W3C LPF %s: missing publication.json", lpfPath) 233 } 234 235 // extract the primary entry page from the LPF 236 // FIXME: extract the primary entry page from the LPF 237 238 // generate a Readium manifest out of the W3C manifest 239 // and primary entry page 240 rwpManifest := generateRWPManifest(w3cManifest) 241 242 // marshal the Readium manifest 243 rwpJSON, err := json.MarshalIndent(rwpManifest, "", " ") 244 if err != nil { 245 return err 246 } 247 // debug 248 //println(string(rwpJSON)) 249 250 // create the rwpp file 251 rwppFile, err := os.Create(rwppPath) 252 if err != nil { 253 return err 254 } 255 256 defer rwppFile.Close() 257 258 // create a zip writer on the rwpp 259 zipWriter := zip.NewWriter(rwppFile) 260 defer zipWriter.Close() 261 262 // Add the Readium manifest to the rwpp 263 man, err := zipWriter.Create(RWPManifestName) 264 if err != nil { 265 return err 266 } 267 _, err = man.Write(rwpJSON) 268 if err != nil { 269 return err 270 } 271 272 // Append every lpf resource to the rwpp 273 for _, file := range lpfFile.File { 274 // filter MacOS specific files (present if a standard zipper has been used) 275 runes := []rune(file.Name) 276 if string(runes[:8]) == "__MACOSX" { 277 continue 278 } 279 // keep the original compression value (store vs deflate) 280 writer, err := zipWriter.CreateHeader(&file.FileHeader) 281 if err != nil { 282 return err 283 } 284 reader, err := file.Open() 285 if err != nil { 286 return err 287 } 288 defer reader.Close() 289 _, err = io.Copy(writer, reader) 290 if err != nil { 291 return err 292 } 293 } 294 return nil 295 } 296 297 // newUUID generates a random UUID according to RFC 4122 298 // note: this small function is copied from license.go 299 func newUUID() (string, error) { 300 uuid := make([]byte, 16) 301 n, err := io.ReadFull(rand.Reader, uuid) 302 if n != len(uuid) || err != nil { 303 return "", err 304 } 305 // variant bits; see section 4.1.1 306 uuid[8] = uuid[8]&^0xc0 | 0x80 307 // version 4 (pseudo-random); see section 4.1.3 308 uuid[6] = uuid[6]&^0xf0 | 0x40 309 return fmt.Sprintf("%x-%x-%x-%x-%x", uuid[0:4], uuid[4:6], uuid[6:8], uuid[8:10], uuid[10:]), nil 310 } 311 312 // isoDurationToSc transforms an ISO duration to a number of seconds, as a float 313 func isoDurationToSc(iso string) (seconds float32, err error) { 314 period, err := period.Parse(iso) 315 seconds = float32(period.Hours()*3600 + period.Minutes()*60 + period.Seconds()) 316 return 317 } 318 319 // ------------------------- unused 320 321 // UnzipToFolder fills a folder (dest) with the content of a zip file (src) 322 // returns an array of unzipped file names 323 func UnzipToFolder(src string, dest string) ([]string, error) { 324 325 var filepaths []string 326 327 r, err := zip.OpenReader(src) 328 if err != nil { 329 return filepaths, err 330 } 331 defer r.Close() 332 333 for _, f := range r.File { 334 335 // store filename/path for returning and using later on 336 fpath := filepath.Join(dest, f.Name) 337 338 // check for ZipSlip. More Info: http://bit.ly/2MsjAWE 339 if !strings.HasPrefix(fpath, filepath.Clean(dest)+string(os.PathSeparator)) { 340 return filepaths, fmt.Errorf("%s: illegal file path", fpath) 341 } 342 343 filepaths = append(filepaths, fpath) 344 345 if f.FileInfo().IsDir() { 346 // make Folder 347 os.MkdirAll(fpath, os.ModePerm) 348 continue 349 } 350 351 // make File 352 if err = os.MkdirAll(filepath.Dir(fpath), os.ModePerm); err != nil { 353 return filepaths, err 354 } 355 356 outFile, err := os.OpenFile(fpath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode()) 357 if err != nil { 358 return filepaths, err 359 } 360 361 rc, err := f.Open() 362 if err != nil { 363 return filepaths, err 364 } 365 366 _, err = io.Copy(outFile, rc) 367 368 // close the file without defer to close before next iteration of loop 369 outFile.Close() 370 rc.Close() 371 372 if err != nil { 373 return filepaths, err 374 } 375 } 376 return filepaths, nil 377 }