github.com/readium/readium-lcp-server@v0.0.0-20240101192032-6e95190e99f1/epub/reader.go (about) 1 // Copyright 2019 European Digital Reading Lab. All rights reserved. 2 // Licensed to the Readium Foundation under one or more contributor license agreements. 3 // Use of this source code is governed by a BSD-style license 4 // that can be found in the LICENSE file exposed on Github (readium) in the project repository. 5 6 package epub 7 8 import ( 9 "archive/zip" 10 "encoding/xml" 11 "fmt" 12 "io" 13 "path/filepath" 14 "sort" 15 "strings" 16 17 "github.com/readium/readium-lcp-server/epub/opf" 18 "github.com/readium/readium-lcp-server/xmlenc" 19 "golang.org/x/net/html/charset" 20 ) 21 22 // root element of the opf 23 const ( 24 RootFileElement = "rootfile" 25 ) 26 27 type rootFile struct { 28 FullPath string `xml:"full-path,attr"` 29 MediaType string `xml:"media-type,attr"` 30 } 31 32 // findRootFiles looks for the epub root files 33 func findRootFiles(r io.Reader) ([]rootFile, error) { 34 xd := xml.NewDecoder(r) 35 // deal with non utf-8 xml files 36 xd.CharsetReader = charset.NewReaderLabel 37 var roots []rootFile 38 for x, err := xd.Token(); x != nil && err == nil; x, err = xd.Token() { 39 if err != nil { 40 return nil, err 41 } 42 switch x.(type) { 43 case xml.StartElement: 44 start := x.(xml.StartElement) 45 if start.Name.Local == RootFileElement { 46 var file rootFile 47 err = xd.DecodeElement(&file, &start) 48 if err != nil { 49 return nil, err 50 } 51 roots = append(roots, file) 52 } 53 } 54 } 55 56 return roots, nil 57 } 58 59 func (ep *Epub) addCleartextResource(name string) { 60 if ep.cleartextResources == nil { 61 ep.cleartextResources = []string{} 62 } 63 64 ep.cleartextResources = append(ep.cleartextResources, name) 65 } 66 67 // Read reads the opf file in the zip passed as a parameter, 68 // selects resources which mustn't be encrypted 69 // and returns an EPUB object 70 func Read(r *zip.Reader) (Epub, error) { 71 var ep Epub 72 container, err := findFileInZip(r, ContainerFile) 73 if err != nil { 74 return ep, err 75 } 76 fd, err := container.Open() 77 if err != nil { 78 return ep, err 79 } 80 defer fd.Close() 81 82 rootFiles, err := findRootFiles(fd) 83 if err != nil { 84 return ep, err 85 } 86 87 packages := make([]opf.Package, len(rootFiles)) 88 for i, rootFile := range rootFiles { 89 ep.addCleartextResource(rootFile.FullPath) 90 file, err := findFileInZip(r, rootFile.FullPath) 91 if err != nil { 92 return ep, err 93 } 94 packageFile, err := file.Open() 95 if err != nil { 96 return ep, err 97 } 98 defer packageFile.Close() 99 100 packages[i], err = opf.Parse(packageFile) 101 if err != nil { 102 fmt.Println("Error parsing the opf file") 103 return ep, err 104 } 105 packages[i].BasePath = filepath.Dir(rootFile.FullPath) 106 addCleartextResources(&ep, packages[i]) 107 } 108 109 var resources []*Resource 110 111 var encryption *xmlenc.Manifest 112 f, err := findFileInZip(r, EncryptionFile) 113 if err == nil { 114 r, err := f.Open() 115 if err != nil { 116 return Epub{}, err 117 } 118 defer r.Close() 119 m, err := xmlenc.Read(r) 120 encryption = &m 121 } 122 123 for _, file := range r.File { 124 125 // EPUBs do not require us to keep directory entries and we cannot process them 126 if file.FileInfo().IsDir() { 127 continue 128 } 129 130 if file.Name != EncryptionFile && 131 file.Name != "mimetype" { 132 rc, err := file.Open() 133 if err != nil { 134 return Epub{}, err 135 } 136 compressed := false 137 138 if encryption != nil { 139 if data, ok := encryption.DataForFile(file.Name); ok { 140 if data.Properties != nil { 141 for _, prop := range data.Properties.Properties { 142 if prop.Compression.Method == 8 { 143 compressed = true 144 break 145 } 146 } 147 } 148 } 149 } 150 151 resource := &Resource{Path: file.Name, Contents: rc, StorageMethod: file.Method, OriginalSize: file.FileHeader.UncompressedSize64, Compressed: compressed} 152 if item, ok := findResourceInPackages(resource, packages); ok { 153 resource.ContentType = item.MediaType 154 } 155 resources = append(resources, resource) 156 } 157 if strings.HasPrefix(file.Name, "META-INF") { 158 ep.addCleartextResource(file.Name) 159 } 160 } 161 162 ep.Package = packages 163 ep.Resource = resources 164 ep.Encryption = encryption 165 sort.Strings(ep.cleartextResources) 166 167 return ep, nil 168 } 169 170 // addCleartextResources searches for resources which must no be encrypted 171 // i.e. cover, nav and NCX 172 func addCleartextResources(ep *Epub, p opf.Package) { 173 coverImageID := "cover-image" 174 for _, meta := range p.Metadata.Metas { 175 if meta.Name == "cover" { 176 coverImageID = meta.Content 177 } 178 } 179 180 // Look for cover, nav and NCX items 181 for _, item := range p.Manifest.Items { 182 if strings.Contains(item.Properties, "cover-image") || 183 item.ID == coverImageID || 184 strings.Contains(item.Properties, "nav") || 185 item.MediaType == ContentType_NCX { 186 // re-construct a path, avoid insertion of backslashes as separator on Windows 187 path := filepath.ToSlash(filepath.Join(p.BasePath, item.Href)) 188 ep.addCleartextResource(path) 189 } 190 } 191 } 192 193 // findResourceInPackages returns an opf item which corresponds to 194 // the path of the resource given as parameter 195 func findResourceInPackages(r *Resource, packages []opf.Package) (opf.Item, bool) { 196 for _, p := range packages { 197 relative, err := filepath.Rel(p.BasePath, r.Path) 198 if err != nil { 199 return opf.Item{}, false 200 } 201 // avoid insertion of backslashes as separator on Windows 202 relative = filepath.ToSlash(relative) 203 204 if item, ok := p.Manifest.ItemWithPath(relative); ok { 205 return item, ok 206 } 207 } 208 209 return opf.Item{}, false 210 }