github.com/soulteary/pocket-bookcase@v0.0.0-20240428065142-0b5a9a0fc98a/internal/core/processing.go (about) 1 package core 2 3 import ( 4 "bytes" 5 "fmt" 6 "image" 7 "image/color" 8 "image/draw" 9 "image/jpeg" 10 "io" 11 "log" 12 "math" 13 "net/url" 14 "os" 15 fp "path/filepath" 16 "strconv" 17 "strings" 18 19 "github.com/disintegration/imaging" 20 "github.com/go-shiori/go-readability" 21 "github.com/go-shiori/warc" 22 "github.com/pkg/errors" 23 "github.com/soulteary/pocket-bookcase/internal/dependencies" 24 "github.com/soulteary/pocket-bookcase/internal/model" 25 _ "golang.org/x/image/webp" 26 27 // Add support for png 28 _ "image/png" 29 ) 30 31 // ProcessRequest is the request for processing bookmark. 32 type ProcessRequest struct { 33 DataDir string 34 Bookmark model.BookmarkDTO 35 Content io.Reader 36 ContentType string 37 KeepTitle bool 38 KeepExcerpt bool 39 LogArchival bool 40 } 41 42 var ErrNoSupportedImageType = errors.New("unsupported image type") 43 44 // ProcessBookmark process the bookmark and archive it if needed. 45 // Return three values, is error fatal, and error value. 46 func ProcessBookmark(deps *dependencies.Dependencies, req ProcessRequest) (book model.BookmarkDTO, isFatalErr bool, err error) { 47 book = req.Bookmark 48 contentType := req.ContentType 49 50 // Make sure bookmark ID is defined 51 if book.ID == 0 { 52 return book, true, fmt.Errorf("bookmark ID is not valid") 53 } 54 55 // Split bookmark content so it can be processed several times 56 archivalInput := bytes.NewBuffer(nil) 57 readabilityInput := bytes.NewBuffer(nil) 58 readabilityCheckInput := bytes.NewBuffer(nil) 59 60 var multiWriter io.Writer 61 if !strings.Contains(contentType, "text/html") { 62 multiWriter = io.MultiWriter(archivalInput) 63 } else { 64 multiWriter = io.MultiWriter(archivalInput, readabilityInput, readabilityCheckInput) 65 } 66 67 _, err = io.Copy(multiWriter, req.Content) 68 if err != nil { 69 return book, false, fmt.Errorf("failed to process article: %v", err) 70 } 71 72 // If this is HTML, parse for readable content 73 strID := strconv.Itoa(book.ID) 74 imgPath := model.GetThumbnailPath(&book) 75 var imageURLs []string 76 if strings.Contains(contentType, "text/html") { 77 isReadable := readability.Check(readabilityCheckInput) 78 79 nurl, err := url.Parse(book.URL) 80 if err != nil { 81 return book, true, fmt.Errorf("failed to parse url: %v", err) 82 } 83 84 article, err := readability.FromReader(readabilityInput, nurl) 85 if err != nil { 86 return book, false, fmt.Errorf("failed to parse article: %v", err) 87 } 88 89 book.Author = article.Byline 90 book.Content = article.TextContent 91 book.HTML = article.Content 92 93 // If title and excerpt doesnt have submitted value, use from article 94 if !req.KeepTitle || book.Title == "" { 95 book.Title = article.Title 96 } 97 98 if !req.KeepExcerpt || book.Excerpt == "" { 99 book.Excerpt = article.Excerpt 100 } 101 102 // Sometimes article doesn't have any title, so make sure it is not empty 103 if book.Title == "" { 104 book.Title = book.URL 105 } 106 107 // Get image URL 108 if article.Image != "" { 109 imageURLs = append(imageURLs, article.Image) 110 } else { 111 deps.Domains.Storage.FS().Remove(imgPath) 112 } 113 114 if article.Favicon != "" { 115 imageURLs = append(imageURLs, article.Favicon) 116 } 117 118 if !isReadable { 119 book.Content = "" 120 } 121 122 book.HasContent = book.Content != "" 123 } 124 125 // Save article image to local disk 126 for i, imageURL := range imageURLs { 127 err = DownloadBookImage(deps, imageURL, imgPath) 128 if err != nil && errors.Is(err, ErrNoSupportedImageType) { 129 log.Printf("%s: %s", err, imageURL) 130 if i == len(imageURLs)-1 { 131 deps.Domains.Storage.FS().Remove(imgPath) 132 } 133 } 134 if err != nil { 135 log.Printf("File download not successful for image URL: %s", imageURL) 136 continue 137 } 138 if err == nil { 139 book.ImageURL = fp.Join("/", "bookmark", strID, "thumb") 140 break 141 } 142 } 143 144 // If needed, create ebook as well 145 if book.CreateEbook { 146 ebookPath := model.GetEbookPath(&book) 147 req.Bookmark = book 148 149 if strings.Contains(contentType, "application/pdf") { 150 return book, false, errors.Wrap(err, "can't create ebook from pdf") 151 } else { 152 _, err = GenerateEbook(deps, req, ebookPath) 153 if err != nil { 154 return book, true, errors.Wrap(err, "failed to create ebook") 155 } 156 book.HasEbook = true 157 } 158 } 159 160 // If needed, create offline archive as well 161 if book.CreateArchive { 162 tmpFile, err := os.CreateTemp("", "archive") 163 if err != nil { 164 return book, false, fmt.Errorf("failed to create temp archive: %v", err) 165 } 166 defer deps.Domains.Storage.FS().Remove(tmpFile.Name()) 167 168 archivalRequest := warc.ArchivalRequest{ 169 URL: book.URL, 170 Reader: archivalInput, 171 ContentType: contentType, 172 UserAgent: userAgent, 173 LogEnabled: req.LogArchival, 174 } 175 176 err = warc.NewArchive(archivalRequest, tmpFile.Name()) 177 if err != nil { 178 defer os.Remove(tmpFile.Name()) 179 return book, false, fmt.Errorf("failed to create archive: %v", err) 180 } 181 182 dstPath := model.GetArchivePath(&book) 183 err = deps.Domains.Storage.WriteFile(dstPath, tmpFile) 184 if err != nil { 185 return book, false, fmt.Errorf("failed move archive to destination `: %v", err) 186 } 187 188 book.HasArchive = true 189 } 190 191 return book, false, nil 192 } 193 194 func DownloadBookImage(deps *dependencies.Dependencies, url, dstPath string) error { 195 // Fetch data from URL 196 resp, err := httpClient.Get(url) 197 if err != nil { 198 return err 199 } 200 defer resp.Body.Close() 201 202 // Make sure it's JPG or PNG image 203 cp := resp.Header.Get("Content-Type") 204 if !strings.Contains(cp, "image/jpeg") && 205 !strings.Contains(cp, "image/pjpeg") && 206 !strings.Contains(cp, "image/jpg") && 207 !strings.Contains(cp, "image/webp") && 208 !strings.Contains(cp, "image/png") { 209 return ErrNoSupportedImageType 210 } 211 212 // At this point, the download has finished successfully. 213 // Create tmpFile 214 tmpFile, err := os.CreateTemp("", "image") 215 if err != nil { 216 return fmt.Errorf("failed to create temporary image file: %v", err) 217 } 218 defer os.Remove(tmpFile.Name()) 219 220 // Parse image and process it. 221 // If image is smaller than 600x400 or its ratio is less than 4:3, resize. 222 // Else, save it as it is. 223 img, _, err := image.Decode(resp.Body) 224 if err != nil { 225 return fmt.Errorf("failed to parse image %s: %v", url, err) 226 } 227 228 imgRect := img.Bounds() 229 imgWidth := imgRect.Dx() 230 imgHeight := imgRect.Dy() 231 imgRatio := float64(imgWidth) / float64(imgHeight) 232 233 if imgWidth >= 600 && imgHeight >= 400 && imgRatio > 1.3 { 234 err = jpeg.Encode(tmpFile, img, nil) 235 } else { 236 // Create background 237 bg := image.NewNRGBA(imgRect) 238 draw.Draw(bg, imgRect, image.NewUniform(color.White), image.Point{}, draw.Src) 239 draw.Draw(bg, imgRect, img, image.Point{}, draw.Over) 240 241 bg = imaging.Fill(bg, 600, 400, imaging.Center, imaging.Lanczos) 242 bg = imaging.Blur(bg, 150) 243 bg = imaging.AdjustBrightness(bg, 30) 244 245 // Create foreground 246 fg := imaging.Fit(img, 600, 400, imaging.Lanczos) 247 248 // Merge foreground and background 249 bgRect := bg.Bounds() 250 fgRect := fg.Bounds() 251 fgPosition := image.Point{ 252 X: bgRect.Min.X - int(math.Round(float64(bgRect.Dx()-fgRect.Dx())/2)), 253 Y: bgRect.Min.Y - int(math.Round(float64(bgRect.Dy()-fgRect.Dy())/2)), 254 } 255 256 draw.Draw(bg, bgRect, fg, fgPosition, draw.Over) 257 258 // Save to file 259 err = jpeg.Encode(tmpFile, bg, nil) 260 } 261 262 if err != nil { 263 return fmt.Errorf("failed to save image %s: %v", url, err) 264 } 265 266 err = deps.Domains.Storage.WriteFile(dstPath, tmpFile) 267 if err != nil { 268 return err 269 } 270 271 return nil 272 }