github.com/soulteary/pocket-bookcase@v0.0.0-20240428065142-0b5a9a0fc98a/internal/core/processing.go (about)

     1  package core
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"image"
     7  	"image/color"
     8  	"image/draw"
     9  	"image/jpeg"
    10  	"io"
    11  	"log"
    12  	"math"
    13  	"net/url"
    14  	"os"
    15  	fp "path/filepath"
    16  	"strconv"
    17  	"strings"
    18  
    19  	"github.com/disintegration/imaging"
    20  	"github.com/go-shiori/go-readability"
    21  	"github.com/go-shiori/warc"
    22  	"github.com/pkg/errors"
    23  	"github.com/soulteary/pocket-bookcase/internal/dependencies"
    24  	"github.com/soulteary/pocket-bookcase/internal/model"
    25  	_ "golang.org/x/image/webp"
    26  
    27  	// Add support for png
    28  	_ "image/png"
    29  )
    30  
// ProcessRequest is the request for processing a bookmark with
// ProcessBookmark.
type ProcessRequest struct {
	// DataDir is not read by ProcessBookmark itself.
	// NOTE(review): presumably the application data directory — confirm
	// against the callers and GenerateEbook.
	DataDir string
	// Bookmark is the bookmark to process; its ID must be non-zero.
	Bookmark model.BookmarkDTO
	// Content is the raw content of the bookmarked resource. It is read
	// once, until EOF.
	Content io.Reader
	// ContentType is the content type of Content. Content is parsed as an
	// article only when it contains "text/html".
	ContentType string
	// KeepTitle keeps a non-empty bookmark title instead of replacing it
	// with the title of the parsed article.
	KeepTitle bool
	// KeepExcerpt keeps a non-empty bookmark excerpt instead of replacing it
	// with the excerpt of the parsed article.
	KeepExcerpt bool
	// LogArchival is passed to the archiver as its LogEnabled option.
	LogArchival bool
}
    41  
// ErrNoSupportedImageType is returned by DownloadBookImage when the
// Content-Type of the response is not one of the accepted image types
// (JPEG, WebP or PNG).
var ErrNoSupportedImageType = errors.New("unsupported image type")
    43  
    44  // ProcessBookmark process the bookmark and archive it if needed.
    45  // Return three values, is error fatal, and error value.
    46  func ProcessBookmark(deps *dependencies.Dependencies, req ProcessRequest) (book model.BookmarkDTO, isFatalErr bool, err error) {
    47  	book = req.Bookmark
    48  	contentType := req.ContentType
    49  
    50  	// Make sure bookmark ID is defined
    51  	if book.ID == 0 {
    52  		return book, true, fmt.Errorf("bookmark ID is not valid")
    53  	}
    54  
    55  	// Split bookmark content so it can be processed several times
    56  	archivalInput := bytes.NewBuffer(nil)
    57  	readabilityInput := bytes.NewBuffer(nil)
    58  	readabilityCheckInput := bytes.NewBuffer(nil)
    59  
    60  	var multiWriter io.Writer
    61  	if !strings.Contains(contentType, "text/html") {
    62  		multiWriter = io.MultiWriter(archivalInput)
    63  	} else {
    64  		multiWriter = io.MultiWriter(archivalInput, readabilityInput, readabilityCheckInput)
    65  	}
    66  
    67  	_, err = io.Copy(multiWriter, req.Content)
    68  	if err != nil {
    69  		return book, false, fmt.Errorf("failed to process article: %v", err)
    70  	}
    71  
    72  	// If this is HTML, parse for readable content
    73  	strID := strconv.Itoa(book.ID)
    74  	imgPath := model.GetThumbnailPath(&book)
    75  	var imageURLs []string
    76  	if strings.Contains(contentType, "text/html") {
    77  		isReadable := readability.Check(readabilityCheckInput)
    78  
    79  		nurl, err := url.Parse(book.URL)
    80  		if err != nil {
    81  			return book, true, fmt.Errorf("failed to parse url: %v", err)
    82  		}
    83  
    84  		article, err := readability.FromReader(readabilityInput, nurl)
    85  		if err != nil {
    86  			return book, false, fmt.Errorf("failed to parse article: %v", err)
    87  		}
    88  
    89  		book.Author = article.Byline
    90  		book.Content = article.TextContent
    91  		book.HTML = article.Content
    92  
    93  		// If title and excerpt doesnt have submitted value, use from article
    94  		if !req.KeepTitle || book.Title == "" {
    95  			book.Title = article.Title
    96  		}
    97  
    98  		if !req.KeepExcerpt || book.Excerpt == "" {
    99  			book.Excerpt = article.Excerpt
   100  		}
   101  
   102  		// Sometimes article doesn't have any title, so make sure it is not empty
   103  		if book.Title == "" {
   104  			book.Title = book.URL
   105  		}
   106  
   107  		// Get image URL
   108  		if article.Image != "" {
   109  			imageURLs = append(imageURLs, article.Image)
   110  		} else {
   111  			deps.Domains.Storage.FS().Remove(imgPath)
   112  		}
   113  
   114  		if article.Favicon != "" {
   115  			imageURLs = append(imageURLs, article.Favicon)
   116  		}
   117  
   118  		if !isReadable {
   119  			book.Content = ""
   120  		}
   121  
   122  		book.HasContent = book.Content != ""
   123  	}
   124  
   125  	// Save article image to local disk
   126  	for i, imageURL := range imageURLs {
   127  		err = DownloadBookImage(deps, imageURL, imgPath)
   128  		if err != nil && errors.Is(err, ErrNoSupportedImageType) {
   129  			log.Printf("%s: %s", err, imageURL)
   130  			if i == len(imageURLs)-1 {
   131  				deps.Domains.Storage.FS().Remove(imgPath)
   132  			}
   133  		}
   134  		if err != nil {
   135  			log.Printf("File download not successful for image URL: %s", imageURL)
   136  			continue
   137  		}
   138  		if err == nil {
   139  			book.ImageURL = fp.Join("/", "bookmark", strID, "thumb")
   140  			break
   141  		}
   142  	}
   143  
   144  	// If needed, create ebook as well
   145  	if book.CreateEbook {
   146  		ebookPath := model.GetEbookPath(&book)
   147  		req.Bookmark = book
   148  
   149  		if strings.Contains(contentType, "application/pdf") {
   150  			return book, false, errors.Wrap(err, "can't create ebook from pdf")
   151  		} else {
   152  			_, err = GenerateEbook(deps, req, ebookPath)
   153  			if err != nil {
   154  				return book, true, errors.Wrap(err, "failed to create ebook")
   155  			}
   156  			book.HasEbook = true
   157  		}
   158  	}
   159  
   160  	// If needed, create offline archive as well
   161  	if book.CreateArchive {
   162  		tmpFile, err := os.CreateTemp("", "archive")
   163  		if err != nil {
   164  			return book, false, fmt.Errorf("failed to create temp archive: %v", err)
   165  		}
   166  		defer deps.Domains.Storage.FS().Remove(tmpFile.Name())
   167  
   168  		archivalRequest := warc.ArchivalRequest{
   169  			URL:         book.URL,
   170  			Reader:      archivalInput,
   171  			ContentType: contentType,
   172  			UserAgent:   userAgent,
   173  			LogEnabled:  req.LogArchival,
   174  		}
   175  
   176  		err = warc.NewArchive(archivalRequest, tmpFile.Name())
   177  		if err != nil {
   178  			defer os.Remove(tmpFile.Name())
   179  			return book, false, fmt.Errorf("failed to create archive: %v", err)
   180  		}
   181  
   182  		dstPath := model.GetArchivePath(&book)
   183  		err = deps.Domains.Storage.WriteFile(dstPath, tmpFile)
   184  		if err != nil {
   185  			return book, false, fmt.Errorf("failed move archive to destination `: %v", err)
   186  		}
   187  
   188  		book.HasArchive = true
   189  	}
   190  
   191  	return book, false, nil
   192  }
   193  
   194  func DownloadBookImage(deps *dependencies.Dependencies, url, dstPath string) error {
   195  	// Fetch data from URL
   196  	resp, err := httpClient.Get(url)
   197  	if err != nil {
   198  		return err
   199  	}
   200  	defer resp.Body.Close()
   201  
   202  	// Make sure it's JPG or PNG image
   203  	cp := resp.Header.Get("Content-Type")
   204  	if !strings.Contains(cp, "image/jpeg") &&
   205  		!strings.Contains(cp, "image/pjpeg") &&
   206  		!strings.Contains(cp, "image/jpg") &&
   207  		!strings.Contains(cp, "image/webp") &&
   208  		!strings.Contains(cp, "image/png") {
   209  		return ErrNoSupportedImageType
   210  	}
   211  
   212  	// At this point, the download has finished successfully.
   213  	// Create tmpFile
   214  	tmpFile, err := os.CreateTemp("", "image")
   215  	if err != nil {
   216  		return fmt.Errorf("failed to create temporary image file: %v", err)
   217  	}
   218  	defer os.Remove(tmpFile.Name())
   219  
   220  	// Parse image and process it.
   221  	// If image is smaller than 600x400 or its ratio is less than 4:3, resize.
   222  	// Else, save it as it is.
   223  	img, _, err := image.Decode(resp.Body)
   224  	if err != nil {
   225  		return fmt.Errorf("failed to parse image %s: %v", url, err)
   226  	}
   227  
   228  	imgRect := img.Bounds()
   229  	imgWidth := imgRect.Dx()
   230  	imgHeight := imgRect.Dy()
   231  	imgRatio := float64(imgWidth) / float64(imgHeight)
   232  
   233  	if imgWidth >= 600 && imgHeight >= 400 && imgRatio > 1.3 {
   234  		err = jpeg.Encode(tmpFile, img, nil)
   235  	} else {
   236  		// Create background
   237  		bg := image.NewNRGBA(imgRect)
   238  		draw.Draw(bg, imgRect, image.NewUniform(color.White), image.Point{}, draw.Src)
   239  		draw.Draw(bg, imgRect, img, image.Point{}, draw.Over)
   240  
   241  		bg = imaging.Fill(bg, 600, 400, imaging.Center, imaging.Lanczos)
   242  		bg = imaging.Blur(bg, 150)
   243  		bg = imaging.AdjustBrightness(bg, 30)
   244  
   245  		// Create foreground
   246  		fg := imaging.Fit(img, 600, 400, imaging.Lanczos)
   247  
   248  		// Merge foreground and background
   249  		bgRect := bg.Bounds()
   250  		fgRect := fg.Bounds()
   251  		fgPosition := image.Point{
   252  			X: bgRect.Min.X - int(math.Round(float64(bgRect.Dx()-fgRect.Dx())/2)),
   253  			Y: bgRect.Min.Y - int(math.Round(float64(bgRect.Dy()-fgRect.Dy())/2)),
   254  		}
   255  
   256  		draw.Draw(bg, bgRect, fg, fgPosition, draw.Over)
   257  
   258  		// Save to file
   259  		err = jpeg.Encode(tmpFile, bg, nil)
   260  	}
   261  
   262  	if err != nil {
   263  		return fmt.Errorf("failed to save image %s: %v", url, err)
   264  	}
   265  
   266  	err = deps.Domains.Storage.WriteFile(dstPath, tmpFile)
   267  	if err != nil {
   268  		return err
   269  	}
   270  
   271  	return nil
   272  }