github.com/soulteary/pocket-bookcase@v0.0.0-20240428065142-0b5a9a0fc98a/internal/cmd/pocket.go (about)

     1  package cmd
     2  
     3  import (
     4  	"fmt"
     5  	"os"
     6  	"strconv"
     7  	"strings"
     8  	"time"
     9  
    10  	"github.com/PuerkitoBio/goquery"
    11  	"github.com/soulteary/pocket-bookcase/internal/core"
    12  	"github.com/soulteary/pocket-bookcase/internal/model"
    13  	"github.com/spf13/cobra"
    14  )
    15  
    16  func pocketCmd() *cobra.Command {
    17  	cmd := &cobra.Command{
    18  		Use:   "pocket source-file",
    19  		Short: "Import bookmarks from Pocket's exported HTML file",
    20  		Args:  cobra.ExactArgs(1),
    21  		Run:   pocketHandler,
    22  	}
    23  
    24  	return cmd
    25  }
    26  
    27  func pocketHandler(cmd *cobra.Command, args []string) {
    28  	_, deps := initShiori(cmd.Context(), cmd)
    29  
    30  	// Open pocket's file
    31  	srcFile, err := os.Open(args[0])
    32  	if err != nil {
    33  		cError.Println(err)
    34  		os.Exit(1)
    35  	}
    36  	defer srcFile.Close()
    37  
    38  	// Parse pocket's file
    39  	bookmarks := []model.BookmarkDTO{}
    40  	mapURL := make(map[string]struct{})
    41  
    42  	doc, err := goquery.NewDocumentFromReader(srcFile)
    43  	if err != nil {
    44  		cError.Println(err)
    45  		os.Exit(1)
    46  	}
    47  
    48  	doc.Find("a").Each(func(_ int, a *goquery.Selection) {
    49  		// Get metadata
    50  		title := a.Text()
    51  		url, _ := a.Attr("href")
    52  		strTags, _ := a.Attr("tags")
    53  		strModified, _ := a.Attr("time_added")
    54  		intModified, _ := strconv.ParseInt(strModified, 10, 64)
    55  		modified := time.Unix(intModified, 0)
    56  
    57  		// Clean up URL
    58  		var err error
    59  		url, err = core.RemoveUTMParams(url)
    60  		if err != nil {
    61  			cError.Printf("Skip %s: URL is not valid\n", url)
    62  			return
    63  		}
    64  
    65  		// Make sure title is valid Utf-8
    66  		title = validateTitle(title, url)
    67  
    68  		// Check if the URL already exist before, both in bookmark
    69  		// file or in database
    70  		if _, exist := mapURL[url]; exist {
    71  			cError.Printf("Skip %s: URL already exists\n", url)
    72  			return
    73  		}
    74  
    75  		_, exist, err := deps.Database.GetBookmark(cmd.Context(), 0, url)
    76  		if err != nil {
    77  			cError.Printf("Skip %s: Get Bookmark fail, %v", url, err)
    78  			return
    79  		}
    80  
    81  		if exist {
    82  			cError.Printf("Skip %s: URL already exists\n", url)
    83  			mapURL[url] = struct{}{}
    84  			return
    85  		}
    86  
    87  		// Get bookmark tags
    88  		tags := []model.Tag{}
    89  		for _, strTag := range strings.Split(strTags, ",") {
    90  			if strTag != "" {
    91  				tags = append(tags, model.Tag{Name: strTag})
    92  			}
    93  		}
    94  
    95  		// Add item to list
    96  		bookmark := model.BookmarkDTO{
    97  			URL:      url,
    98  			Title:    title,
    99  			Modified: modified.Format(model.DatabaseDateFormat),
   100  			Tags:     tags,
   101  		}
   102  
   103  		mapURL[url] = struct{}{}
   104  		bookmarks = append(bookmarks, bookmark)
   105  	})
   106  
   107  	// Save bookmark to database
   108  	bookmarks, err = deps.Database.SaveBookmarks(cmd.Context(), true, bookmarks...)
   109  	if err != nil {
   110  		cError.Printf("Failed to save bookmarks: %v\n", err)
   111  		os.Exit(1)
   112  	}
   113  
   114  	// Print imported bookmark
   115  	fmt.Println()
   116  	printBookmarks(bookmarks...)
   117  }