github.com/soulteary/pocket-bookcase@v0.0.0-20240428065142-0b5a9a0fc98a/internal/cmd/import.go (about)

     1  package cmd
     2  
     3  import (
     4  	"database/sql"
     5  	"errors"
     6  	"fmt"
     7  	"os"
     8  	"strconv"
     9  	"strings"
    10  	"time"
    11  
    12  	"github.com/PuerkitoBio/goquery"
    13  	"github.com/soulteary/pocket-bookcase/internal/core"
    14  	"github.com/soulteary/pocket-bookcase/internal/model"
    15  	"github.com/spf13/cobra"
    16  )
    17  
    18  func importCmd() *cobra.Command {
    19  	cmd := &cobra.Command{
    20  		Use:   "import source-file",
    21  		Short: "Import bookmarks from HTML file in Netscape Bookmark format",
    22  		Args:  cobra.ExactArgs(1),
    23  		Run:   importHandler,
    24  	}
    25  
    26  	cmd.Flags().BoolP("generate-tag", "t", false, "Auto generate tag from bookmark's category")
    27  
    28  	return cmd
    29  }
    30  
    31  func importHandler(cmd *cobra.Command, args []string) {
    32  	_, deps := initShiori(cmd.Context(), cmd)
    33  
    34  	// Parse flags
    35  	generateTag := cmd.Flags().Changed("generate-tag")
    36  
    37  	// If user doesn't specify, ask if tag need to be generated
    38  	if !generateTag {
    39  		var submit string
    40  		fmt.Print("Add parents folder as tag? (y/N): ")
    41  		fmt.Scanln(&submit)
    42  
    43  		generateTag = submit == "y"
    44  	}
    45  
    46  	// Open bookmark's file
    47  	srcFile, err := os.Open(args[0])
    48  	if err != nil {
    49  		cError.Printf("Failed to open %s: %v\n", args[0], err)
    50  		os.Exit(1)
    51  	}
    52  	defer srcFile.Close()
    53  
    54  	// Parse bookmark's file
    55  	bookmarks := []model.BookmarkDTO{}
    56  	mapURL := make(map[string]struct{})
    57  
    58  	doc, err := goquery.NewDocumentFromReader(srcFile)
    59  	if err != nil {
    60  		cError.Printf("Failed to parse bookmark: %v\n", err)
    61  		os.Exit(1)
    62  	}
    63  
    64  	doc.Find("dt>a").Each(func(_ int, a *goquery.Selection) {
    65  		// Get related elements
    66  		dt := a.Parent()
    67  		dl := dt.Parent()
    68  		h3 := dl.Parent().Find("h3").First()
    69  
    70  		// Get metadata
    71  		title := a.Text()
    72  		url, _ := a.Attr("href")
    73  		strTags, _ := a.Attr("tags")
    74  
    75  		dateStr, fieldExists := a.Attr("last_modified")
    76  		if !fieldExists {
    77  			dateStr, _ = a.Attr("add_date")
    78  		}
    79  
    80  		// Using now as default date in case no last_modified nor add_date are present
    81  		modifiedDate := time.Now()
    82  		if dateStr != "" {
    83  			modifiedTsInt, err := strconv.Atoi(dateStr)
    84  			if err != nil {
    85  				cError.Printf("Skip %s: date field is not valid: %s", url, err)
    86  				return
    87  			}
    88  
    89  			modifiedDate = time.Unix(int64(modifiedTsInt), 0)
    90  		}
    91  
    92  		// Clean up URL
    93  		url, err = core.RemoveUTMParams(url)
    94  		if err != nil {
    95  			cError.Printf("Skip %s: URL is not valid\n", url)
    96  			return
    97  		}
    98  
    99  		// Make sure title is valid Utf-8
   100  		title = validateTitle(title, url)
   101  
   102  		// Check if the URL already exist before, both in bookmark
   103  		// file or in database
   104  		if _, exist := mapURL[url]; exist {
   105  			cError.Printf("Skip %s: URL already exists\n", url)
   106  			return
   107  		}
   108  
   109  		_, exist, err := deps.Database.GetBookmark(cmd.Context(), 0, url)
   110  		if err != nil && !errors.Is(err, sql.ErrNoRows) {
   111  			cError.Printf("Skip %s: Get Bookmark fail, %v", url, err)
   112  			return
   113  		}
   114  
   115  		if exist {
   116  			cError.Printf("Skip %s: URL already exists\n", url)
   117  			mapURL[url] = struct{}{}
   118  			return
   119  		}
   120  
   121  		// Get bookmark tags
   122  		tags := []model.Tag{}
   123  		for _, strTag := range strings.Split(strTags, ",") {
   124  			strTag = normalizeSpace(strTag)
   125  			if strTag != "" {
   126  				tags = append(tags, model.Tag{Name: strTag})
   127  			}
   128  		}
   129  
   130  		// Get category name for this bookmark
   131  		// and add it as tags (if necessary)
   132  		category := normalizeSpace(h3.Text())
   133  		if category != "" && generateTag {
   134  			tags = append(tags, model.Tag{Name: category})
   135  		}
   136  
   137  		// Add item to list
   138  		bookmark := model.BookmarkDTO{
   139  			URL:      url,
   140  			Title:    title,
   141  			Tags:     tags,
   142  			Modified: modifiedDate.Format(model.DatabaseDateFormat),
   143  		}
   144  
   145  		mapURL[url] = struct{}{}
   146  		bookmarks = append(bookmarks, bookmark)
   147  	})
   148  
   149  	// Save bookmark to database
   150  	bookmarks, err = deps.Database.SaveBookmarks(cmd.Context(), true, bookmarks...)
   151  	if err != nil {
   152  		cError.Printf("Failed to save bookmarks: %v\n", err)
   153  		os.Exit(1)
   154  	}
   155  
   156  	// Print imported bookmark
   157  	fmt.Println()
   158  	printBookmarks(bookmarks...)
   159  }