github.com/soulteary/pocket-bookcase@v0.0.0-20240428065142-0b5a9a0fc98a/internal/cmd/import.go (about) 1 package cmd 2 3 import ( 4 "database/sql" 5 "errors" 6 "fmt" 7 "os" 8 "strconv" 9 "strings" 10 "time" 11 12 "github.com/PuerkitoBio/goquery" 13 "github.com/soulteary/pocket-bookcase/internal/core" 14 "github.com/soulteary/pocket-bookcase/internal/model" 15 "github.com/spf13/cobra" 16 ) 17 18 func importCmd() *cobra.Command { 19 cmd := &cobra.Command{ 20 Use: "import source-file", 21 Short: "Import bookmarks from HTML file in Netscape Bookmark format", 22 Args: cobra.ExactArgs(1), 23 Run: importHandler, 24 } 25 26 cmd.Flags().BoolP("generate-tag", "t", false, "Auto generate tag from bookmark's category") 27 28 return cmd 29 } 30 31 func importHandler(cmd *cobra.Command, args []string) { 32 _, deps := initShiori(cmd.Context(), cmd) 33 34 // Parse flags 35 generateTag := cmd.Flags().Changed("generate-tag") 36 37 // If user doesn't specify, ask if tag need to be generated 38 if !generateTag { 39 var submit string 40 fmt.Print("Add parents folder as tag? (y/N): ") 41 fmt.Scanln(&submit) 42 43 generateTag = submit == "y" 44 } 45 46 // Open bookmark's file 47 srcFile, err := os.Open(args[0]) 48 if err != nil { 49 cError.Printf("Failed to open %s: %v\n", args[0], err) 50 os.Exit(1) 51 } 52 defer srcFile.Close() 53 54 // Parse bookmark's file 55 bookmarks := []model.BookmarkDTO{} 56 mapURL := make(map[string]struct{}) 57 58 doc, err := goquery.NewDocumentFromReader(srcFile) 59 if err != nil { 60 cError.Printf("Failed to parse bookmark: %v\n", err) 61 os.Exit(1) 62 } 63 64 doc.Find("dt>a").Each(func(_ int, a *goquery.Selection) { 65 // Get related elements 66 dt := a.Parent() 67 dl := dt.Parent() 68 h3 := dl.Parent().Find("h3").First() 69 70 // Get metadata 71 title := a.Text() 72 url, _ := a.Attr("href") 73 strTags, _ := a.Attr("tags") 74 75 dateStr, fieldExists := a.Attr("last_modified") 76 if !fieldExists { 77 dateStr, _ = a.Attr("add_date") 78 } 79 80 // Using now as default date in case no last_modified nor add_date are present 81 modifiedDate := time.Now() 82 if dateStr != "" { 83 modifiedTsInt, err := strconv.Atoi(dateStr) 84 if err != nil { 85 cError.Printf("Skip %s: date field is not valid: %s", url, err) 86 return 87 } 88 89 modifiedDate = time.Unix(int64(modifiedTsInt), 0) 90 } 91 92 // Clean up URL 93 url, err = core.RemoveUTMParams(url) 94 if err != nil { 95 cError.Printf("Skip %s: URL is not valid\n", url) 96 return 97 } 98 99 // Make sure title is valid Utf-8 100 title = validateTitle(title, url) 101 102 // Check if the URL already exist before, both in bookmark 103 // file or in database 104 if _, exist := mapURL[url]; exist { 105 cError.Printf("Skip %s: URL already exists\n", url) 106 return 107 } 108 109 _, exist, err := deps.Database.GetBookmark(cmd.Context(), 0, url) 110 if err != nil && !errors.Is(err, sql.ErrNoRows) { 111 cError.Printf("Skip %s: Get Bookmark fail, %v", url, err) 112 return 113 } 114 115 if exist { 116 cError.Printf("Skip %s: URL already exists\n", url) 117 mapURL[url] = struct{}{} 118 return 119 } 120 121 // Get bookmark tags 122 tags := []model.Tag{} 123 for _, strTag := range strings.Split(strTags, ",") { 124 strTag = normalizeSpace(strTag) 125 if strTag != "" { 126 tags = append(tags, model.Tag{Name: strTag}) 127 } 128 } 129 130 // Get category name for this bookmark 131 // and add it as tags (if necessary) 132 category := normalizeSpace(h3.Text()) 133 if category != "" && generateTag { 134 tags = append(tags, model.Tag{Name: category}) 135 } 136 137 // Add item to list 138 bookmark := model.BookmarkDTO{ 139 URL: url, 140 Title: title, 141 Tags: tags, 142 Modified: modifiedDate.Format(model.DatabaseDateFormat), 143 } 144 145 mapURL[url] = struct{}{} 146 bookmarks = append(bookmarks, bookmark) 147 }) 148 149 // Save bookmark to database 150 bookmarks, err = deps.Database.SaveBookmarks(cmd.Context(), true, bookmarks...) 151 if err != nil { 152 cError.Printf("Failed to save bookmarks: %v\n", err) 153 os.Exit(1) 154 } 155 156 // Print imported bookmark 157 fmt.Println() 158 printBookmarks(bookmarks...) 159 }