github.com/status-im/status-go@v1.1.0/protocol/messenger_linkpreview.go (about) 1 package protocol 2 3 import ( 4 "errors" 5 "fmt" 6 "math" 7 "net/http" 8 neturl "net/url" 9 "regexp" 10 "strings" 11 12 "go.uber.org/zap" 13 "golang.org/x/net/publicsuffix" 14 15 "github.com/status-im/markdown" 16 17 "github.com/status-im/status-go/multiaccounts/settings" 18 "github.com/status-im/status-go/protocol/common" 19 ) 20 21 const UnfurledLinksPerMessageLimit = 5 22 23 type URLUnfurlPermission int 24 25 const ( 26 URLUnfurlingAllowed URLUnfurlPermission = iota 27 URLUnfurlingAskUser 28 URLUnfurlingForbiddenBySettings 29 URLUnfurlingNotSupported 30 ) 31 32 type URLUnfurlingMetadata struct { 33 URL string `json:"url"` 34 Permission URLUnfurlPermission `json:"permission"` 35 IsStatusSharedURL bool `json:"isStatusSharedURL"` 36 } 37 38 type URLsUnfurlPlan struct { 39 URLs []URLUnfurlingMetadata `json:"urls"` 40 } 41 42 func URLUnfurlingSupported(url string) bool { 43 return !strings.HasSuffix(url, ".gif") 44 } 45 46 type UnfurlURLsResponse struct { 47 LinkPreviews []*common.LinkPreview `json:"linkPreviews,omitempty"` 48 StatusLinkPreviews []*common.StatusLinkPreview `json:"statusLinkPreviews,omitempty"` 49 } 50 51 func normalizeHostname(hostname string) string { 52 hostname = strings.ToLower(hostname) 53 re := regexp.MustCompile(`^www\.(.*)$`) 54 return re.ReplaceAllString(hostname, "$1") 55 } 56 57 func (m *Messenger) newURLUnfurler(httpClient *http.Client, url *neturl.URL) Unfurler { 58 59 if IsSupportedImageURL(url) { 60 return NewImageUnfurler( 61 url, 62 m.logger, 63 httpClient) 64 } 65 66 switch normalizeHostname(url.Hostname()) { 67 case "reddit.com": 68 return NewOEmbedUnfurler( 69 "https://www.reddit.com/oembed", 70 url, 71 m.logger, 72 httpClient) 73 default: 74 return NewOpenGraphUnfurler( 75 url, 76 m.logger, 77 httpClient) 78 } 79 } 80 81 func (m *Messenger) unfurlURL(httpClient *http.Client, url string) (*common.LinkPreview, error) { 82 preview := new(common.LinkPreview) 83 84 parsedURL, err := neturl.Parse(url) 85 if err != nil { 86 return preview, err 87 } 88 89 unfurler := m.newURLUnfurler(httpClient, parsedURL) 90 preview, err = unfurler.Unfurl() 91 if err != nil { 92 return preview, err 93 } 94 preview.Hostname = strings.ToLower(parsedURL.Hostname()) 95 96 return preview, nil 97 } 98 99 // parseValidURL is a stricter version of url.Parse that performs additional 100 // checks to ensure the URL is valid for clients to request a link preview. 101 func parseValidURL(rawURL string) (*neturl.URL, error) { 102 u, err := neturl.Parse(rawURL) 103 if err != nil { 104 return nil, fmt.Errorf("parsing URL failed: %w", err) 105 } 106 107 if u.Scheme == "" { 108 return nil, errors.New("missing URL scheme") 109 } 110 111 _, err = publicsuffix.EffectiveTLDPlusOne(u.Hostname()) 112 if err != nil { 113 return nil, fmt.Errorf("missing known URL domain: %w", err) 114 } 115 116 return u, nil 117 } 118 119 func (m *Messenger) GetTextURLsToUnfurl(text string) *URLsUnfurlPlan { 120 s, err := m.getSettings() 121 if err != nil { 122 // log the error and keep parsing the text 123 m.logger.Error("GetTextURLsToUnfurl: failed to get settings", zap.Error(err)) 124 s.URLUnfurlingMode = settings.URLUnfurlingDisableAll 125 } 126 127 indexedUrls := map[string]struct{}{} 128 result := &URLsUnfurlPlan{ 129 // The usage of `UnfurledLinksPerMessageLimit` is quite random here. I wanted to allocate 130 // some not-zero place here, using the limit number is at least some binding. 131 URLs: make([]URLUnfurlingMetadata, 0, UnfurledLinksPerMessageLimit), 132 } 133 parsedText := markdown.Parse([]byte(text), nil) 134 visitor := common.RunLinksVisitor(parsedText) 135 136 for _, rawURL := range visitor.Links { 137 parsedURL, err := parseValidURL(rawURL) 138 if err != nil { 139 continue 140 } 141 // Lowercase the host so the URL can be used as a cache key. Particularly on 142 // mobile clients it is common that the first character in a text input is 143 // automatically uppercased. In WhatsApp they incorrectly lowercase the 144 // URL's path, but this is incorrect. For instance, some URL shorteners are 145 // case-sensitive, some websites encode base64 in the path, etc. 146 parsedURL.Host = strings.ToLower(parsedURL.Host) 147 148 url := parsedURL.String() 149 url = strings.TrimRight(url, "/") // Removes the spurious trailing forward slash. 150 if _, exists := indexedUrls[url]; exists { 151 continue 152 } 153 154 metadata := URLUnfurlingMetadata{ 155 URL: url, 156 IsStatusSharedURL: IsStatusSharedURL(url), 157 } 158 159 if !URLUnfurlingSupported(rawURL) { 160 metadata.Permission = URLUnfurlingNotSupported 161 } else if metadata.IsStatusSharedURL { 162 metadata.Permission = URLUnfurlingAllowed 163 } else { 164 switch s.URLUnfurlingMode { 165 case settings.URLUnfurlingAlwaysAsk: 166 metadata.Permission = URLUnfurlingAskUser 167 case settings.URLUnfurlingEnableAll: 168 metadata.Permission = URLUnfurlingAllowed 169 case settings.URLUnfurlingDisableAll: 170 metadata.Permission = URLUnfurlingForbiddenBySettings 171 default: 172 metadata.Permission = URLUnfurlingForbiddenBySettings 173 } 174 } 175 176 result.URLs = append(result.URLs, metadata) 177 } 178 179 return result 180 } 181 182 // Deprecated: GetURLs is deprecated in favor of more generic GetTextURLsToUnfurl. 183 // 184 // This is a wrapper around GetTextURLsToUnfurl that returns the list of URLs found in the text 185 // without any additional information. 186 func (m *Messenger) GetURLs(text string) []string { 187 plan := m.GetTextURLsToUnfurl(text) 188 limit := int(math.Min(UnfurledLinksPerMessageLimit, float64(len(plan.URLs)))) 189 urls := make([]string, 0, limit) 190 for _, metadata := range plan.URLs { 191 urls = append(urls, metadata.URL) 192 if len(urls) == limit { 193 break 194 } 195 } 196 return urls 197 } 198 199 func NewDefaultHTTPClient() *http.Client { 200 return &http.Client{Timeout: DefaultRequestTimeout} 201 } 202 203 // UnfurlURLs assumes clients pass URLs verbatim that were validated and 204 // processed by GetURLs. 205 func (m *Messenger) UnfurlURLs(httpClient *http.Client, urls []string) (UnfurlURLsResponse, error) { 206 response := UnfurlURLsResponse{} 207 208 // Unfurl in a loop 209 210 response.LinkPreviews = make([]*common.LinkPreview, 0, len(urls)) 211 response.StatusLinkPreviews = make([]*common.StatusLinkPreview, 0, len(urls)) 212 213 if httpClient == nil { 214 httpClient = NewDefaultHTTPClient() 215 } 216 217 for _, url := range urls { 218 m.logger.Debug("unfurling", zap.String("url", url)) 219 220 if IsStatusSharedURL(url) { 221 unfurler := NewStatusUnfurler(url, m, m.logger) 222 preview, err := unfurler.Unfurl() 223 if err != nil { 224 m.logger.Warn("failed to unfurl status link", zap.String("url", url), zap.Error(err)) 225 continue 226 } 227 response.StatusLinkPreviews = append(response.StatusLinkPreviews, preview) 228 continue 229 } 230 231 p, err := m.unfurlURL(httpClient, url) 232 if err != nil { 233 m.logger.Warn("failed to unfurl", zap.String("url", url), zap.Error(err)) 234 continue 235 } 236 response.LinkPreviews = append(response.LinkPreviews, p) 237 } 238 239 return response, nil 240 }