github.com/status-im/status-go@v1.1.0/protocol/messenger_linkpreview.go (about)

     1  package protocol
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"math"
     7  	"net/http"
     8  	neturl "net/url"
     9  	"regexp"
    10  	"strings"
    11  
    12  	"go.uber.org/zap"
    13  	"golang.org/x/net/publicsuffix"
    14  
    15  	"github.com/status-im/markdown"
    16  
    17  	"github.com/status-im/status-go/multiaccounts/settings"
    18  	"github.com/status-im/status-go/protocol/common"
    19  )
    20  
// UnfurledLinksPerMessageLimit is the maximum number of URLs returned by
// GetURLs for a single text. GetTextURLsToUnfurl only uses it as the initial
// capacity of its result slice and does not truncate its result to it.
const UnfurledLinksPerMessageLimit = 5
    22  
// URLUnfurlPermission describes whether a URL found in a text may be unfurled.
// Values are assigned per URL by GetTextURLsToUnfurl.
type URLUnfurlPermission int

const (
	// URLUnfurlingAllowed is assigned to Status shared URLs and to any other
	// supported URL when the URL unfurling mode setting is "enable all".
	URLUnfurlingAllowed URLUnfurlPermission = iota
	// URLUnfurlingAskUser is assigned when the URL unfurling mode setting is
	// "always ask".
	URLUnfurlingAskUser
	// URLUnfurlingForbiddenBySettings is assigned when the URL unfurling mode
	// setting is "disable all" or has an unrecognized value.
	URLUnfurlingForbiddenBySettings
	// URLUnfurlingNotSupported is assigned when URLUnfurlingSupported returns
	// false for the link.
	URLUnfurlingNotSupported
)
    31  
// URLUnfurlingMetadata describes a single URL found in a text, as reported by
// GetTextURLsToUnfurl.
type URLUnfurlingMetadata struct {
	// URL is the link after normalization by GetTextURLsToUnfurl: the host is
	// lowercased and trailing forward slashes are trimmed.
	URL string `json:"url"`
	// Permission tells whether the URL may be unfurled.
	Permission URLUnfurlPermission `json:"permission"`
	// IsStatusSharedURL holds the result of IsStatusSharedURL for URL.
	IsStatusSharedURL bool `json:"isStatusSharedURL"`
}
    37  
// URLsUnfurlPlan is the result of GetTextURLsToUnfurl: the URLs found in a
// text, each with its unfurling permission, in the order the links were
// visited.
type URLsUnfurlPlan struct {
	URLs []URLUnfurlingMetadata `json:"urls"`
}
    41  
    42  func URLUnfurlingSupported(url string) bool {
    43  	return !strings.HasSuffix(url, ".gif")
    44  }
    45  
// UnfurlURLsResponse is the result of UnfurlURLs. Previews of Status shared
// URLs are collected in StatusLinkPreviews, previews of all other URLs in
// LinkPreviews. Either field is omitted from the JSON encoding when empty.
type UnfurlURLsResponse struct {
	LinkPreviews       []*common.LinkPreview       `json:"linkPreviews,omitempty"`
	StatusLinkPreviews []*common.StatusLinkPreview `json:"statusLinkPreviews,omitempty"`
}
    50  
    51  func normalizeHostname(hostname string) string {
    52  	hostname = strings.ToLower(hostname)
    53  	re := regexp.MustCompile(`^www\.(.*)$`)
    54  	return re.ReplaceAllString(hostname, "$1")
    55  }
    56  
    57  func (m *Messenger) newURLUnfurler(httpClient *http.Client, url *neturl.URL) Unfurler {
    58  
    59  	if IsSupportedImageURL(url) {
    60  		return NewImageUnfurler(
    61  			url,
    62  			m.logger,
    63  			httpClient)
    64  	}
    65  
    66  	switch normalizeHostname(url.Hostname()) {
    67  	case "reddit.com":
    68  		return NewOEmbedUnfurler(
    69  			"https://www.reddit.com/oembed",
    70  			url,
    71  			m.logger,
    72  			httpClient)
    73  	default:
    74  		return NewOpenGraphUnfurler(
    75  			url,
    76  			m.logger,
    77  			httpClient)
    78  	}
    79  }
    80  
    81  func (m *Messenger) unfurlURL(httpClient *http.Client, url string) (*common.LinkPreview, error) {
    82  	preview := new(common.LinkPreview)
    83  
    84  	parsedURL, err := neturl.Parse(url)
    85  	if err != nil {
    86  		return preview, err
    87  	}
    88  
    89  	unfurler := m.newURLUnfurler(httpClient, parsedURL)
    90  	preview, err = unfurler.Unfurl()
    91  	if err != nil {
    92  		return preview, err
    93  	}
    94  	preview.Hostname = strings.ToLower(parsedURL.Hostname())
    95  
    96  	return preview, nil
    97  }
    98  
    99  // parseValidURL is a stricter version of url.Parse that performs additional
   100  // checks to ensure the URL is valid for clients to request a link preview.
   101  func parseValidURL(rawURL string) (*neturl.URL, error) {
   102  	u, err := neturl.Parse(rawURL)
   103  	if err != nil {
   104  		return nil, fmt.Errorf("parsing URL failed: %w", err)
   105  	}
   106  
   107  	if u.Scheme == "" {
   108  		return nil, errors.New("missing URL scheme")
   109  	}
   110  
   111  	_, err = publicsuffix.EffectiveTLDPlusOne(u.Hostname())
   112  	if err != nil {
   113  		return nil, fmt.Errorf("missing known URL domain: %w", err)
   114  	}
   115  
   116  	return u, nil
   117  }
   118  
   119  func (m *Messenger) GetTextURLsToUnfurl(text string) *URLsUnfurlPlan {
   120  	s, err := m.getSettings()
   121  	if err != nil {
   122  		// log the error and keep parsing the text
   123  		m.logger.Error("GetTextURLsToUnfurl: failed to get settings", zap.Error(err))
   124  		s.URLUnfurlingMode = settings.URLUnfurlingDisableAll
   125  	}
   126  
   127  	indexedUrls := map[string]struct{}{}
   128  	result := &URLsUnfurlPlan{
   129  		// The usage of `UnfurledLinksPerMessageLimit` is quite random here. I wanted to allocate
   130  		// some not-zero place here, using the limit number is at least some binding.
   131  		URLs: make([]URLUnfurlingMetadata, 0, UnfurledLinksPerMessageLimit),
   132  	}
   133  	parsedText := markdown.Parse([]byte(text), nil)
   134  	visitor := common.RunLinksVisitor(parsedText)
   135  
   136  	for _, rawURL := range visitor.Links {
   137  		parsedURL, err := parseValidURL(rawURL)
   138  		if err != nil {
   139  			continue
   140  		}
   141  		// Lowercase the host so the URL can be used as a cache key. Particularly on
   142  		// mobile clients it is common that the first character in a text input is
   143  		// automatically uppercased. In WhatsApp they incorrectly lowercase the
   144  		// URL's path, but this is incorrect. For instance, some URL shorteners are
   145  		// case-sensitive, some websites encode base64 in the path, etc.
   146  		parsedURL.Host = strings.ToLower(parsedURL.Host)
   147  
   148  		url := parsedURL.String()
   149  		url = strings.TrimRight(url, "/") // Removes the spurious trailing forward slash.
   150  		if _, exists := indexedUrls[url]; exists {
   151  			continue
   152  		}
   153  
   154  		metadata := URLUnfurlingMetadata{
   155  			URL:               url,
   156  			IsStatusSharedURL: IsStatusSharedURL(url),
   157  		}
   158  
   159  		if !URLUnfurlingSupported(rawURL) {
   160  			metadata.Permission = URLUnfurlingNotSupported
   161  		} else if metadata.IsStatusSharedURL {
   162  			metadata.Permission = URLUnfurlingAllowed
   163  		} else {
   164  			switch s.URLUnfurlingMode {
   165  			case settings.URLUnfurlingAlwaysAsk:
   166  				metadata.Permission = URLUnfurlingAskUser
   167  			case settings.URLUnfurlingEnableAll:
   168  				metadata.Permission = URLUnfurlingAllowed
   169  			case settings.URLUnfurlingDisableAll:
   170  				metadata.Permission = URLUnfurlingForbiddenBySettings
   171  			default:
   172  				metadata.Permission = URLUnfurlingForbiddenBySettings
   173  			}
   174  		}
   175  
   176  		result.URLs = append(result.URLs, metadata)
   177  	}
   178  
   179  	return result
   180  }
   181  
   182  // Deprecated: GetURLs is deprecated in favor of more generic GetTextURLsToUnfurl.
   183  //
   184  // This is a wrapper around GetTextURLsToUnfurl that returns the list of URLs found in the text
   185  // without any additional information.
   186  func (m *Messenger) GetURLs(text string) []string {
   187  	plan := m.GetTextURLsToUnfurl(text)
   188  	limit := int(math.Min(UnfurledLinksPerMessageLimit, float64(len(plan.URLs))))
   189  	urls := make([]string, 0, limit)
   190  	for _, metadata := range plan.URLs {
   191  		urls = append(urls, metadata.URL)
   192  		if len(urls) == limit {
   193  			break
   194  		}
   195  	}
   196  	return urls
   197  }
   198  
   199  func NewDefaultHTTPClient() *http.Client {
   200  	return &http.Client{Timeout: DefaultRequestTimeout}
   201  }
   202  
   203  // UnfurlURLs assumes clients pass URLs verbatim that were validated and
   204  // processed by GetURLs.
   205  func (m *Messenger) UnfurlURLs(httpClient *http.Client, urls []string) (UnfurlURLsResponse, error) {
   206  	response := UnfurlURLsResponse{}
   207  
   208  	// Unfurl in a loop
   209  
   210  	response.LinkPreviews = make([]*common.LinkPreview, 0, len(urls))
   211  	response.StatusLinkPreviews = make([]*common.StatusLinkPreview, 0, len(urls))
   212  
   213  	if httpClient == nil {
   214  		httpClient = NewDefaultHTTPClient()
   215  	}
   216  
   217  	for _, url := range urls {
   218  		m.logger.Debug("unfurling", zap.String("url", url))
   219  
   220  		if IsStatusSharedURL(url) {
   221  			unfurler := NewStatusUnfurler(url, m, m.logger)
   222  			preview, err := unfurler.Unfurl()
   223  			if err != nil {
   224  				m.logger.Warn("failed to unfurl status link", zap.String("url", url), zap.Error(err))
   225  				continue
   226  			}
   227  			response.StatusLinkPreviews = append(response.StatusLinkPreviews, preview)
   228  			continue
   229  		}
   230  
   231  		p, err := m.unfurlURL(httpClient, url)
   232  		if err != nil {
   233  			m.logger.Warn("failed to unfurl", zap.String("url", url), zap.Error(err))
   234  			continue
   235  		}
   236  		response.LinkPreviews = append(response.LinkPreviews, p)
   237  	}
   238  
   239  	return response, nil
   240  }