github.com/status-im/status-go@v1.1.0/protocol/linkpreview_unfurler_opengraph.go (about)

     1  package protocol
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"io/ioutil"
     7  	"net/http"
     8  	neturl "net/url"
     9  	"strings"
    10  
    11  	"github.com/keighl/metabolize"
    12  	"go.uber.org/zap"
    13  	"golang.org/x/net/html"
    14  
    15  	"github.com/status-im/status-go/images"
    16  	"github.com/status-im/status-go/protocol/common"
    17  	"github.com/status-im/status-go/protocol/protobuf"
    18  )
    19  
// OpenGraphMetadata holds the OpenGraph properties extracted from a web page.
// Each field's `meta` tag names the OpenGraph property it corresponds to
// (presumably consumed by metabolize.Metabolize — see Unfurl).
type OpenGraphMetadata struct {
	// Title is the page's og:title. Unfurl treats an empty title as an error.
	Title        string `json:"title" meta:"og:title"`
	// Description is the page's og:description.
	Description  string `json:"description" meta:"og:description"`
	// ThumbnailURL is the page's og:image, i.e. the address of the preview image.
	ThumbnailURL string `json:"thumbnailUrl" meta:"og:image"`
}
    25  
// OpenGraphUnfurler should be preferred over OEmbedUnfurler because oEmbed
// gives back a JSON response with a "html" field that's supposed to be embedded
// in an iframe (hardly useful for existing Status' clients).
type OpenGraphUnfurler struct {
	// url is the address of the page to unfurl.
	url        *neturl.URL
	// logger receives diagnostics about best-effort steps that failed.
	logger     *zap.Logger
	// httpClient performs every request issued while unfurling.
	httpClient *http.Client
}
    34  
    35  func NewOpenGraphUnfurler(URL *neturl.URL, logger *zap.Logger, httpClient *http.Client) *OpenGraphUnfurler {
    36  	return &OpenGraphUnfurler{
    37  		url:        URL,
    38  		logger:     logger,
    39  		httpClient: httpClient,
    40  	}
    41  }
    42  
    43  func GetFavicon(bodyBytes []byte) string {
    44  	htmlTokens := html.NewTokenizer(bytes.NewBuffer(bodyBytes))
    45  loop:
    46  	for {
    47  		tt := htmlTokens.Next()
    48  		switch tt {
    49  		case html.ErrorToken:
    50  			break loop
    51  		case html.StartTagToken:
    52  			t := htmlTokens.Token()
    53  			if t.Data != "link" {
    54  				continue
    55  			}
    56  
    57  			isIcon := false
    58  			href := ""
    59  			for _, attr := range t.Attr {
    60  				k := attr.Key
    61  				v := attr.Val
    62  				if k == "rel" && (v == "icon" || v == "shortcut icon") {
    63  					isIcon = true
    64  				} else if k == "href" &&
    65  					(strings.Contains(v, ".ico") ||
    66  						strings.Contains(v, ".png") ||
    67  						strings.Contains(v, ".svg")) {
    68  					href = v
    69  				}
    70  			}
    71  
    72  			if isIcon && href != "" {
    73  				return href
    74  			}
    75  		}
    76  	}
    77  	return ""
    78  }
    79  
    80  func (u *OpenGraphUnfurler) Unfurl() (*common.LinkPreview, error) {
    81  	preview := newDefaultLinkPreview(u.url)
    82  	preview.Type = protobuf.UnfurledLink_LINK
    83  
    84  	headers := map[string]string{
    85  		"accept":          headerAcceptText,
    86  		"accept-language": headerAcceptLanguage,
    87  		"user-agent":      headerUserAgent,
    88  	}
    89  	bodyBytes, err := fetchBody(u.logger, u.httpClient, u.url.String(), headers)
    90  	if err != nil {
    91  		return preview, err
    92  	}
    93  
    94  	var ogMetadata OpenGraphMetadata
    95  	err = metabolize.Metabolize(ioutil.NopCloser(bytes.NewBuffer(bodyBytes)), &ogMetadata)
    96  	if err != nil {
    97  		return preview, fmt.Errorf("failed to parse OpenGraph data")
    98  	}
    99  
   100  	faviconPath := GetFavicon(bodyBytes)
   101  	t, err := fetchImage(u.logger, u.httpClient, faviconPath, false)
   102  	if err != nil {
   103  		u.logger.Info("failed to fetch favicon", zap.String("url", u.url.String()), zap.Error(err))
   104  	} else {
   105  		preview.Favicon.DataURI = t.DataURI
   106  	}
   107  	// There are URLs like https://wikipedia.org/ that don't have an OpenGraph
   108  	// title tag, but article pages do. In the future, we can fallback to the
   109  	// website's title by using the <title> tag.
   110  	if ogMetadata.Title == "" {
   111  		return preview, fmt.Errorf("missing required title in OpenGraph response")
   112  	}
   113  
   114  	if ogMetadata.ThumbnailURL != "" {
   115  		t, err := fetchImage(u.logger, u.httpClient, ogMetadata.ThumbnailURL, true)
   116  		if err != nil {
   117  			// Given we want to fetch thumbnails on a best-effort basis, if an error
   118  			// happens we simply log it.
   119  			u.logger.Info("failed to fetch thumbnail", zap.String("url", u.url.String()), zap.Error(err))
   120  		} else {
   121  			preview.Thumbnail = t
   122  		}
   123  	}
   124  
   125  	preview.Title = ogMetadata.Title
   126  	preview.Description = ogMetadata.Description
   127  
   128  	return preview, nil
   129  }
   130  
   131  func fetchImage(logger *zap.Logger, httpClient *http.Client, url string, getDimensions bool) (common.LinkPreviewThumbnail, error) {
   132  	var thumbnail common.LinkPreviewThumbnail
   133  
   134  	imgBytes, err := fetchBody(logger, httpClient, url, nil)
   135  	if err != nil {
   136  		return thumbnail, fmt.Errorf("could not fetch thumbnail url='%s': %w", url, err)
   137  	}
   138  	if getDimensions {
   139  		width, height, err := images.GetImageDimensions(imgBytes)
   140  		if err != nil {
   141  			return thumbnail, fmt.Errorf("could not get image dimensions url='%s': %w", url, err)
   142  		}
   143  		thumbnail.Width = width
   144  		thumbnail.Height = height
   145  	}
   146  	dataURI, err := images.GetPayloadDataURI(imgBytes)
   147  	if err != nil {
   148  		return thumbnail, fmt.Errorf("could not build data URI url='%s': %w", url, err)
   149  	}
   150  	thumbnail.DataURI = dataURI
   151  
   152  	return thumbnail, nil
   153  }