github.com/status-im/status-go@v1.1.0/protocol/linkpreview_unfurler_opengraph.go (about) 1 package protocol 2 3 import ( 4 "bytes" 5 "fmt" 6 "io/ioutil" 7 "net/http" 8 neturl "net/url" 9 "strings" 10 11 "github.com/keighl/metabolize" 12 "go.uber.org/zap" 13 "golang.org/x/net/html" 14 15 "github.com/status-im/status-go/images" 16 "github.com/status-im/status-go/protocol/common" 17 "github.com/status-im/status-go/protocol/protobuf" 18 ) 19 20 type OpenGraphMetadata struct { 21 Title string `json:"title" meta:"og:title"` 22 Description string `json:"description" meta:"og:description"` 23 ThumbnailURL string `json:"thumbnailUrl" meta:"og:image"` 24 } 25 26 // OpenGraphUnfurler should be preferred over OEmbedUnfurler because oEmbed 27 // gives back a JSON response with a "html" field that's supposed to be embedded 28 // in an iframe (hardly useful for existing Status' clients). 29 type OpenGraphUnfurler struct { 30 url *neturl.URL 31 logger *zap.Logger 32 httpClient *http.Client 33 } 34 35 func NewOpenGraphUnfurler(URL *neturl.URL, logger *zap.Logger, httpClient *http.Client) *OpenGraphUnfurler { 36 return &OpenGraphUnfurler{ 37 url: URL, 38 logger: logger, 39 httpClient: httpClient, 40 } 41 } 42 43 func GetFavicon(bodyBytes []byte) string { 44 htmlTokens := html.NewTokenizer(bytes.NewBuffer(bodyBytes)) 45 loop: 46 for { 47 tt := htmlTokens.Next() 48 switch tt { 49 case html.ErrorToken: 50 break loop 51 case html.StartTagToken: 52 t := htmlTokens.Token() 53 if t.Data != "link" { 54 continue 55 } 56 57 isIcon := false 58 href := "" 59 for _, attr := range t.Attr { 60 k := attr.Key 61 v := attr.Val 62 if k == "rel" && (v == "icon" || v == "shortcut icon") { 63 isIcon = true 64 } else if k == "href" && 65 (strings.Contains(v, ".ico") || 66 strings.Contains(v, ".png") || 67 strings.Contains(v, ".svg")) { 68 href = v 69 } 70 } 71 72 if isIcon && href != "" { 73 return href 74 } 75 } 76 } 77 return "" 78 } 79 80 func (u *OpenGraphUnfurler) Unfurl() (*common.LinkPreview, error) { 81 preview := newDefaultLinkPreview(u.url) 82 preview.Type = protobuf.UnfurledLink_LINK 83 84 headers := map[string]string{ 85 "accept": headerAcceptText, 86 "accept-language": headerAcceptLanguage, 87 "user-agent": headerUserAgent, 88 } 89 bodyBytes, err := fetchBody(u.logger, u.httpClient, u.url.String(), headers) 90 if err != nil { 91 return preview, err 92 } 93 94 var ogMetadata OpenGraphMetadata 95 err = metabolize.Metabolize(ioutil.NopCloser(bytes.NewBuffer(bodyBytes)), &ogMetadata) 96 if err != nil { 97 return preview, fmt.Errorf("failed to parse OpenGraph data") 98 } 99 100 faviconPath := GetFavicon(bodyBytes) 101 t, err := fetchImage(u.logger, u.httpClient, faviconPath, false) 102 if err != nil { 103 u.logger.Info("failed to fetch favicon", zap.String("url", u.url.String()), zap.Error(err)) 104 } else { 105 preview.Favicon.DataURI = t.DataURI 106 } 107 // There are URLs like https://wikipedia.org/ that don't have an OpenGraph 108 // title tag, but article pages do. In the future, we can fallback to the 109 // website's title by using the <title> tag. 110 if ogMetadata.Title == "" { 111 return preview, fmt.Errorf("missing required title in OpenGraph response") 112 } 113 114 if ogMetadata.ThumbnailURL != "" { 115 t, err := fetchImage(u.logger, u.httpClient, ogMetadata.ThumbnailURL, true) 116 if err != nil { 117 // Given we want to fetch thumbnails on a best-effort basis, if an error 118 // happens we simply log it. 119 u.logger.Info("failed to fetch thumbnail", zap.String("url", u.url.String()), zap.Error(err)) 120 } else { 121 preview.Thumbnail = t 122 } 123 } 124 125 preview.Title = ogMetadata.Title 126 preview.Description = ogMetadata.Description 127 128 return preview, nil 129 } 130 131 func fetchImage(logger *zap.Logger, httpClient *http.Client, url string, getDimensions bool) (common.LinkPreviewThumbnail, error) { 132 var thumbnail common.LinkPreviewThumbnail 133 134 imgBytes, err := fetchBody(logger, httpClient, url, nil) 135 if err != nil { 136 return thumbnail, fmt.Errorf("could not fetch thumbnail url='%s': %w", url, err) 137 } 138 if getDimensions { 139 width, height, err := images.GetImageDimensions(imgBytes) 140 if err != nil { 141 return thumbnail, fmt.Errorf("could not get image dimensions url='%s': %w", url, err) 142 } 143 thumbnail.Width = width 144 thumbnail.Height = height 145 } 146 dataURI, err := images.GetPayloadDataURI(imgBytes) 147 if err != nil { 148 return thumbnail, fmt.Errorf("could not build data URI url='%s': %w", url, err) 149 } 150 thumbnail.DataURI = dataURI 151 152 return thumbnail, nil 153 }