github.com/mattermosttest/mattermost-server/v5@v5.0.0-20200917143240-9dfa12e121f9/app/post_metadata.go (about)

     1  // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
     2  // See LICENSE.txt for license information.
     3  
     4  package app
     5  
     6  import (
     7  	"bytes"
     8  	"image"
     9  	"io"
    10  	"io/ioutil"
    11  	"net/http"
    12  	"net/url"
    13  	"strconv"
    14  	"strings"
    15  	"time"
    16  
    17  	"github.com/dyatlov/go-opengraph/opengraph"
    18  	"github.com/mattermost/mattermost-server/v5/mlog"
    19  	"github.com/mattermost/mattermost-server/v5/model"
    20  	"github.com/mattermost/mattermost-server/v5/services/cache"
    21  	"github.com/mattermost/mattermost-server/v5/utils/imgutils"
    22  	"github.com/mattermost/mattermost-server/v5/utils/markdown"
    23  )
    24  
// linkMetadataCache is the value stored in linkCache for a single link. It carries the
// OpenGraph data and/or image information that was found for the link; both fields may be
// nil when nothing could be determined.
type linkMetadataCache struct {
	OpenGraph *opengraph.OpenGraph
	PostImage *model.PostImage
}
    29  
    30  const LINK_CACHE_SIZE = 10000
    31  const LINK_CACHE_DURATION = 1 * time.Hour
    32  const MaxMetadataImageSize = MaxOpenGraphResponseSize
    33  
    34  var linkCache = cache.NewLRU(&cache.LRUOptions{
    35  	Size: LINK_CACHE_SIZE,
    36  })
    37  
    38  func (a *App) InitPostMetadata() {
    39  	// Dump any cached links if the proxy settings have changed so image URLs can be updated
    40  	a.AddConfigListener(func(before, after *model.Config) {
    41  		if (before.ImageProxySettings.Enable != after.ImageProxySettings.Enable) ||
    42  			(before.ImageProxySettings.ImageProxyType != after.ImageProxySettings.ImageProxyType) ||
    43  			(before.ImageProxySettings.RemoteImageProxyURL != after.ImageProxySettings.RemoteImageProxyURL) ||
    44  			(before.ImageProxySettings.RemoteImageProxyOptions != after.ImageProxySettings.RemoteImageProxyOptions) {
    45  			linkCache.Purge()
    46  		}
    47  	})
    48  }
    49  
    50  func (a *App) PreparePostListForClient(originalList *model.PostList) *model.PostList {
    51  	list := &model.PostList{
    52  		Posts:      make(map[string]*model.Post, len(originalList.Posts)),
    53  		Order:      originalList.Order,
    54  		NextPostId: originalList.NextPostId,
    55  		PrevPostId: originalList.PrevPostId,
    56  	}
    57  
    58  	for id, originalPost := range originalList.Posts {
    59  		post := a.PreparePostForClient(originalPost, false, false)
    60  
    61  		list.Posts[id] = post
    62  	}
    63  
    64  	return list
    65  }
    66  
    67  // OverrideIconURLIfEmoji changes the post icon override URL prop, if it has an emoji icon,
    68  // so that it points to the URL (relative) of the emoji - static if emoji is default, /api if custom.
    69  func (a *App) OverrideIconURLIfEmoji(post *model.Post) {
    70  	prop, ok := post.GetProps()[model.POST_PROPS_OVERRIDE_ICON_EMOJI]
    71  	if !ok || prop == nil {
    72  		return
    73  	}
    74  	emojiName := prop.(string)
    75  
    76  	if !*a.Config().ServiceSettings.EnablePostIconOverride || emojiName == "" {
    77  		return
    78  	}
    79  
    80  	if emojiUrl, err := a.GetEmojiStaticUrl(emojiName); err == nil {
    81  		post.AddProp(model.POST_PROPS_OVERRIDE_ICON_URL, emojiUrl)
    82  	} else {
    83  		mlog.Warn("Failed to retrieve URL for overridden profile icon (emoji)", mlog.String("emojiName", emojiName), mlog.Err(err))
    84  	}
    85  }
    86  
    87  func (a *App) PreparePostForClient(originalPost *model.Post, isNewPost bool, isEditPost bool) *model.Post {
    88  	post := originalPost.Clone()
    89  
    90  	// Proxy image links before constructing metadata so that requests go through the proxy
    91  	post = a.PostWithProxyAddedToImageURLs(post)
    92  
    93  	a.OverrideIconURLIfEmoji(post)
    94  
    95  	post.Metadata = &model.PostMetadata{}
    96  
    97  	if post.DeleteAt > 0 {
    98  		// For deleted posts we don't fill out metadata nor do we return the post content
    99  		post.Message = ""
   100  		return post
   101  	}
   102  
   103  	// Emojis and reaction counts
   104  	if emojis, reactions, err := a.getEmojisAndReactionsForPost(post); err != nil {
   105  		mlog.Warn("Failed to get emojis and reactions for a post", mlog.String("post_id", post.Id), mlog.Err(err))
   106  	} else {
   107  		post.Metadata.Emojis = emojis
   108  		post.Metadata.Reactions = reactions
   109  	}
   110  
   111  	// Files
   112  	if fileInfos, err := a.getFileMetadataForPost(post, isNewPost || isEditPost); err != nil {
   113  		mlog.Warn("Failed to get files for a post", mlog.String("post_id", post.Id), mlog.Err(err))
   114  	} else {
   115  		post.Metadata.Files = fileInfos
   116  	}
   117  
   118  	// Embeds and image dimensions
   119  	firstLink, images := getFirstLinkAndImages(post.Message)
   120  
   121  	if embed, err := a.getEmbedForPost(post, firstLink, isNewPost); err != nil {
   122  		mlog.Debug("Failed to get embedded content for a post", mlog.String("post_id", post.Id), mlog.Err(err))
   123  	} else if embed == nil {
   124  		post.Metadata.Embeds = []*model.PostEmbed{}
   125  	} else {
   126  		post.Metadata.Embeds = []*model.PostEmbed{embed}
   127  	}
   128  
   129  	post.Metadata.Images = a.getImagesForPost(post, images, isNewPost)
   130  
   131  	return post
   132  }
   133  
   134  func (a *App) getFileMetadataForPost(post *model.Post, fromMaster bool) ([]*model.FileInfo, *model.AppError) {
   135  	if len(post.FileIds) == 0 {
   136  		return nil, nil
   137  	}
   138  
   139  	return a.GetFileInfosForPost(post.Id, fromMaster)
   140  }
   141  
   142  func (a *App) getEmojisAndReactionsForPost(post *model.Post) ([]*model.Emoji, []*model.Reaction, *model.AppError) {
   143  	var reactions []*model.Reaction
   144  	if post.HasReactions {
   145  		var err *model.AppError
   146  		reactions, err = a.GetReactionsForPost(post.Id)
   147  		if err != nil {
   148  			return nil, nil, err
   149  		}
   150  	}
   151  
   152  	emojis, err := a.getCustomEmojisForPost(post, reactions)
   153  	if err != nil {
   154  		return nil, nil, err
   155  	}
   156  
   157  	return emojis, reactions, nil
   158  }
   159  
   160  func (a *App) getEmbedForPost(post *model.Post, firstLink string, isNewPost bool) (*model.PostEmbed, error) {
   161  	if _, ok := post.GetProps()["attachments"]; ok {
   162  		return &model.PostEmbed{
   163  			Type: model.POST_EMBED_MESSAGE_ATTACHMENT,
   164  		}, nil
   165  	}
   166  
   167  	if firstLink == "" || !*a.Config().ServiceSettings.EnableLinkPreviews {
   168  		return nil, nil
   169  	}
   170  
   171  	og, image, err := a.getLinkMetadata(firstLink, post.CreateAt, isNewPost)
   172  	if err != nil {
   173  		return nil, err
   174  	}
   175  
   176  	if og != nil {
   177  		return &model.PostEmbed{
   178  			Type: model.POST_EMBED_OPENGRAPH,
   179  			URL:  firstLink,
   180  			Data: og,
   181  		}, nil
   182  	}
   183  
   184  	if image != nil {
   185  		// Note that we're not passing the image info here since it'll be part of the PostMetadata.Images field
   186  		return &model.PostEmbed{
   187  			Type: model.POST_EMBED_IMAGE,
   188  			URL:  firstLink,
   189  		}, nil
   190  	}
   191  
   192  	return &model.PostEmbed{
   193  		Type: model.POST_EMBED_LINK,
   194  		URL:  firstLink,
   195  	}, nil
   196  }
   197  
   198  func (a *App) getImagesForPost(post *model.Post, imageURLs []string, isNewPost bool) map[string]*model.PostImage {
   199  	images := map[string]*model.PostImage{}
   200  
   201  	for _, embed := range post.Metadata.Embeds {
   202  		switch embed.Type {
   203  		case model.POST_EMBED_IMAGE:
   204  			// These dimensions will generally be cached by a previous call to getEmbedForPost
   205  			imageURLs = append(imageURLs, embed.URL)
   206  
   207  		case model.POST_EMBED_MESSAGE_ATTACHMENT:
   208  			imageURLs = append(imageURLs, getImagesInMessageAttachments(post)...)
   209  
   210  		case model.POST_EMBED_OPENGRAPH:
   211  			for _, image := range embed.Data.(*opengraph.OpenGraph).Images {
   212  				var imageURL string
   213  				if image.SecureURL != "" {
   214  					imageURL = image.SecureURL
   215  				} else if image.URL != "" {
   216  					imageURL = image.URL
   217  				}
   218  
   219  				if imageURL == "" {
   220  					continue
   221  				}
   222  
   223  				imageURLs = append(imageURLs, imageURL)
   224  			}
   225  		}
   226  	}
   227  
   228  	// Removing duplicates isn't strictly since images is a map, but it feels safer to do it beforehand
   229  	if len(imageURLs) > 1 {
   230  		imageURLs = model.RemoveDuplicateStrings(imageURLs)
   231  	}
   232  
   233  	for _, imageURL := range imageURLs {
   234  		if _, image, err := a.getLinkMetadata(imageURL, post.CreateAt, isNewPost); err != nil {
   235  			mlog.Debug("Failed to get dimensions of an image in a post",
   236  				mlog.String("post_id", post.Id), mlog.String("image_url", imageURL), mlog.Err(err))
   237  		} else if image != nil {
   238  			images[imageURL] = image
   239  		}
   240  	}
   241  
   242  	return images
   243  }
   244  
   245  func getEmojiNamesForString(s string) []string {
   246  	names := model.EMOJI_PATTERN.FindAllString(s, -1)
   247  
   248  	for i, name := range names {
   249  		names[i] = strings.Trim(name, ":")
   250  	}
   251  
   252  	return names
   253  }
   254  
   255  func getEmojiNamesForPost(post *model.Post, reactions []*model.Reaction) []string {
   256  	// Post message
   257  	names := getEmojiNamesForString(post.Message)
   258  
   259  	// Reactions
   260  	for _, reaction := range reactions {
   261  		names = append(names, reaction.EmojiName)
   262  	}
   263  
   264  	// Post attachments
   265  	for _, attachment := range post.Attachments() {
   266  		if attachment.Text != "" {
   267  			names = append(names, getEmojiNamesForString(attachment.Text)...)
   268  		}
   269  
   270  		if attachment.Pretext != "" {
   271  			names = append(names, getEmojiNamesForString(attachment.Pretext)...)
   272  		}
   273  
   274  		for _, field := range attachment.Fields {
   275  			if value, ok := field.Value.(string); ok {
   276  				names = append(names, getEmojiNamesForString(value)...)
   277  			}
   278  		}
   279  	}
   280  
   281  	// Remove duplicates
   282  	names = model.RemoveDuplicateStrings(names)
   283  
   284  	return names
   285  }
   286  
   287  func (a *App) getCustomEmojisForPost(post *model.Post, reactions []*model.Reaction) ([]*model.Emoji, *model.AppError) {
   288  	if !*a.Config().ServiceSettings.EnableCustomEmoji {
   289  		// Only custom emoji are returned
   290  		return []*model.Emoji{}, nil
   291  	}
   292  
   293  	names := getEmojiNamesForPost(post, reactions)
   294  
   295  	if len(names) == 0 {
   296  		return []*model.Emoji{}, nil
   297  	}
   298  
   299  	return a.GetMultipleEmojiByName(names)
   300  }
   301  
   302  // Given a string, returns the first autolinked URL in the string as well as an array of all Markdown
   303  // images of the form ![alt text](image url). Note that this does not return Markdown links of the
   304  // form [text](url).
   305  func getFirstLinkAndImages(str string) (string, []string) {
   306  	firstLink := ""
   307  	images := []string{}
   308  
   309  	markdown.Inspect(str, func(blockOrInline interface{}) bool {
   310  		switch v := blockOrInline.(type) {
   311  		case *markdown.Autolink:
   312  			if firstLink == "" {
   313  				firstLink = v.Destination()
   314  			}
   315  		case *markdown.InlineImage:
   316  			images = append(images, v.Destination())
   317  		case *markdown.ReferenceImage:
   318  			images = append(images, v.ReferenceDefinition.Destination())
   319  		}
   320  
   321  		return true
   322  	})
   323  
   324  	return firstLink, images
   325  }
   326  
   327  func getImagesInMessageAttachments(post *model.Post) []string {
   328  	var images []string
   329  
   330  	for _, attachment := range post.Attachments() {
   331  		_, imagesInText := getFirstLinkAndImages(attachment.Text)
   332  		images = append(images, imagesInText...)
   333  
   334  		_, imagesInPretext := getFirstLinkAndImages(attachment.Pretext)
   335  		images = append(images, imagesInPretext...)
   336  
   337  		for _, field := range attachment.Fields {
   338  			if value, ok := field.Value.(string); ok {
   339  				_, imagesInFieldValue := getFirstLinkAndImages(value)
   340  				images = append(images, imagesInFieldValue...)
   341  			}
   342  		}
   343  
   344  		if attachment.AuthorIcon != "" {
   345  			images = append(images, attachment.AuthorIcon)
   346  		}
   347  
   348  		if attachment.ImageURL != "" {
   349  			images = append(images, attachment.ImageURL)
   350  		}
   351  
   352  		if attachment.ThumbURL != "" {
   353  			images = append(images, attachment.ThumbURL)
   354  		}
   355  
   356  		if attachment.FooterIcon != "" {
   357  			images = append(images, attachment.FooterIcon)
   358  		}
   359  	}
   360  
   361  	return images
   362  }
   363  
   364  func (a *App) getLinkMetadata(requestURL string, timestamp int64, isNewPost bool) (*opengraph.OpenGraph, *model.PostImage, error) {
   365  	requestURL = resolveMetadataURL(requestURL, a.GetSiteURL())
   366  
   367  	timestamp = model.FloorToNearestHour(timestamp)
   368  
   369  	// Check cache
   370  	og, image, ok := getLinkMetadataFromCache(requestURL, timestamp)
   371  	if ok {
   372  		return og, image, nil
   373  	}
   374  
   375  	// Check the database if this isn't a new post. If it is a new post and the data is cached, it should be in memory.
   376  	if !isNewPost {
   377  		og, image, ok = a.getLinkMetadataFromDatabase(requestURL, timestamp)
   378  		if ok {
   379  			cacheLinkMetadata(requestURL, timestamp, og, image)
   380  
   381  			return og, image, nil
   382  		}
   383  	}
   384  
   385  	// Make request for a web page or an image
   386  	request, err := http.NewRequest("GET", requestURL, nil)
   387  	if err != nil {
   388  		return nil, nil, err
   389  	}
   390  
   391  	var body io.ReadCloser
   392  	var contentType string
   393  
   394  	if (request.URL.Scheme+"://"+request.URL.Host) == a.GetSiteURL() && request.URL.Path == "/api/v4/image" {
   395  		// /api/v4/image requires authentication, so bypass the API by hitting the proxy directly
   396  		body, contentType, err = a.ImageProxy().GetImageDirect(a.ImageProxy().GetUnproxiedImageURL(request.URL.String()))
   397  	} else {
   398  		request.Header.Add("Accept", "image/*")
   399  		request.Header.Add("Accept", "text/html;q=0.8")
   400  
   401  		client := a.HTTPService().MakeClient(false)
   402  		client.Timeout = time.Duration(*a.Config().ExperimentalSettings.LinkMetadataTimeoutMilliseconds) * time.Millisecond
   403  
   404  		var res *http.Response
   405  		res, err = client.Do(request)
   406  
   407  		if res != nil {
   408  			body = res.Body
   409  			contentType = res.Header.Get("Content-Type")
   410  		}
   411  	}
   412  
   413  	if body != nil {
   414  		defer func() {
   415  			io.Copy(ioutil.Discard, body)
   416  			body.Close()
   417  		}()
   418  	}
   419  
   420  	if err == nil {
   421  		// Parse the data
   422  		og, image, err = a.parseLinkMetadata(requestURL, body, contentType)
   423  	}
   424  	og = model.TruncateOpenGraph(og) // remove unwanted length of texts
   425  
   426  	// Write back to cache and database, even if there was an error and the results are nil
   427  	cacheLinkMetadata(requestURL, timestamp, og, image)
   428  
   429  	a.saveLinkMetadataToDatabase(requestURL, timestamp, og, image)
   430  
   431  	return og, image, err
   432  }
   433  
   434  // resolveMetadataURL resolves a given URL relative to the server's site URL.
   435  func resolveMetadataURL(requestURL string, siteURL string) string {
   436  	base, err := url.Parse(siteURL)
   437  	if err != nil {
   438  		return ""
   439  	}
   440  
   441  	resolved, err := base.Parse(requestURL)
   442  	if err != nil {
   443  		return ""
   444  	}
   445  
   446  	return resolved.String()
   447  }
   448  
   449  func getLinkMetadataFromCache(requestURL string, timestamp int64) (*opengraph.OpenGraph, *model.PostImage, bool) {
   450  	var cached linkMetadataCache
   451  	err := linkCache.Get(strconv.FormatInt(model.GenerateLinkMetadataHash(requestURL, timestamp), 16), &cached)
   452  	if err != nil {
   453  		return nil, nil, false
   454  	}
   455  
   456  	return cached.OpenGraph, cached.PostImage, true
   457  }
   458  
   459  func (a *App) getLinkMetadataFromDatabase(requestURL string, timestamp int64) (*opengraph.OpenGraph, *model.PostImage, bool) {
   460  	linkMetadata, err := a.Srv().Store.LinkMetadata().Get(requestURL, timestamp)
   461  	if err != nil {
   462  		return nil, nil, false
   463  	}
   464  
   465  	data := linkMetadata.Data
   466  
   467  	switch v := data.(type) {
   468  	case *opengraph.OpenGraph:
   469  		return v, nil, true
   470  	case *model.PostImage:
   471  		return nil, v, true
   472  	default:
   473  		return nil, nil, true
   474  	}
   475  }
   476  
   477  func (a *App) saveLinkMetadataToDatabase(requestURL string, timestamp int64, og *opengraph.OpenGraph, image *model.PostImage) {
   478  	metadata := &model.LinkMetadata{
   479  		URL:       requestURL,
   480  		Timestamp: timestamp,
   481  	}
   482  
   483  	if og != nil {
   484  		metadata.Type = model.LINK_METADATA_TYPE_OPENGRAPH
   485  		metadata.Data = og
   486  	} else if image != nil {
   487  		metadata.Type = model.LINK_METADATA_TYPE_IMAGE
   488  		metadata.Data = image
   489  	} else {
   490  		metadata.Type = model.LINK_METADATA_TYPE_NONE
   491  	}
   492  
   493  	_, err := a.Srv().Store.LinkMetadata().Save(metadata)
   494  	if err != nil {
   495  		mlog.Warn("Failed to write link metadata", mlog.String("request_url", requestURL), mlog.Err(err))
   496  	}
   497  }
   498  
   499  func cacheLinkMetadata(requestURL string, timestamp int64, og *opengraph.OpenGraph, image *model.PostImage) {
   500  	metadata := linkMetadataCache{
   501  		OpenGraph: og,
   502  		PostImage: image,
   503  	}
   504  
   505  	linkCache.SetWithExpiry(strconv.FormatInt(model.GenerateLinkMetadataHash(requestURL, timestamp), 16), metadata, LINK_CACHE_DURATION)
   506  }
   507  
   508  func (a *App) parseLinkMetadata(requestURL string, body io.Reader, contentType string) (*opengraph.OpenGraph, *model.PostImage, error) {
   509  	if contentType == "image/svg+xml" {
   510  		image := &model.PostImage{
   511  			Format: "svg",
   512  		}
   513  
   514  		return nil, image, nil
   515  	} else if strings.HasPrefix(contentType, "image") {
   516  		image, err := parseImages(io.LimitReader(body, MaxMetadataImageSize))
   517  		return nil, image, err
   518  	} else if strings.HasPrefix(contentType, "text/html") {
   519  		og := a.parseOpenGraphMetadata(requestURL, body, contentType)
   520  
   521  		// The OpenGraph library and Go HTML library don't error for malformed input, so check that at least
   522  		// one of these required fields exists before returning the OpenGraph data
   523  		if og.Title != "" || og.Type != "" || og.URL != "" {
   524  			return og, nil, nil
   525  		} else {
   526  			return nil, nil, nil
   527  		}
   528  	} else {
   529  		// Not an image or web page with OpenGraph information
   530  		return nil, nil, nil
   531  	}
   532  }
   533  
   534  func parseImages(body io.Reader) (*model.PostImage, error) {
   535  	// Store any data that is read for the config for any further processing
   536  	buf := &bytes.Buffer{}
   537  	t := io.TeeReader(body, buf)
   538  
   539  	// Read the image config to get the format and dimensions
   540  	config, format, err := image.DecodeConfig(t)
   541  	if err != nil {
   542  		return nil, err
   543  	}
   544  
   545  	image := &model.PostImage{
   546  		Width:  config.Width,
   547  		Height: config.Height,
   548  		Format: format,
   549  	}
   550  
   551  	if format == "gif" {
   552  		// Decoding the config may have read some of the image data, so re-read the data that has already been read first
   553  		frameCount, err := imgutils.CountFrames(io.MultiReader(buf, body))
   554  		if err != nil {
   555  			return nil, err
   556  		}
   557  
   558  		image.FrameCount = frameCount
   559  	}
   560  
   561  	// Make image information nil when the format is tiff
   562  	if format == "tiff" {
   563  		image = nil
   564  	}
   565  
   566  	return image, nil
   567  }