github.com/nhannv/mattermost-server@v5.11.1+incompatible/app/post_metadata.go (about)

     1  // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
     2  // See License.txt for license information.
     3  
     4  package app
     5  
     6  import (
     7  	"bytes"
     8  	"image"
     9  	"io"
    10  	"net/http"
    11  	"net/url"
    12  	"strings"
    13  	"time"
    14  
    15  	"github.com/dyatlov/go-opengraph/opengraph"
    16  	"github.com/mattermost/mattermost-server/mlog"
    17  	"github.com/mattermost/mattermost-server/model"
    18  	"github.com/mattermost/mattermost-server/utils"
    19  	"github.com/mattermost/mattermost-server/utils/imgutils"
    20  	"github.com/mattermost/mattermost-server/utils/markdown"
    21  )
    22  
    23  const LINK_CACHE_SIZE = 10000
    24  const LINK_CACHE_DURATION = 3600
    25  const MaxMetadataImageSize = MaxOpenGraphResponseSize
    26  
    27  var linkCache = utils.NewLru(LINK_CACHE_SIZE)
    28  
    29  func (a *App) InitPostMetadata() {
    30  	// Dump any cached links if the proxy settings have changed so image URLs can be updated
    31  	a.AddConfigListener(func(before, after *model.Config) {
    32  		if (before.ImageProxySettings.Enable != after.ImageProxySettings.Enable) ||
    33  			(before.ImageProxySettings.ImageProxyType != after.ImageProxySettings.ImageProxyType) ||
    34  			(before.ImageProxySettings.RemoteImageProxyURL != after.ImageProxySettings.RemoteImageProxyURL) ||
    35  			(before.ImageProxySettings.RemoteImageProxyOptions != after.ImageProxySettings.RemoteImageProxyOptions) {
    36  			linkCache.Purge()
    37  		}
    38  	})
    39  }
    40  
    41  func (a *App) PreparePostListForClient(originalList *model.PostList) *model.PostList {
    42  	list := &model.PostList{
    43  		Posts: make(map[string]*model.Post, len(originalList.Posts)),
    44  		Order: originalList.Order, // Note that this uses the original Order array, so it isn't a deep copy
    45  	}
    46  
    47  	for id, originalPost := range originalList.Posts {
    48  		post := a.PreparePostForClient(originalPost, false)
    49  
    50  		list.Posts[id] = post
    51  	}
    52  
    53  	return list
    54  }
    55  
    56  func (a *App) PreparePostForClient(originalPost *model.Post, isNewPost bool) *model.Post {
    57  	post := originalPost.Clone()
    58  
    59  	// Proxy image links before constructing metadata so that requests go through the proxy
    60  	post = a.PostWithProxyAddedToImageURLs(post)
    61  
    62  	if *a.Config().ExperimentalSettings.DisablePostMetadata {
    63  		return post
    64  	}
    65  
    66  	post.Metadata = &model.PostMetadata{}
    67  
    68  	// Emojis and reaction counts
    69  	if emojis, reactions, err := a.getEmojisAndReactionsForPost(post); err != nil {
    70  		mlog.Warn("Failed to get emojis and reactions for a post", mlog.String("post_id", post.Id), mlog.Any("err", err))
    71  	} else {
    72  		post.Metadata.Emojis = emojis
    73  		post.Metadata.Reactions = reactions
    74  	}
    75  
    76  	// Files
    77  	if fileInfos, err := a.getFileMetadataForPost(post); err != nil {
    78  		mlog.Warn("Failed to get files for a post", mlog.String("post_id", post.Id), mlog.Any("err", err))
    79  	} else {
    80  		post.Metadata.Files = fileInfos
    81  	}
    82  
    83  	// Embeds and image dimensions
    84  	firstLink, images := getFirstLinkAndImages(post.Message)
    85  
    86  	if embed, err := a.getEmbedForPost(post, firstLink, isNewPost); err != nil {
    87  		mlog.Debug("Failed to get embedded content for a post", mlog.String("post_id", post.Id), mlog.Any("err", err))
    88  	} else if embed == nil {
    89  		post.Metadata.Embeds = []*model.PostEmbed{}
    90  	} else {
    91  		post.Metadata.Embeds = []*model.PostEmbed{embed}
    92  	}
    93  
    94  	post.Metadata.Images = a.getImagesForPost(post, images, isNewPost)
    95  
    96  	return post
    97  }
    98  
    99  func (a *App) getFileMetadataForPost(post *model.Post) ([]*model.FileInfo, *model.AppError) {
   100  	if len(post.FileIds) == 0 {
   101  		return nil, nil
   102  	}
   103  
   104  	return a.GetFileInfosForPost(post.Id)
   105  }
   106  
   107  func (a *App) getEmojisAndReactionsForPost(post *model.Post) ([]*model.Emoji, []*model.Reaction, *model.AppError) {
   108  	var reactions []*model.Reaction
   109  	if post.HasReactions {
   110  		var err *model.AppError
   111  		reactions, err = a.GetReactionsForPost(post.Id)
   112  		if err != nil {
   113  			return nil, nil, err
   114  		}
   115  	}
   116  
   117  	emojis, err := a.getCustomEmojisForPost(post, reactions)
   118  	if err != nil {
   119  		return nil, nil, err
   120  	}
   121  
   122  	return emojis, reactions, nil
   123  }
   124  
   125  func (a *App) getEmbedForPost(post *model.Post, firstLink string, isNewPost bool) (*model.PostEmbed, error) {
   126  	if _, ok := post.Props["attachments"]; ok {
   127  		return &model.PostEmbed{
   128  			Type: model.POST_EMBED_MESSAGE_ATTACHMENT,
   129  		}, nil
   130  	}
   131  
   132  	if firstLink == "" || !*a.Config().ServiceSettings.EnableLinkPreviews {
   133  		return nil, nil
   134  	}
   135  
   136  	og, image, err := a.getLinkMetadata(firstLink, post.CreateAt, isNewPost)
   137  	if err != nil {
   138  		return nil, err
   139  	}
   140  
   141  	if og != nil {
   142  		return &model.PostEmbed{
   143  			Type: model.POST_EMBED_OPENGRAPH,
   144  			URL:  firstLink,
   145  			Data: og,
   146  		}, nil
   147  	}
   148  
   149  	if image != nil {
   150  		// Note that we're not passing the image info here since it'll be part of the PostMetadata.Images field
   151  		return &model.PostEmbed{
   152  			Type: model.POST_EMBED_IMAGE,
   153  			URL:  firstLink,
   154  		}, nil
   155  	}
   156  
   157  	return nil, nil
   158  }
   159  
   160  func (a *App) getImagesForPost(post *model.Post, imageURLs []string, isNewPost bool) map[string]*model.PostImage {
   161  	images := map[string]*model.PostImage{}
   162  
   163  	for _, embed := range post.Metadata.Embeds {
   164  		switch embed.Type {
   165  		case model.POST_EMBED_IMAGE:
   166  			// These dimensions will generally be cached by a previous call to getEmbedForPost
   167  			imageURLs = append(imageURLs, embed.URL)
   168  
   169  		case model.POST_EMBED_MESSAGE_ATTACHMENT:
   170  			imageURLs = append(imageURLs, getImagesInMessageAttachments(post)...)
   171  
   172  		case model.POST_EMBED_OPENGRAPH:
   173  			for _, image := range embed.Data.(*opengraph.OpenGraph).Images {
   174  				var imageURL string
   175  				if image.SecureURL != "" {
   176  					imageURL = image.SecureURL
   177  				} else if image.URL != "" {
   178  					imageURL = image.URL
   179  				}
   180  
   181  				if imageURL == "" {
   182  					continue
   183  				}
   184  
   185  				imageURLs = append(imageURLs, imageURL)
   186  			}
   187  		}
   188  	}
   189  
   190  	// Removing duplicates isn't strictly since images is a map, but it feels safer to do it beforehand
   191  	if len(imageURLs) > 1 {
   192  		imageURLs = model.RemoveDuplicateStrings(imageURLs)
   193  	}
   194  
   195  	for _, imageURL := range imageURLs {
   196  		if _, image, err := a.getLinkMetadata(imageURL, post.CreateAt, isNewPost); err != nil {
   197  			mlog.Debug("Failed to get dimensions of an image in a post",
   198  				mlog.String("post_id", post.Id), mlog.String("image_url", imageURL), mlog.Any("err", err))
   199  		} else if image != nil {
   200  			images[imageURL] = image
   201  		}
   202  	}
   203  
   204  	return images
   205  }
   206  
   207  func getEmojiNamesForString(s string) []string {
   208  	names := model.EMOJI_PATTERN.FindAllString(s, -1)
   209  
   210  	for i, name := range names {
   211  		names[i] = strings.Trim(name, ":")
   212  	}
   213  
   214  	return names
   215  }
   216  
   217  func getEmojiNamesForPost(post *model.Post, reactions []*model.Reaction) []string {
   218  	// Post message
   219  	names := getEmojiNamesForString(post.Message)
   220  
   221  	// Reactions
   222  	for _, reaction := range reactions {
   223  		names = append(names, reaction.EmojiName)
   224  	}
   225  
   226  	// Post attachments
   227  	for _, attachment := range post.Attachments() {
   228  		if attachment.Text != "" {
   229  			names = append(names, getEmojiNamesForString(attachment.Text)...)
   230  		}
   231  
   232  		if attachment.Pretext != "" {
   233  			names = append(names, getEmojiNamesForString(attachment.Pretext)...)
   234  		}
   235  
   236  		for _, field := range attachment.Fields {
   237  			if value, ok := field.Value.(string); ok {
   238  				names = append(names, getEmojiNamesForString(value)...)
   239  			}
   240  		}
   241  	}
   242  
   243  	// Remove duplicates
   244  	names = model.RemoveDuplicateStrings(names)
   245  
   246  	return names
   247  }
   248  
   249  func (a *App) getCustomEmojisForPost(post *model.Post, reactions []*model.Reaction) ([]*model.Emoji, *model.AppError) {
   250  	if !*a.Config().ServiceSettings.EnableCustomEmoji {
   251  		// Only custom emoji are returned
   252  		return []*model.Emoji{}, nil
   253  	}
   254  
   255  	names := getEmojiNamesForPost(post, reactions)
   256  
   257  	if len(names) == 0 {
   258  		return []*model.Emoji{}, nil
   259  	}
   260  
   261  	return a.GetMultipleEmojiByName(names)
   262  }
   263  
   264  // Given a string, returns the first autolinked URL in the string as well as an array of all Markdown
   265  // images of the form ![alt text](image url). Note that this does not return Markdown links of the
   266  // form [text](url).
   267  func getFirstLinkAndImages(str string) (string, []string) {
   268  	firstLink := ""
   269  	images := []string{}
   270  
   271  	markdown.Inspect(str, func(blockOrInline interface{}) bool {
   272  		switch v := blockOrInline.(type) {
   273  		case *markdown.Autolink:
   274  			if firstLink == "" {
   275  				firstLink = v.Destination()
   276  			}
   277  		case *markdown.InlineImage:
   278  			images = append(images, v.Destination())
   279  		case *markdown.ReferenceImage:
   280  			images = append(images, v.ReferenceDefinition.Destination())
   281  		}
   282  
   283  		return true
   284  	})
   285  
   286  	return firstLink, images
   287  }
   288  
   289  func getImagesInMessageAttachments(post *model.Post) []string {
   290  	var images []string
   291  
   292  	for _, attachment := range post.Attachments() {
   293  		_, imagesInText := getFirstLinkAndImages(attachment.Text)
   294  		images = append(images, imagesInText...)
   295  
   296  		_, imagesInPretext := getFirstLinkAndImages(attachment.Pretext)
   297  		images = append(images, imagesInPretext...)
   298  
   299  		for _, field := range attachment.Fields {
   300  			if value, ok := field.Value.(string); ok {
   301  				_, imagesInFieldValue := getFirstLinkAndImages(value)
   302  				images = append(images, imagesInFieldValue...)
   303  			}
   304  		}
   305  
   306  		if attachment.AuthorIcon != "" {
   307  			images = append(images, attachment.AuthorIcon)
   308  		}
   309  
   310  		if attachment.ImageURL != "" {
   311  			images = append(images, attachment.ImageURL)
   312  		}
   313  
   314  		if attachment.ThumbURL != "" {
   315  			images = append(images, attachment.ThumbURL)
   316  		}
   317  
   318  		if attachment.FooterIcon != "" {
   319  			images = append(images, attachment.FooterIcon)
   320  		}
   321  	}
   322  
   323  	return images
   324  }
   325  
   326  func (a *App) getLinkMetadata(requestURL string, timestamp int64, isNewPost bool) (*opengraph.OpenGraph, *model.PostImage, error) {
   327  	requestURL = resolveMetadataURL(requestURL, a.GetSiteURL())
   328  
   329  	timestamp = model.FloorToNearestHour(timestamp)
   330  
   331  	// Check cache
   332  	og, image, ok := getLinkMetadataFromCache(requestURL, timestamp)
   333  	if ok {
   334  		return og, image, nil
   335  	}
   336  
   337  	// Check the database if this isn't a new post. If it is a new post and the data is cached, it should be in memory.
   338  	if !isNewPost {
   339  		og, image, ok = a.getLinkMetadataFromDatabase(requestURL, timestamp)
   340  		if ok {
   341  			cacheLinkMetadata(requestURL, timestamp, og, image)
   342  
   343  			return og, image, nil
   344  		}
   345  	}
   346  
   347  	// Make request for a web page or an image
   348  	request, err := http.NewRequest("GET", requestURL, nil)
   349  	if err != nil {
   350  		return nil, nil, err
   351  	}
   352  
   353  	var body io.ReadCloser
   354  	var contentType string
   355  
   356  	if (request.URL.Scheme+"://"+request.URL.Host) == a.GetSiteURL() && request.URL.Path == "/api/v4/image" {
   357  		// /api/v4/image requires authentication, so bypass the API by hitting the proxy directly
   358  		body, contentType, err = a.ImageProxy.GetImageDirect(a.ImageProxy.GetUnproxiedImageURL(request.URL.String()))
   359  	} else {
   360  		request.Header.Add("Accept", "image/*, text/html")
   361  
   362  		client := a.HTTPService.MakeClient(false)
   363  		client.Timeout = time.Duration(*a.Config().ExperimentalSettings.LinkMetadataTimeoutMilliseconds) * time.Millisecond
   364  
   365  		var res *http.Response
   366  		res, err = client.Do(request)
   367  
   368  		if res != nil {
   369  			body = res.Body
   370  			contentType = res.Header.Get("Content-Type")
   371  		}
   372  	}
   373  
   374  	if body != nil {
   375  		defer body.Close()
   376  	}
   377  
   378  	if err == nil {
   379  		// Parse the data
   380  		og, image, err = a.parseLinkMetadata(requestURL, body, contentType)
   381  	}
   382  
   383  	// Write back to cache and database, even if there was an error and the results are nil
   384  	cacheLinkMetadata(requestURL, timestamp, og, image)
   385  
   386  	a.saveLinkMetadataToDatabase(requestURL, timestamp, og, image)
   387  
   388  	return og, image, err
   389  }
   390  
   391  // resolveMetadataURL resolves a given URL relative to the server's site URL.
   392  func resolveMetadataURL(requestURL string, siteURL string) string {
   393  	base, err := url.Parse(siteURL)
   394  	if err != nil {
   395  		return ""
   396  	}
   397  
   398  	resolved, err := base.Parse(requestURL)
   399  	if err != nil {
   400  		return ""
   401  	}
   402  
   403  	return resolved.String()
   404  }
   405  
   406  func getLinkMetadataFromCache(requestURL string, timestamp int64) (*opengraph.OpenGraph, *model.PostImage, bool) {
   407  	cached, ok := linkCache.Get(model.GenerateLinkMetadataHash(requestURL, timestamp))
   408  	if !ok {
   409  		return nil, nil, false
   410  	}
   411  
   412  	switch v := cached.(type) {
   413  	case *opengraph.OpenGraph:
   414  		return v, nil, true
   415  	case *model.PostImage:
   416  		return nil, v, true
   417  	default:
   418  		return nil, nil, true
   419  	}
   420  }
   421  
   422  func (a *App) getLinkMetadataFromDatabase(requestURL string, timestamp int64) (*opengraph.OpenGraph, *model.PostImage, bool) {
   423  	result := <-a.Srv.Store.LinkMetadata().Get(requestURL, timestamp)
   424  	if result.Err != nil {
   425  		return nil, nil, false
   426  	}
   427  
   428  	data := result.Data.(*model.LinkMetadata).Data
   429  
   430  	switch v := data.(type) {
   431  	case *opengraph.OpenGraph:
   432  		return v, nil, true
   433  	case *model.PostImage:
   434  		return nil, v, true
   435  	default:
   436  		return nil, nil, true
   437  	}
   438  }
   439  
   440  func (a *App) saveLinkMetadataToDatabase(requestURL string, timestamp int64, og *opengraph.OpenGraph, image *model.PostImage) {
   441  	metadata := &model.LinkMetadata{
   442  		URL:       requestURL,
   443  		Timestamp: timestamp,
   444  	}
   445  
   446  	if og != nil {
   447  		metadata.Type = model.LINK_METADATA_TYPE_OPENGRAPH
   448  		metadata.Data = og
   449  	} else if image != nil {
   450  		metadata.Type = model.LINK_METADATA_TYPE_IMAGE
   451  		metadata.Data = image
   452  	} else {
   453  		metadata.Type = model.LINK_METADATA_TYPE_NONE
   454  	}
   455  
   456  	result := <-a.Srv.Store.LinkMetadata().Save(metadata)
   457  	if result.Err != nil {
   458  		mlog.Warn("Failed to write link metadata", mlog.String("request_url", requestURL), mlog.Err(result.Err))
   459  	}
   460  }
   461  
   462  func cacheLinkMetadata(requestURL string, timestamp int64, og *opengraph.OpenGraph, image *model.PostImage) {
   463  	var val interface{}
   464  	if og != nil {
   465  		val = og
   466  	} else if image != nil {
   467  		val = image
   468  	}
   469  
   470  	linkCache.AddWithExpiresInSecs(model.GenerateLinkMetadataHash(requestURL, timestamp), val, LINK_CACHE_DURATION)
   471  }
   472  
   473  func (a *App) parseLinkMetadata(requestURL string, body io.Reader, contentType string) (*opengraph.OpenGraph, *model.PostImage, error) {
   474  	if strings.HasPrefix(contentType, "image") {
   475  		image, err := parseImages(io.LimitReader(body, MaxMetadataImageSize))
   476  		return nil, image, err
   477  	} else if strings.HasPrefix(contentType, "text/html") {
   478  		og := a.ParseOpenGraphMetadata(requestURL, body, contentType)
   479  
   480  		// The OpenGraph library and Go HTML library don't error for malformed input, so check that at least
   481  		// one of these required fields exists before returning the OpenGraph data
   482  		if og.Title != "" || og.Type != "" || og.URL != "" {
   483  			return og, nil, nil
   484  		} else {
   485  			return nil, nil, nil
   486  		}
   487  	} else {
   488  		// Not an image or web page with OpenGraph information
   489  		return nil, nil, nil
   490  	}
   491  }
   492  
   493  func parseImages(body io.Reader) (*model.PostImage, error) {
   494  	// Store any data that is read for the config for any further processing
   495  	buf := &bytes.Buffer{}
   496  	t := io.TeeReader(body, buf)
   497  
   498  	// Read the image config to get the format and dimensions
   499  	config, format, err := image.DecodeConfig(t)
   500  	if err != nil {
   501  		return nil, err
   502  	}
   503  
   504  	image := &model.PostImage{
   505  		Width:  config.Width,
   506  		Height: config.Height,
   507  		Format: format,
   508  	}
   509  
   510  	if format == "gif" {
   511  		// Decoding the config may have read some of the image data, so re-read the data that has already been read first
   512  		frameCount, err := imgutils.CountFrames(io.MultiReader(buf, body))
   513  		if err != nil {
   514  			return nil, err
   515  		}
   516  
   517  		image.FrameCount = frameCount
   518  	}
   519  
   520  	return image, nil
   521  }