github.com/nhannv/mattermost-server@v5.11.1+incompatible/app/post_metadata.go (about) 1 // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved. 2 // See License.txt for license information. 3 4 package app 5 6 import ( 7 "bytes" 8 "image" 9 "io" 10 "net/http" 11 "net/url" 12 "strings" 13 "time" 14 15 "github.com/dyatlov/go-opengraph/opengraph" 16 "github.com/mattermost/mattermost-server/mlog" 17 "github.com/mattermost/mattermost-server/model" 18 "github.com/mattermost/mattermost-server/utils" 19 "github.com/mattermost/mattermost-server/utils/imgutils" 20 "github.com/mattermost/mattermost-server/utils/markdown" 21 ) 22 23 const LINK_CACHE_SIZE = 10000 24 const LINK_CACHE_DURATION = 3600 25 const MaxMetadataImageSize = MaxOpenGraphResponseSize 26 27 var linkCache = utils.NewLru(LINK_CACHE_SIZE) 28 29 func (a *App) InitPostMetadata() { 30 // Dump any cached links if the proxy settings have changed so image URLs can be updated 31 a.AddConfigListener(func(before, after *model.Config) { 32 if (before.ImageProxySettings.Enable != after.ImageProxySettings.Enable) || 33 (before.ImageProxySettings.ImageProxyType != after.ImageProxySettings.ImageProxyType) || 34 (before.ImageProxySettings.RemoteImageProxyURL != after.ImageProxySettings.RemoteImageProxyURL) || 35 (before.ImageProxySettings.RemoteImageProxyOptions != after.ImageProxySettings.RemoteImageProxyOptions) { 36 linkCache.Purge() 37 } 38 }) 39 } 40 41 func (a *App) PreparePostListForClient(originalList *model.PostList) *model.PostList { 42 list := &model.PostList{ 43 Posts: make(map[string]*model.Post, len(originalList.Posts)), 44 Order: originalList.Order, // Note that this uses the original Order array, so it isn't a deep copy 45 } 46 47 for id, originalPost := range originalList.Posts { 48 post := a.PreparePostForClient(originalPost, false) 49 50 list.Posts[id] = post 51 } 52 53 return list 54 } 55 56 func (a *App) PreparePostForClient(originalPost *model.Post, isNewPost bool) *model.Post { 57 post := originalPost.Clone() 58 59 // Proxy image links before constructing metadata so that requests go through the proxy 60 post = a.PostWithProxyAddedToImageURLs(post) 61 62 if *a.Config().ExperimentalSettings.DisablePostMetadata { 63 return post 64 } 65 66 post.Metadata = &model.PostMetadata{} 67 68 // Emojis and reaction counts 69 if emojis, reactions, err := a.getEmojisAndReactionsForPost(post); err != nil { 70 mlog.Warn("Failed to get emojis and reactions for a post", mlog.String("post_id", post.Id), mlog.Any("err", err)) 71 } else { 72 post.Metadata.Emojis = emojis 73 post.Metadata.Reactions = reactions 74 } 75 76 // Files 77 if fileInfos, err := a.getFileMetadataForPost(post); err != nil { 78 mlog.Warn("Failed to get files for a post", mlog.String("post_id", post.Id), mlog.Any("err", err)) 79 } else { 80 post.Metadata.Files = fileInfos 81 } 82 83 // Embeds and image dimensions 84 firstLink, images := getFirstLinkAndImages(post.Message) 85 86 if embed, err := a.getEmbedForPost(post, firstLink, isNewPost); err != nil { 87 mlog.Debug("Failed to get embedded content for a post", mlog.String("post_id", post.Id), mlog.Any("err", err)) 88 } else if embed == nil { 89 post.Metadata.Embeds = []*model.PostEmbed{} 90 } else { 91 post.Metadata.Embeds = []*model.PostEmbed{embed} 92 } 93 94 post.Metadata.Images = a.getImagesForPost(post, images, isNewPost) 95 96 return post 97 } 98 99 func (a *App) getFileMetadataForPost(post *model.Post) ([]*model.FileInfo, *model.AppError) { 100 if len(post.FileIds) == 0 { 101 return nil, nil 102 } 103 104 return a.GetFileInfosForPost(post.Id) 105 } 106 107 func (a *App) getEmojisAndReactionsForPost(post *model.Post) ([]*model.Emoji, []*model.Reaction, *model.AppError) { 108 var reactions []*model.Reaction 109 if post.HasReactions { 110 var err *model.AppError 111 reactions, err = a.GetReactionsForPost(post.Id) 112 if err != nil { 113 return nil, nil, err 114 } 115 } 116 117 emojis, err := a.getCustomEmojisForPost(post, reactions) 118 if err != nil { 119 return nil, nil, err 120 } 121 122 return emojis, reactions, nil 123 } 124 125 func (a *App) getEmbedForPost(post *model.Post, firstLink string, isNewPost bool) (*model.PostEmbed, error) { 126 if _, ok := post.Props["attachments"]; ok { 127 return &model.PostEmbed{ 128 Type: model.POST_EMBED_MESSAGE_ATTACHMENT, 129 }, nil 130 } 131 132 if firstLink == "" || !*a.Config().ServiceSettings.EnableLinkPreviews { 133 return nil, nil 134 } 135 136 og, image, err := a.getLinkMetadata(firstLink, post.CreateAt, isNewPost) 137 if err != nil { 138 return nil, err 139 } 140 141 if og != nil { 142 return &model.PostEmbed{ 143 Type: model.POST_EMBED_OPENGRAPH, 144 URL: firstLink, 145 Data: og, 146 }, nil 147 } 148 149 if image != nil { 150 // Note that we're not passing the image info here since it'll be part of the PostMetadata.Images field 151 return &model.PostEmbed{ 152 Type: model.POST_EMBED_IMAGE, 153 URL: firstLink, 154 }, nil 155 } 156 157 return nil, nil 158 } 159 160 func (a *App) getImagesForPost(post *model.Post, imageURLs []string, isNewPost bool) map[string]*model.PostImage { 161 images := map[string]*model.PostImage{} 162 163 for _, embed := range post.Metadata.Embeds { 164 switch embed.Type { 165 case model.POST_EMBED_IMAGE: 166 // These dimensions will generally be cached by a previous call to getEmbedForPost 167 imageURLs = append(imageURLs, embed.URL) 168 169 case model.POST_EMBED_MESSAGE_ATTACHMENT: 170 imageURLs = append(imageURLs, getImagesInMessageAttachments(post)...) 171 172 case model.POST_EMBED_OPENGRAPH: 173 for _, image := range embed.Data.(*opengraph.OpenGraph).Images { 174 var imageURL string 175 if image.SecureURL != "" { 176 imageURL = image.SecureURL 177 } else if image.URL != "" { 178 imageURL = image.URL 179 } 180 181 if imageURL == "" { 182 continue 183 } 184 185 imageURLs = append(imageURLs, imageURL) 186 } 187 } 188 } 189 190 // Removing duplicates isn't strictly since images is a map, but it feels safer to do it beforehand 191 if len(imageURLs) > 1 { 192 imageURLs = model.RemoveDuplicateStrings(imageURLs) 193 } 194 195 for _, imageURL := range imageURLs { 196 if _, image, err := a.getLinkMetadata(imageURL, post.CreateAt, isNewPost); err != nil { 197 mlog.Debug("Failed to get dimensions of an image in a post", 198 mlog.String("post_id", post.Id), mlog.String("image_url", imageURL), mlog.Any("err", err)) 199 } else if image != nil { 200 images[imageURL] = image 201 } 202 } 203 204 return images 205 } 206 207 func getEmojiNamesForString(s string) []string { 208 names := model.EMOJI_PATTERN.FindAllString(s, -1) 209 210 for i, name := range names { 211 names[i] = strings.Trim(name, ":") 212 } 213 214 return names 215 } 216 217 func getEmojiNamesForPost(post *model.Post, reactions []*model.Reaction) []string { 218 // Post message 219 names := getEmojiNamesForString(post.Message) 220 221 // Reactions 222 for _, reaction := range reactions { 223 names = append(names, reaction.EmojiName) 224 } 225 226 // Post attachments 227 for _, attachment := range post.Attachments() { 228 if attachment.Text != "" { 229 names = append(names, getEmojiNamesForString(attachment.Text)...) 230 } 231 232 if attachment.Pretext != "" { 233 names = append(names, getEmojiNamesForString(attachment.Pretext)...) 234 } 235 236 for _, field := range attachment.Fields { 237 if value, ok := field.Value.(string); ok { 238 names = append(names, getEmojiNamesForString(value)...) 239 } 240 } 241 } 242 243 // Remove duplicates 244 names = model.RemoveDuplicateStrings(names) 245 246 return names 247 } 248 249 func (a *App) getCustomEmojisForPost(post *model.Post, reactions []*model.Reaction) ([]*model.Emoji, *model.AppError) { 250 if !*a.Config().ServiceSettings.EnableCustomEmoji { 251 // Only custom emoji are returned 252 return []*model.Emoji{}, nil 253 } 254 255 names := getEmojiNamesForPost(post, reactions) 256 257 if len(names) == 0 { 258 return []*model.Emoji{}, nil 259 } 260 261 return a.GetMultipleEmojiByName(names) 262 } 263 264 // Given a string, returns the first autolinked URL in the string as well as an array of all Markdown 265 // images of the form ![alt text](image url). Note that this does not return Markdown links of the 266 // form [text](url). 267 func getFirstLinkAndImages(str string) (string, []string) { 268 firstLink := "" 269 images := []string{} 270 271 markdown.Inspect(str, func(blockOrInline interface{}) bool { 272 switch v := blockOrInline.(type) { 273 case *markdown.Autolink: 274 if firstLink == "" { 275 firstLink = v.Destination() 276 } 277 case *markdown.InlineImage: 278 images = append(images, v.Destination()) 279 case *markdown.ReferenceImage: 280 images = append(images, v.ReferenceDefinition.Destination()) 281 } 282 283 return true 284 }) 285 286 return firstLink, images 287 } 288 289 func getImagesInMessageAttachments(post *model.Post) []string { 290 var images []string 291 292 for _, attachment := range post.Attachments() { 293 _, imagesInText := getFirstLinkAndImages(attachment.Text) 294 images = append(images, imagesInText...) 295 296 _, imagesInPretext := getFirstLinkAndImages(attachment.Pretext) 297 images = append(images, imagesInPretext...) 298 299 for _, field := range attachment.Fields { 300 if value, ok := field.Value.(string); ok { 301 _, imagesInFieldValue := getFirstLinkAndImages(value) 302 images = append(images, imagesInFieldValue...) 303 } 304 } 305 306 if attachment.AuthorIcon != "" { 307 images = append(images, attachment.AuthorIcon) 308 } 309 310 if attachment.ImageURL != "" { 311 images = append(images, attachment.ImageURL) 312 } 313 314 if attachment.ThumbURL != "" { 315 images = append(images, attachment.ThumbURL) 316 } 317 318 if attachment.FooterIcon != "" { 319 images = append(images, attachment.FooterIcon) 320 } 321 } 322 323 return images 324 } 325 326 func (a *App) getLinkMetadata(requestURL string, timestamp int64, isNewPost bool) (*opengraph.OpenGraph, *model.PostImage, error) { 327 requestURL = resolveMetadataURL(requestURL, a.GetSiteURL()) 328 329 timestamp = model.FloorToNearestHour(timestamp) 330 331 // Check cache 332 og, image, ok := getLinkMetadataFromCache(requestURL, timestamp) 333 if ok { 334 return og, image, nil 335 } 336 337 // Check the database if this isn't a new post. If it is a new post and the data is cached, it should be in memory. 338 if !isNewPost { 339 og, image, ok = a.getLinkMetadataFromDatabase(requestURL, timestamp) 340 if ok { 341 cacheLinkMetadata(requestURL, timestamp, og, image) 342 343 return og, image, nil 344 } 345 } 346 347 // Make request for a web page or an image 348 request, err := http.NewRequest("GET", requestURL, nil) 349 if err != nil { 350 return nil, nil, err 351 } 352 353 var body io.ReadCloser 354 var contentType string 355 356 if (request.URL.Scheme+"://"+request.URL.Host) == a.GetSiteURL() && request.URL.Path == "/api/v4/image" { 357 // /api/v4/image requires authentication, so bypass the API by hitting the proxy directly 358 body, contentType, err = a.ImageProxy.GetImageDirect(a.ImageProxy.GetUnproxiedImageURL(request.URL.String())) 359 } else { 360 request.Header.Add("Accept", "image/*, text/html") 361 362 client := a.HTTPService.MakeClient(false) 363 client.Timeout = time.Duration(*a.Config().ExperimentalSettings.LinkMetadataTimeoutMilliseconds) * time.Millisecond 364 365 var res *http.Response 366 res, err = client.Do(request) 367 368 if res != nil { 369 body = res.Body 370 contentType = res.Header.Get("Content-Type") 371 } 372 } 373 374 if body != nil { 375 defer body.Close() 376 } 377 378 if err == nil { 379 // Parse the data 380 og, image, err = a.parseLinkMetadata(requestURL, body, contentType) 381 } 382 383 // Write back to cache and database, even if there was an error and the results are nil 384 cacheLinkMetadata(requestURL, timestamp, og, image) 385 386 a.saveLinkMetadataToDatabase(requestURL, timestamp, og, image) 387 388 return og, image, err 389 } 390 391 // resolveMetadataURL resolves a given URL relative to the server's site URL. 392 func resolveMetadataURL(requestURL string, siteURL string) string { 393 base, err := url.Parse(siteURL) 394 if err != nil { 395 return "" 396 } 397 398 resolved, err := base.Parse(requestURL) 399 if err != nil { 400 return "" 401 } 402 403 return resolved.String() 404 } 405 406 func getLinkMetadataFromCache(requestURL string, timestamp int64) (*opengraph.OpenGraph, *model.PostImage, bool) { 407 cached, ok := linkCache.Get(model.GenerateLinkMetadataHash(requestURL, timestamp)) 408 if !ok { 409 return nil, nil, false 410 } 411 412 switch v := cached.(type) { 413 case *opengraph.OpenGraph: 414 return v, nil, true 415 case *model.PostImage: 416 return nil, v, true 417 default: 418 return nil, nil, true 419 } 420 } 421 422 func (a *App) getLinkMetadataFromDatabase(requestURL string, timestamp int64) (*opengraph.OpenGraph, *model.PostImage, bool) { 423 result := <-a.Srv.Store.LinkMetadata().Get(requestURL, timestamp) 424 if result.Err != nil { 425 return nil, nil, false 426 } 427 428 data := result.Data.(*model.LinkMetadata).Data 429 430 switch v := data.(type) { 431 case *opengraph.OpenGraph: 432 return v, nil, true 433 case *model.PostImage: 434 return nil, v, true 435 default: 436 return nil, nil, true 437 } 438 } 439 440 func (a *App) saveLinkMetadataToDatabase(requestURL string, timestamp int64, og *opengraph.OpenGraph, image *model.PostImage) { 441 metadata := &model.LinkMetadata{ 442 URL: requestURL, 443 Timestamp: timestamp, 444 } 445 446 if og != nil { 447 metadata.Type = model.LINK_METADATA_TYPE_OPENGRAPH 448 metadata.Data = og 449 } else if image != nil { 450 metadata.Type = model.LINK_METADATA_TYPE_IMAGE 451 metadata.Data = image 452 } else { 453 metadata.Type = model.LINK_METADATA_TYPE_NONE 454 } 455 456 result := <-a.Srv.Store.LinkMetadata().Save(metadata) 457 if result.Err != nil { 458 mlog.Warn("Failed to write link metadata", mlog.String("request_url", requestURL), mlog.Err(result.Err)) 459 } 460 } 461 462 func cacheLinkMetadata(requestURL string, timestamp int64, og *opengraph.OpenGraph, image *model.PostImage) { 463 var val interface{} 464 if og != nil { 465 val = og 466 } else if image != nil { 467 val = image 468 } 469 470 linkCache.AddWithExpiresInSecs(model.GenerateLinkMetadataHash(requestURL, timestamp), val, LINK_CACHE_DURATION) 471 } 472 473 func (a *App) parseLinkMetadata(requestURL string, body io.Reader, contentType string) (*opengraph.OpenGraph, *model.PostImage, error) { 474 if strings.HasPrefix(contentType, "image") { 475 image, err := parseImages(io.LimitReader(body, MaxMetadataImageSize)) 476 return nil, image, err 477 } else if strings.HasPrefix(contentType, "text/html") { 478 og := a.ParseOpenGraphMetadata(requestURL, body, contentType) 479 480 // The OpenGraph library and Go HTML library don't error for malformed input, so check that at least 481 // one of these required fields exists before returning the OpenGraph data 482 if og.Title != "" || og.Type != "" || og.URL != "" { 483 return og, nil, nil 484 } else { 485 return nil, nil, nil 486 } 487 } else { 488 // Not an image or web page with OpenGraph information 489 return nil, nil, nil 490 } 491 } 492 493 func parseImages(body io.Reader) (*model.PostImage, error) { 494 // Store any data that is read for the config for any further processing 495 buf := &bytes.Buffer{} 496 t := io.TeeReader(body, buf) 497 498 // Read the image config to get the format and dimensions 499 config, format, err := image.DecodeConfig(t) 500 if err != nil { 501 return nil, err 502 } 503 504 image := &model.PostImage{ 505 Width: config.Width, 506 Height: config.Height, 507 Format: format, 508 } 509 510 if format == "gif" { 511 // Decoding the config may have read some of the image data, so re-read the data that has already been read first 512 frameCount, err := imgutils.CountFrames(io.MultiReader(buf, body)) 513 if err != nil { 514 return nil, err 515 } 516 517 image.FrameCount = frameCount 518 } 519 520 return image, nil 521 }