github.com/cozy/cozy-stack@v0.0.0-20240603063001-31110fa4cae1/model/vfs/metadata.go (about) 1 package vfs 2 3 import ( 4 "bytes" 5 "errors" 6 "fmt" 7 "image" 8 "io" 9 "math" 10 "net/url" 11 "strings" 12 "sync" 13 "time" 14 15 // Packages image/... are not used explicitly in the code below, 16 // but are imported for their initialization side-effects 17 _ "image/gif" 18 _ "image/jpeg" 19 _ "image/png" 20 21 // Same for image/webp 22 _ "golang.org/x/image/webp" 23 24 "github.com/bradfitz/latlong" 25 "github.com/cozy/cozy-stack/pkg/config/config" 26 "github.com/cozy/cozy-stack/pkg/consts" 27 "github.com/cozy/cozy-stack/pkg/shortcut" 28 "github.com/cozy/goexif2/exif" 29 "github.com/cozy/goexif2/tiff" 30 "github.com/dhowden/tag" 31 ) 32 33 // MetadataExtractorVersion is the version number of the metadata extractor. 34 // It will be used later to know which files can be re-examined to get more 35 // metadata when the extractor is improved. 36 const MetadataExtractorVersion = 2 37 38 // Metadata is a list of metadata specific to each mimetype: 39 // id3 for music, exif for jpegs, etc. 40 type Metadata map[string]interface{} 41 42 // NewMetadata returns a new metadata object, with the version field set 43 func NewMetadata() Metadata { 44 m := Metadata{} 45 m["extractor_version"] = MetadataExtractorVersion 46 return m 47 } 48 49 // MergeMetadata takes a metadata map and merges it in the FileDoc 50 func MergeMetadata(doc *FileDoc, meta Metadata) { 51 if doc.Metadata == nil { 52 doc.Metadata = meta 53 } else { 54 for k, v := range meta { 55 // XXX: do not overwrite the target metadata for sharing shortcuts 56 if k != "target" || doc.Metadata[k] == nil { 57 doc.Metadata[k] = v 58 } 59 } 60 } 61 } 62 63 // RemoveCertifiedMetadata returns a metadata map where the keys that are 64 // certified have been removed. It can be useful for sharing, as certified 65 // metadata are only valid localy. 66 func (m Metadata) RemoveCertifiedMetadata() Metadata { 67 if len(m) == 0 { 68 return Metadata{} 69 } 70 result := make(Metadata, len(m)) 71 for k, v := range m { 72 if k == consts.CarbonCopyKey || k == consts.ElectronicSafeKey { 73 continue 74 } 75 result[k] = v 76 } 77 return result 78 } 79 80 // MetaExtractor is an interface for extracting metadata from a file 81 type MetaExtractor interface { 82 io.WriteCloser 83 Abort(error) 84 Result() Metadata 85 } 86 87 // NewMetaExtractor returns an extractor for metadata if the mime type has one, 88 // or null else 89 func NewMetaExtractor(doc *FileDoc) *MetaExtractor { 90 var e MetaExtractor 91 switch doc.Mime { 92 case "image/jpeg": 93 e = NewExifExtractor(doc.CreatedAt, true) 94 case "image/heic", "image/heif": 95 e = NewExifExtractor(doc.CreatedAt, false) 96 case "image/png", "image/gif": 97 e = NewImageExtractor(doc.CreatedAt) 98 case "audio/mp3", "audio/mpeg", "audio/ogg", "audio/x-m4a", "audio/flac": 99 e = NewAudioExtractor() 100 case consts.ShortcutMimeType: 101 var instance string 102 if doc.CozyMetadata != nil { 103 instance = doc.CozyMetadata.CreatedOn 104 } 105 e = NewShortcutExtractor(instance) 106 } 107 if e != nil { 108 return &e 109 } 110 return nil 111 } 112 113 // ImageExtractor is used to extract width/height from images 114 type ImageExtractor struct { 115 w *io.PipeWriter 116 r *io.PipeReader 117 ch chan interface{} 118 createdAt time.Time 119 } 120 121 // NewImageExtractor returns an extractor for images 122 func NewImageExtractor(createdAt time.Time) *ImageExtractor { 123 e := &ImageExtractor{createdAt: createdAt} 124 e.r, e.w = io.Pipe() 125 e.ch = make(chan interface{}) 126 go e.Start() 127 return e 128 } 129 130 // Start is used in a goroutine to start the metadata extraction 131 func (e *ImageExtractor) Start() { 132 var cfg image.Config 133 var err error 134 defer func() { 135 r := recover() 136 if errc := e.r.Close(); err == nil { 137 err = errc 138 } 139 if r != nil { 140 e.ch <- fmt.Errorf("metadata: recovered from image decoding: %s", r) 141 } else if err != nil { 142 e.ch <- err 143 } else { 144 e.ch <- cfg 145 } 146 }() 147 cfg, _, err = image.DecodeConfig(e.r) 148 } 149 150 // Write is called to push some bytes to the extractor 151 func (e *ImageExtractor) Write(p []byte) (n int, err error) { 152 return e.w.Write(p) 153 } 154 155 // Close is called when all the bytes has been pushed, to finalize the extraction 156 func (e *ImageExtractor) Close() error { 157 err := e.w.Close() 158 if err != nil { 159 <-e.ch 160 } 161 return err 162 } 163 164 // Abort is called when the extractor can be discarded 165 func (e *ImageExtractor) Abort(err error) { 166 _ = e.w.CloseWithError(err) 167 <-e.ch 168 } 169 170 // Result is called to get the extracted metadata 171 func (e *ImageExtractor) Result() Metadata { 172 m := NewMetadata() 173 m["datetime"] = e.createdAt 174 cfg := <-e.ch 175 176 if cfg, ok := cfg.(image.Config); ok { 177 m["width"] = cfg.Width 178 m["height"] = cfg.Height 179 } 180 181 return m 182 } 183 184 // ExifExtractor is used to extract EXIF metadata from jpegs 185 type ExifExtractor struct { 186 w *io.PipeWriter 187 r *io.PipeReader 188 im *ImageExtractor 189 ch chan interface{} 190 } 191 192 // NewExifExtractor returns an extractor for EXIF metadata 193 func NewExifExtractor(createdAt time.Time, withImageExtractor bool) *ExifExtractor { 194 e := &ExifExtractor{} 195 if withImageExtractor { 196 e.im = NewImageExtractor(createdAt) 197 } 198 e.r, e.w = io.Pipe() 199 e.ch = make(chan interface{}) 200 go e.Start() 201 return e 202 } 203 204 // Start is used in a goroutine to start the metadata extraction 205 func (e *ExifExtractor) Start() { 206 var x *exif.Exif 207 var err error 208 defer func() { 209 r := recover() 210 if errc := e.r.Close(); err == nil { 211 err = errc 212 } 213 if r != nil { 214 e.ch <- fmt.Errorf("metadata: recovered from exif extracting: %s", r) 215 } else if err != nil { 216 e.ch <- err 217 } else { 218 e.ch <- x 219 } 220 }() 221 x, err = exif.Decode(e.r) 222 } 223 224 // Write is called to push some bytes to the extractor 225 func (e *ExifExtractor) Write(p []byte) (n int, err error) { 226 if e.im != nil { 227 _, _ = e.im.Write(p) 228 } 229 return e.w.Write(p) 230 } 231 232 // Close is called when all the bytes has been pushed, to finalize the extraction 233 func (e *ExifExtractor) Close() error { 234 if e.im != nil { 235 e.im.Close() 236 } 237 return e.w.Close() 238 } 239 240 // Abort is called when the extractor can be discarded 241 func (e *ExifExtractor) Abort(err error) { 242 if e.im != nil { 243 e.im.Abort(err) 244 } 245 _ = e.w.CloseWithError(err) 246 <-e.ch 247 } 248 249 // Result is called to get the extracted metadata 250 func (e *ExifExtractor) Result() Metadata { 251 var m Metadata 252 if e.im != nil { 253 m = e.im.Result() 254 } else { 255 m = NewMetadata() 256 } 257 select { 258 case x := <-e.ch: 259 if x, ok := x.(*exif.Exif); ok { 260 localTZ := false 261 if dt, err := x.DateTime(); err == nil { 262 m["datetime"] = dt 263 localTZ = dt.Location() == time.Local 264 } 265 if flash, err := x.Flash(); err == nil { 266 m["flash"] = flash 267 } 268 if lat, long, err := x.LatLong(); err == nil { 269 if !math.IsNaN(lat) && !math.IsNaN(long) { 270 m["gps"] = map[string]float64{ 271 "lat": lat, 272 "long": long, 273 } 274 if localTZ { 275 if loc := lookupLocation(latlong.LookupZoneName(lat, long)); loc != nil { 276 if t, err := exifDateTimeInLocation(x, loc); err == nil { 277 m["datetime"] = t 278 } 279 } 280 } 281 } 282 } 283 if _, ok := m["width"]; !ok { 284 if xDimension, err := x.Get("PixelXDimension"); err == nil { 285 if width, err := xDimension.Int(0); err == nil { 286 m["width"] = width 287 } 288 } 289 } 290 if _, ok := m["height"]; !ok { 291 if yDimension, err := x.Get("PixelYDimension"); err == nil { 292 if height, err := yDimension.Int(0); err == nil { 293 m["height"] = height 294 } 295 } 296 } 297 if o, err := x.Get("Orientation"); err == nil { 298 if orientation, err := o.Int(0); err == nil { 299 m["orientation"] = orientation 300 } 301 } 302 } 303 case <-time.After(1 * time.Minute): 304 // Timeout when the exif parser is blocked waiting for more bytes but 305 // there are no more bytes to read. 306 } 307 return m 308 } 309 310 // Code taken from perkeep 311 // https://github.com/perkeep/perkeep/blob/7f17c0483f2e86575ed87aac35fb75154b16b7f4/pkg/schema/schema.go#L1043-L1094 312 313 // This is basically a copy of the exif.Exif.DateTime() method, except: 314 // - it takes a *time.Location to assume 315 // - the caller already assumes there's no timezone offset or GPS time 316 // in the EXIF, so any of that code can be ignored. 317 func exifDateTimeInLocation(x *exif.Exif, loc *time.Location) (time.Time, error) { 318 tag, err := x.Get(exif.DateTimeOriginal) 319 if err != nil { 320 tag, err = x.Get(exif.DateTime) 321 if err != nil { 322 return time.Time{}, err 323 } 324 } 325 if tag.Format() != tiff.StringVal { 326 return time.Time{}, errors.New("DateTime[Original] not in string format") 327 } 328 const exifTimeLayout = "2006:01:02 15:04:05" 329 dateStr := strings.TrimRight(string(tag.Val), "\x00") 330 return time.ParseInLocation(exifTimeLayout, dateStr, loc) 331 } 332 333 var zoneCache struct { 334 sync.RWMutex 335 m map[string]*time.Location 336 } 337 338 func lookupLocation(zone string) *time.Location { 339 if zone == "" { 340 return nil 341 } 342 zoneCache.RLock() 343 l, ok := zoneCache.m[zone] 344 zoneCache.RUnlock() 345 if ok { 346 return l 347 } 348 loc, err := time.LoadLocation(zone) 349 zoneCache.Lock() 350 if zoneCache.m == nil { 351 zoneCache.m = make(map[string]*time.Location) 352 } 353 zoneCache.m[zone] = loc // even if nil 354 zoneCache.Unlock() 355 if err != nil { 356 return nil 357 } 358 return loc 359 } 360 361 // AudioExtractor is used to extract album/artist/etc. from audio 362 type AudioExtractor struct { 363 w *io.PipeWriter 364 r *io.PipeReader 365 ch chan interface{} 366 } 367 368 // NewAudioExtractor returns an extractor for audio 369 func NewAudioExtractor() *AudioExtractor { 370 e := &AudioExtractor{} 371 e.r, e.w = io.Pipe() 372 e.ch = make(chan interface{}) 373 go e.Start() 374 return e 375 } 376 377 // Start is used in a goroutine to start the metadata extraction 378 func (e *AudioExtractor) Start() { 379 var tags tag.Metadata 380 var buf []byte 381 var err error 382 buf, err = io.ReadAll(e.r) 383 if err != nil { 384 e.r.Close() 385 e.ch <- err 386 return 387 } 388 defer func() { 389 r := recover() 390 if errc := e.r.Close(); err == nil { 391 err = errc 392 } 393 if r != nil { 394 e.ch <- fmt.Errorf("metadata: recovered from audio extracting: %s", r) 395 } else if err != nil { 396 e.ch <- err 397 } else { 398 e.ch <- tags 399 } 400 }() 401 tags, err = tag.ReadFrom(bytes.NewReader(buf)) 402 } 403 404 // Write is called to push some bytes to the extractor 405 func (e *AudioExtractor) Write(p []byte) (n int, err error) { 406 return e.w.Write(p) 407 } 408 409 // Close is called when all the bytes has been pushed, to finalize the extraction 410 func (e *AudioExtractor) Close() error { 411 return e.w.Close() 412 } 413 414 // Abort is called when the extractor can be discarded 415 func (e *AudioExtractor) Abort(err error) { 416 _ = e.w.CloseWithError(err) 417 <-e.ch 418 } 419 420 // Result is called to get the extracted metadata 421 func (e *AudioExtractor) Result() Metadata { 422 m := NewMetadata() 423 tags := <-e.ch 424 if tags, ok := tags.(tag.Metadata); ok { 425 if album := tags.Album(); album != "" { 426 m["album"] = album 427 } 428 if artist := tags.Artist(); artist != "" { 429 m["artist"] = artist 430 } 431 if composer := tags.Composer(); composer != "" { 432 m["composer"] = composer 433 } 434 if genre := tags.Genre(); genre != "" { 435 m["genre"] = genre 436 } 437 if title := tags.Title(); title != "" { 438 m["title"] = title 439 } 440 if year := tags.Year(); year != 0 { 441 m["year"] = year 442 } 443 if track, _ := tags.Track(); track != 0 { 444 m["track"] = track 445 } 446 } 447 return m 448 } 449 450 // ShortcutExtractor is used to extract information from .url files 451 type ShortcutExtractor struct { 452 w *io.PipeWriter 453 r *io.PipeReader 454 ch chan interface{} 455 instance string 456 } 457 458 // NewShortcutExtractor returns an extractor for .url files 459 func NewShortcutExtractor(instance string) *ShortcutExtractor { 460 e := &ShortcutExtractor{} 461 e.instance = instance 462 e.r, e.w = io.Pipe() 463 e.ch = make(chan interface{}) 464 go e.Start() 465 return e 466 } 467 468 // Start is used in a goroutine to start the metadata extraction 469 func (e *ShortcutExtractor) Start() { 470 var link shortcut.Result 471 var err error 472 defer func() { 473 r := recover() 474 if errc := e.r.Close(); err == nil { 475 err = errc 476 } 477 if r != nil { 478 e.ch <- fmt.Errorf("metadata: recovered from shortcut decoding: %s", r) 479 } else if err != nil { 480 e.ch <- err 481 } else { 482 e.ch <- link 483 } 484 }() 485 link, err = shortcut.Parse(e.r) 486 } 487 488 // Write is called to push some bytes to the extractor 489 func (e *ShortcutExtractor) Write(p []byte) (n int, err error) { 490 return e.w.Write(p) 491 } 492 493 // Close is called when all the bytes has been pushed, to finalize the extraction 494 func (e *ShortcutExtractor) Close() error { 495 err := e.w.Close() 496 if err != nil { 497 <-e.ch 498 } 499 return err 500 } 501 502 // Abort is called when the extractor can be discarded 503 func (e *ShortcutExtractor) Abort(err error) { 504 _ = e.w.CloseWithError(err) 505 <-e.ch 506 } 507 508 // Result is called to get the extracted metadata 509 func (e *ShortcutExtractor) Result() Metadata { 510 m := NewMetadata() 511 link := <-e.ch 512 if link, ok := link.(shortcut.Result); ok { 513 cozy, app := extractCozyLink(link, e.instance) 514 if cozy != "" { 515 target := map[string]interface{}{ 516 "cozyMetadata": map[string]interface{}{ 517 "instance": cozy, 518 }, 519 } 520 if app != "" { 521 target["app"] = app 522 } 523 m["target"] = target 524 } 525 } 526 return m 527 } 528 529 func extractCozyLink(link shortcut.Result, instance string) (string, string) { 530 if link.URL == "" { 531 return "", "" 532 } 533 u, err := url.Parse(link.URL) 534 if err != nil { 535 return "", "" 536 } 537 v, err := url.Parse(instance) 538 if err != nil { 539 return "", "" 540 } 541 host, slug, _ := config.SplitCozyHost(u.Host) 542 if host == v.Host { 543 return host, slug 544 } 545 return "", "" 546 }