github.com/cozy/cozy-stack@v0.0.0-20240603063001-31110fa4cae1/model/vfs/metadata.go (about)

     1  package vfs
     2  
     3  import (
     4  	"bytes"
     5  	"errors"
     6  	"fmt"
     7  	"image"
     8  	"io"
     9  	"math"
    10  	"net/url"
    11  	"strings"
    12  	"sync"
    13  	"time"
    14  
    15  	// Packages image/... are not used explicitly in the code below,
    16  	// but are imported for their initialization side-effects
    17  	_ "image/gif"
    18  	_ "image/jpeg"
    19  	_ "image/png"
    20  
    21  	// Same for image/webp
    22  	_ "golang.org/x/image/webp"
    23  
    24  	"github.com/bradfitz/latlong"
    25  	"github.com/cozy/cozy-stack/pkg/config/config"
    26  	"github.com/cozy/cozy-stack/pkg/consts"
    27  	"github.com/cozy/cozy-stack/pkg/shortcut"
    28  	"github.com/cozy/goexif2/exif"
    29  	"github.com/cozy/goexif2/tiff"
    30  	"github.com/dhowden/tag"
    31  )
    32  
    33  // MetadataExtractorVersion is the version number of the metadata extractor.
    34  // It will be used later to know which files can be re-examined to get more
    35  // metadata when the extractor is improved.
    36  const MetadataExtractorVersion = 2
    37  
    38  // Metadata is a list of metadata specific to each mimetype:
    39  // id3 for music, exif for jpegs, etc.
    40  type Metadata map[string]interface{}
    41  
    42  // NewMetadata returns a new metadata object, with the version field set
    43  func NewMetadata() Metadata {
    44  	m := Metadata{}
    45  	m["extractor_version"] = MetadataExtractorVersion
    46  	return m
    47  }
    48  
    49  // MergeMetadata takes a metadata map and merges it in the FileDoc
    50  func MergeMetadata(doc *FileDoc, meta Metadata) {
    51  	if doc.Metadata == nil {
    52  		doc.Metadata = meta
    53  	} else {
    54  		for k, v := range meta {
    55  			// XXX: do not overwrite the target metadata for sharing shortcuts
    56  			if k != "target" || doc.Metadata[k] == nil {
    57  				doc.Metadata[k] = v
    58  			}
    59  		}
    60  	}
    61  }
    62  
    63  // RemoveCertifiedMetadata returns a metadata map where the keys that are
    64  // certified have been removed. It can be useful for sharing, as certified
    65  // metadata are only valid localy.
    66  func (m Metadata) RemoveCertifiedMetadata() Metadata {
    67  	if len(m) == 0 {
    68  		return Metadata{}
    69  	}
    70  	result := make(Metadata, len(m))
    71  	for k, v := range m {
    72  		if k == consts.CarbonCopyKey || k == consts.ElectronicSafeKey {
    73  			continue
    74  		}
    75  		result[k] = v
    76  	}
    77  	return result
    78  }
    79  
// MetaExtractor is an interface for extracting metadata from a file.
// The content of the file is pushed to the extractor through the
// io.WriteCloser methods.
type MetaExtractor interface {
	// Write pushes some bytes of the file to the extractor, and Close is
	// called when all the bytes have been pushed.
	io.WriteCloser
	// Abort is called when the extractor can be discarded.
	Abort(error)
	// Result is called to get the extracted metadata.
	Result() Metadata
}
    86  
    87  // NewMetaExtractor returns an extractor for metadata if the mime type has one,
    88  // or null else
    89  func NewMetaExtractor(doc *FileDoc) *MetaExtractor {
    90  	var e MetaExtractor
    91  	switch doc.Mime {
    92  	case "image/jpeg":
    93  		e = NewExifExtractor(doc.CreatedAt, true)
    94  	case "image/heic", "image/heif":
    95  		e = NewExifExtractor(doc.CreatedAt, false)
    96  	case "image/png", "image/gif":
    97  		e = NewImageExtractor(doc.CreatedAt)
    98  	case "audio/mp3", "audio/mpeg", "audio/ogg", "audio/x-m4a", "audio/flac":
    99  		e = NewAudioExtractor()
   100  	case consts.ShortcutMimeType:
   101  		var instance string
   102  		if doc.CozyMetadata != nil {
   103  			instance = doc.CozyMetadata.CreatedOn
   104  		}
   105  		e = NewShortcutExtractor(instance)
   106  	}
   107  	if e != nil {
   108  		return &e
   109  	}
   110  	return nil
   111  }
   112  
// ImageExtractor is used to extract width/height from images
type ImageExtractor struct {
	// w and r are the two ends of the pipe created by NewImageExtractor:
	// Write pushes bytes to w, and Start decodes the image config from r.
	w *io.PipeWriter
	r *io.PipeReader
	// ch receives a single value from Start: an image.Config on success,
	// or an error.
	ch chan interface{}
	// createdAt is put in the "datetime" field of the result.
	createdAt time.Time
}
   120  
   121  // NewImageExtractor returns an extractor for images
   122  func NewImageExtractor(createdAt time.Time) *ImageExtractor {
   123  	e := &ImageExtractor{createdAt: createdAt}
   124  	e.r, e.w = io.Pipe()
   125  	e.ch = make(chan interface{})
   126  	go e.Start()
   127  	return e
   128  }
   129  
   130  // Start is used in a goroutine to start the metadata extraction
   131  func (e *ImageExtractor) Start() {
   132  	var cfg image.Config
   133  	var err error
   134  	defer func() {
   135  		r := recover()
   136  		if errc := e.r.Close(); err == nil {
   137  			err = errc
   138  		}
   139  		if r != nil {
   140  			e.ch <- fmt.Errorf("metadata: recovered from image decoding: %s", r)
   141  		} else if err != nil {
   142  			e.ch <- err
   143  		} else {
   144  			e.ch <- cfg
   145  		}
   146  	}()
   147  	cfg, _, err = image.DecodeConfig(e.r)
   148  }
   149  
   150  // Write is called to push some bytes to the extractor
   151  func (e *ImageExtractor) Write(p []byte) (n int, err error) {
   152  	return e.w.Write(p)
   153  }
   154  
   155  // Close is called when all the bytes has been pushed, to finalize the extraction
   156  func (e *ImageExtractor) Close() error {
   157  	err := e.w.Close()
   158  	if err != nil {
   159  		<-e.ch
   160  	}
   161  	return err
   162  }
   163  
// Abort is called when the extractor can be discarded. It closes the write
// side of the pipe with the given error, and then waits for the value sent by
// Start on the channel (the value itself is dropped).
func (e *ImageExtractor) Abort(err error) {
	_ = e.w.CloseWithError(err)
	<-e.ch
}
   169  
   170  // Result is called to get the extracted metadata
   171  func (e *ImageExtractor) Result() Metadata {
   172  	m := NewMetadata()
   173  	m["datetime"] = e.createdAt
   174  	cfg := <-e.ch
   175  
   176  	if cfg, ok := cfg.(image.Config); ok {
   177  		m["width"] = cfg.Width
   178  		m["height"] = cfg.Height
   179  	}
   180  
   181  	return m
   182  }
   183  
// ExifExtractor is used to extract EXIF metadata from jpegs
type ExifExtractor struct {
	// w and r are the two ends of the pipe created by NewExifExtractor:
	// Write pushes bytes to w, and Start decodes the EXIF data from r.
	w *io.PipeWriter
	r *io.PipeReader
	// im is an optional image extractor (nil when the extractor has been
	// created without it) that receives the same bytes.
	im *ImageExtractor
	// ch receives a single value from Start: an *exif.Exif on success, or
	// an error.
	ch chan interface{}
}
   191  
   192  // NewExifExtractor returns an extractor for EXIF metadata
   193  func NewExifExtractor(createdAt time.Time, withImageExtractor bool) *ExifExtractor {
   194  	e := &ExifExtractor{}
   195  	if withImageExtractor {
   196  		e.im = NewImageExtractor(createdAt)
   197  	}
   198  	e.r, e.w = io.Pipe()
   199  	e.ch = make(chan interface{})
   200  	go e.Start()
   201  	return e
   202  }
   203  
   204  // Start is used in a goroutine to start the metadata extraction
   205  func (e *ExifExtractor) Start() {
   206  	var x *exif.Exif
   207  	var err error
   208  	defer func() {
   209  		r := recover()
   210  		if errc := e.r.Close(); err == nil {
   211  			err = errc
   212  		}
   213  		if r != nil {
   214  			e.ch <- fmt.Errorf("metadata: recovered from exif extracting: %s", r)
   215  		} else if err != nil {
   216  			e.ch <- err
   217  		} else {
   218  			e.ch <- x
   219  		}
   220  	}()
   221  	x, err = exif.Decode(e.r)
   222  }
   223  
   224  // Write is called to push some bytes to the extractor
   225  func (e *ExifExtractor) Write(p []byte) (n int, err error) {
   226  	if e.im != nil {
   227  		_, _ = e.im.Write(p)
   228  	}
   229  	return e.w.Write(p)
   230  }
   231  
   232  // Close is called when all the bytes has been pushed, to finalize the extraction
   233  func (e *ExifExtractor) Close() error {
   234  	if e.im != nil {
   235  		e.im.Close()
   236  	}
   237  	return e.w.Close()
   238  }
   239  
   240  // Abort is called when the extractor can be discarded
   241  func (e *ExifExtractor) Abort(err error) {
   242  	if e.im != nil {
   243  		e.im.Abort(err)
   244  	}
   245  	_ = e.w.CloseWithError(err)
   246  	<-e.ch
   247  }
   248  
   249  // Result is called to get the extracted metadata
   250  func (e *ExifExtractor) Result() Metadata {
   251  	var m Metadata
   252  	if e.im != nil {
   253  		m = e.im.Result()
   254  	} else {
   255  		m = NewMetadata()
   256  	}
   257  	select {
   258  	case x := <-e.ch:
   259  		if x, ok := x.(*exif.Exif); ok {
   260  			localTZ := false
   261  			if dt, err := x.DateTime(); err == nil {
   262  				m["datetime"] = dt
   263  				localTZ = dt.Location() == time.Local
   264  			}
   265  			if flash, err := x.Flash(); err == nil {
   266  				m["flash"] = flash
   267  			}
   268  			if lat, long, err := x.LatLong(); err == nil {
   269  				if !math.IsNaN(lat) && !math.IsNaN(long) {
   270  					m["gps"] = map[string]float64{
   271  						"lat":  lat,
   272  						"long": long,
   273  					}
   274  					if localTZ {
   275  						if loc := lookupLocation(latlong.LookupZoneName(lat, long)); loc != nil {
   276  							if t, err := exifDateTimeInLocation(x, loc); err == nil {
   277  								m["datetime"] = t
   278  							}
   279  						}
   280  					}
   281  				}
   282  			}
   283  			if _, ok := m["width"]; !ok {
   284  				if xDimension, err := x.Get("PixelXDimension"); err == nil {
   285  					if width, err := xDimension.Int(0); err == nil {
   286  						m["width"] = width
   287  					}
   288  				}
   289  			}
   290  			if _, ok := m["height"]; !ok {
   291  				if yDimension, err := x.Get("PixelYDimension"); err == nil {
   292  					if height, err := yDimension.Int(0); err == nil {
   293  						m["height"] = height
   294  					}
   295  				}
   296  			}
   297  			if o, err := x.Get("Orientation"); err == nil {
   298  				if orientation, err := o.Int(0); err == nil {
   299  					m["orientation"] = orientation
   300  				}
   301  			}
   302  		}
   303  	case <-time.After(1 * time.Minute):
   304  		// Timeout when the exif parser is blocked waiting for more bytes but
   305  		// there are no more bytes to read.
   306  	}
   307  	return m
   308  }
   309  
   310  // Code taken from perkeep
   311  // https://github.com/perkeep/perkeep/blob/7f17c0483f2e86575ed87aac35fb75154b16b7f4/pkg/schema/schema.go#L1043-L1094
   312  
   313  // This is basically a copy of the exif.Exif.DateTime() method, except:
   314  //   - it takes a *time.Location to assume
   315  //   - the caller already assumes there's no timezone offset or GPS time
   316  //     in the EXIF, so any of that code can be ignored.
   317  func exifDateTimeInLocation(x *exif.Exif, loc *time.Location) (time.Time, error) {
   318  	tag, err := x.Get(exif.DateTimeOriginal)
   319  	if err != nil {
   320  		tag, err = x.Get(exif.DateTime)
   321  		if err != nil {
   322  			return time.Time{}, err
   323  		}
   324  	}
   325  	if tag.Format() != tiff.StringVal {
   326  		return time.Time{}, errors.New("DateTime[Original] not in string format")
   327  	}
   328  	const exifTimeLayout = "2006:01:02 15:04:05"
   329  	dateStr := strings.TrimRight(string(tag.Val), "\x00")
   330  	return time.ParseInLocation(exifTimeLayout, dateStr, loc)
   331  }
   332  
   333  var zoneCache struct {
   334  	sync.RWMutex
   335  	m map[string]*time.Location
   336  }
   337  
   338  func lookupLocation(zone string) *time.Location {
   339  	if zone == "" {
   340  		return nil
   341  	}
   342  	zoneCache.RLock()
   343  	l, ok := zoneCache.m[zone]
   344  	zoneCache.RUnlock()
   345  	if ok {
   346  		return l
   347  	}
   348  	loc, err := time.LoadLocation(zone)
   349  	zoneCache.Lock()
   350  	if zoneCache.m == nil {
   351  		zoneCache.m = make(map[string]*time.Location)
   352  	}
   353  	zoneCache.m[zone] = loc // even if nil
   354  	zoneCache.Unlock()
   355  	if err != nil {
   356  		return nil
   357  	}
   358  	return loc
   359  }
   360  
// AudioExtractor is used to extract album/artist/etc. from audio
type AudioExtractor struct {
	// w and r are the two ends of the pipe created by NewAudioExtractor:
	// Write pushes bytes to w, and Start reads them from r.
	w *io.PipeWriter
	r *io.PipeReader
	// ch receives a single value from Start: a tag.Metadata on success, or
	// an error.
	ch chan interface{}
}
   367  
   368  // NewAudioExtractor returns an extractor for audio
   369  func NewAudioExtractor() *AudioExtractor {
   370  	e := &AudioExtractor{}
   371  	e.r, e.w = io.Pipe()
   372  	e.ch = make(chan interface{})
   373  	go e.Start()
   374  	return e
   375  }
   376  
   377  // Start is used in a goroutine to start the metadata extraction
   378  func (e *AudioExtractor) Start() {
   379  	var tags tag.Metadata
   380  	var buf []byte
   381  	var err error
   382  	buf, err = io.ReadAll(e.r)
   383  	if err != nil {
   384  		e.r.Close()
   385  		e.ch <- err
   386  		return
   387  	}
   388  	defer func() {
   389  		r := recover()
   390  		if errc := e.r.Close(); err == nil {
   391  			err = errc
   392  		}
   393  		if r != nil {
   394  			e.ch <- fmt.Errorf("metadata: recovered from audio extracting: %s", r)
   395  		} else if err != nil {
   396  			e.ch <- err
   397  		} else {
   398  			e.ch <- tags
   399  		}
   400  	}()
   401  	tags, err = tag.ReadFrom(bytes.NewReader(buf))
   402  }
   403  
   404  // Write is called to push some bytes to the extractor
   405  func (e *AudioExtractor) Write(p []byte) (n int, err error) {
   406  	return e.w.Write(p)
   407  }
   408  
// Close is called when all the bytes have been pushed, to finalize the
// extraction. It closes the write side of the pipe and returns the error of
// this operation.
func (e *AudioExtractor) Close() error {
	return e.w.Close()
}
   413  
// Abort is called when the extractor can be discarded. It closes the write
// side of the pipe with the given error, and then waits for the value sent by
// Start on the channel (the value itself is dropped).
func (e *AudioExtractor) Abort(err error) {
	_ = e.w.CloseWithError(err)
	<-e.ch
}
   419  
   420  // Result is called to get the extracted metadata
   421  func (e *AudioExtractor) Result() Metadata {
   422  	m := NewMetadata()
   423  	tags := <-e.ch
   424  	if tags, ok := tags.(tag.Metadata); ok {
   425  		if album := tags.Album(); album != "" {
   426  			m["album"] = album
   427  		}
   428  		if artist := tags.Artist(); artist != "" {
   429  			m["artist"] = artist
   430  		}
   431  		if composer := tags.Composer(); composer != "" {
   432  			m["composer"] = composer
   433  		}
   434  		if genre := tags.Genre(); genre != "" {
   435  			m["genre"] = genre
   436  		}
   437  		if title := tags.Title(); title != "" {
   438  			m["title"] = title
   439  		}
   440  		if year := tags.Year(); year != 0 {
   441  			m["year"] = year
   442  		}
   443  		if track, _ := tags.Track(); track != 0 {
   444  			m["track"] = track
   445  		}
   446  	}
   447  	return m
   448  }
   449  
// ShortcutExtractor is used to extract information from .url files
type ShortcutExtractor struct {
	// w and r are the two ends of the pipe created by NewShortcutExtractor:
	// Write pushes bytes to w, and Start parses the shortcut from r.
	w *io.PipeWriter
	r *io.PipeReader
	// ch receives a single value from Start: a shortcut.Result on success,
	// or an error.
	ch chan interface{}
	// instance is parsed as an URL, and its host is compared to the host of
	// the link to fill the "target" field of the result.
	instance string
}
   457  
   458  // NewShortcutExtractor returns an extractor for .url files
   459  func NewShortcutExtractor(instance string) *ShortcutExtractor {
   460  	e := &ShortcutExtractor{}
   461  	e.instance = instance
   462  	e.r, e.w = io.Pipe()
   463  	e.ch = make(chan interface{})
   464  	go e.Start()
   465  	return e
   466  }
   467  
   468  // Start is used in a goroutine to start the metadata extraction
   469  func (e *ShortcutExtractor) Start() {
   470  	var link shortcut.Result
   471  	var err error
   472  	defer func() {
   473  		r := recover()
   474  		if errc := e.r.Close(); err == nil {
   475  			err = errc
   476  		}
   477  		if r != nil {
   478  			e.ch <- fmt.Errorf("metadata: recovered from shortcut decoding: %s", r)
   479  		} else if err != nil {
   480  			e.ch <- err
   481  		} else {
   482  			e.ch <- link
   483  		}
   484  	}()
   485  	link, err = shortcut.Parse(e.r)
   486  }
   487  
   488  // Write is called to push some bytes to the extractor
   489  func (e *ShortcutExtractor) Write(p []byte) (n int, err error) {
   490  	return e.w.Write(p)
   491  }
   492  
   493  // Close is called when all the bytes has been pushed, to finalize the extraction
   494  func (e *ShortcutExtractor) Close() error {
   495  	err := e.w.Close()
   496  	if err != nil {
   497  		<-e.ch
   498  	}
   499  	return err
   500  }
   501  
// Abort is called when the extractor can be discarded. It closes the write
// side of the pipe with the given error, and then waits for the value sent by
// Start on the channel (the value itself is dropped).
func (e *ShortcutExtractor) Abort(err error) {
	_ = e.w.CloseWithError(err)
	<-e.ch
}
   507  
   508  // Result is called to get the extracted metadata
   509  func (e *ShortcutExtractor) Result() Metadata {
   510  	m := NewMetadata()
   511  	link := <-e.ch
   512  	if link, ok := link.(shortcut.Result); ok {
   513  		cozy, app := extractCozyLink(link, e.instance)
   514  		if cozy != "" {
   515  			target := map[string]interface{}{
   516  				"cozyMetadata": map[string]interface{}{
   517  					"instance": cozy,
   518  				},
   519  			}
   520  			if app != "" {
   521  				target["app"] = app
   522  			}
   523  			m["target"] = target
   524  		}
   525  	}
   526  	return m
   527  }
   528  
   529  func extractCozyLink(link shortcut.Result, instance string) (string, string) {
   530  	if link.URL == "" {
   531  		return "", ""
   532  	}
   533  	u, err := url.Parse(link.URL)
   534  	if err != nil {
   535  		return "", ""
   536  	}
   537  	v, err := url.Parse(instance)
   538  	if err != nil {
   539  		return "", ""
   540  	}
   541  	host, slug, _ := config.SplitCozyHost(u.Host)
   542  	if host == v.Host {
   543  		return host, slug
   544  	}
   545  	return "", ""
   546  }