github.com/cozy/cozy-stack@v0.0.0-20240327093429-939e4a21320e/model/note/import.go (about)

     1  package note
     2  
     3  import (
     4  	"archive/tar"
     5  	"bytes"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"path"
    10  	"strconv"
    11  	"strings"
    12  
    13  	"github.com/cozy/cozy-stack/model/instance"
    14  	"github.com/cozy/cozy-stack/model/vfs"
    15  	"github.com/cozy/cozy-stack/pkg/consts"
    16  	"github.com/cozy/cozy-stack/pkg/filetype"
    17  	"github.com/cozy/prosemirror-go/markdown"
    18  	"github.com/cozy/prosemirror-go/model"
    19  	"github.com/gofrs/uuid/v5"
    20  )
    21  
    22  // MaxMarkdownSize is the maximal size of a markdown that can be parsed.
    23  const MaxMarkdownSize = 2 * 1024 * 1024
    24  
    25  func ImportFile(inst *instance.Instance, newdoc, olddoc *vfs.FileDoc, body io.ReadCloser) error {
    26  	schemaSpecs := DefaultSchemaSpecs()
    27  	specs := model.SchemaSpecFromJSON(schemaSpecs)
    28  	schema, err := model.NewSchema(&specs)
    29  	if err != nil {
    30  		return err
    31  	}
    32  
    33  	// We need a fileID for saving images
    34  	if newdoc.ID() == "" {
    35  		uuidv7, _ := uuid.NewV7()
    36  		newdoc.SetID(uuidv7.String())
    37  	}
    38  	images, _ := getImages(inst, newdoc.ID())
    39  
    40  	fs := inst.VFS()
    41  	file, err := fs.CreateFile(newdoc, olddoc)
    42  	if err != nil {
    43  		return err
    44  	}
    45  
    46  	reader := io.TeeReader(body, file)
    47  	content, _, err := importReader(inst, newdoc, reader, schema)
    48  
    49  	if content != nil {
    50  		fillMetadata(newdoc, olddoc, schemaSpecs, content)
    51  	} else {
    52  		_, _ = io.Copy(io.Discard, reader)
    53  		inst.Logger().WithNamespace("notes").
    54  			Warnf("Cannot import notes: %s", err)
    55  	}
    56  	if err := file.Close(); err != nil {
    57  		return err
    58  	}
    59  
    60  	if olddoc != nil {
    61  		purgeAllSteps(inst, olddoc.DocID)
    62  	}
    63  	for _, img := range images {
    64  		img.seen = false
    65  		img.ToRemove = true
    66  	}
    67  	cleanImages(inst, images)
    68  	return nil
    69  }
    70  
    71  func ImportImages(inst *instance.Instance, olddoc *vfs.FileDoc) error {
    72  	inst.Logger().WithNamespace("notes").
    73  		Infof("importing images from note: %s", olddoc.ID())
    74  	schemaSpecs := DefaultSchemaSpecs()
    75  	specs := model.SchemaSpecFromJSON(schemaSpecs)
    76  	schema, err := model.NewSchema(&specs)
    77  	if err != nil {
    78  		return fmt.Errorf("failed to read note schema: %w", err)
    79  	}
    80  
    81  	fs := inst.VFS()
    82  	file, err := fs.OpenFile(olddoc)
    83  	if err != nil {
    84  		return fmt.Errorf("failed to open file for note images import: %w", err)
    85  	}
    86  
    87  	content, images, err := importReader(inst, olddoc, file, schema)
    88  	cleanImages(inst, images) // XXX: remove images found in the archive but not in the markdown
    89  	if cerr := file.Close(); cerr != nil {
    90  		return fmt.Errorf("error while closing note file: %w", cerr)
    91  	}
    92  	if content == nil || !hasImages(images) {
    93  		inst.Logger().WithNamespace("notes").
    94  			Infof("No images to import")
    95  		return nil
    96  	}
    97  
    98  	md := markdownSerializer(images).Serialize(content)
    99  	body, err := buildArchive(inst, []byte(md), images)
   100  	if err != nil {
   101  		return fmt.Errorf("failed to build note archive: %w", err)
   102  	}
   103  	newdoc := olddoc.Clone().(*vfs.FileDoc)
   104  	newdoc.ByteSize = int64(len(body))
   105  	newdoc.MD5Sum = nil
   106  	fillMetadata(newdoc, olddoc, schemaSpecs, content)
   107  
   108  	file, err = inst.VFS().CreateFile(newdoc, olddoc)
   109  	if err != nil {
   110  		return fmt.Errorf("failed to create file for note images import: %w", err)
   111  	}
   112  	_, err = file.Write(body)
   113  	if err != nil {
   114  		err = fmt.Errorf("failed to write updated note: %w", err)
   115  	}
   116  	if cerr := file.Close(); cerr != nil && err == nil {
   117  		err = fmt.Errorf("failed to close updated note file: %w", cerr)
   118  	}
   119  
   120  	return err
   121  }
   122  
   123  func importReader(inst *instance.Instance, doc *vfs.FileDoc, reader io.Reader, schema *model.Schema) (*model.Node, []*Image, error) {
   124  	buf := &bytes.Buffer{}
   125  	var hasImages bool
   126  	if _, err := io.CopyN(buf, reader, 512); err != nil {
   127  		if !errors.Is(err, io.EOF) {
   128  			return nil, nil, fmt.Errorf("failed to buffer note content: %w", err)
   129  		}
   130  		hasImages = false
   131  	} else {
   132  		hasImages = isTar(buf.Bytes())
   133  	}
   134  
   135  	if !hasImages {
   136  		if _, err := buf.ReadFrom(reader); err != nil {
   137  			return nil, nil, err
   138  		}
   139  		content, err := parseFile(buf, schema)
   140  		return content, nil, err
   141  	}
   142  
   143  	var content *model.Node
   144  	var err error
   145  	var images []*Image
   146  	defer func() {
   147  		if err == nil && images != nil {
   148  			fixURLForProsemirrorImages(content, images)
   149  		}
   150  	}()
   151  
   152  	tr := tar.NewReader(io.MultiReader(buf, reader))
   153  	for {
   154  		header, errh := tr.Next()
   155  		if errh != nil {
   156  			return content, images, errh
   157  		}
   158  		if header.Typeflag != tar.TypeReg {
   159  			continue
   160  		}
   161  		if header.Name == "index.md" {
   162  			content, err = parseFile(tr, schema)
   163  			if err != nil {
   164  				return nil, nil, fmt.Errorf("failed to parse note markdown: %w", err)
   165  			}
   166  		} else {
   167  			ext := path.Ext(header.Name)
   168  			contentType := filetype.ByExtension(ext)
   169  			upload, erru := NewImageUpload(inst, doc, header.Name, contentType)
   170  			if erru != nil {
   171  				err = fmt.Errorf("failed to create image upload for %s: %w", header.Name, erru)
   172  			} else {
   173  				_, errc := io.Copy(upload, tr)
   174  				if cerr := upload.Close(); cerr != nil && (errc == nil || errc == io.ErrUnexpectedEOF) {
   175  					errc = fmt.Errorf("failed to upload image %s: %w", header.Name, cerr)
   176  				}
   177  				if errc != nil {
   178  					err = errc
   179  				} else {
   180  					images = append(images, upload.Image)
   181  				}
   182  			}
   183  		}
   184  	}
   185  }
   186  
   187  func fixURLForProsemirrorImages(node *model.Node, images []*Image) {
   188  	if node.Type.Name == "media" {
   189  		name, _ := node.Attrs["alt"].(string)
   190  		for _, img := range images {
   191  			if img.originalName == name {
   192  				node.Attrs["url"] = img.DocID
   193  				img.seen = true
   194  			}
   195  		}
   196  	}
   197  
   198  	node.ForEach(func(child *model.Node, _ int, _ int) {
   199  		fixURLForProsemirrorImages(child, images)
   200  	})
   201  }
   202  
   203  func fillMetadata(newdoc, olddoc *vfs.FileDoc, schemaSpecs map[string]interface{}, content *model.Node) {
   204  	version := 1
   205  	if olddoc != nil {
   206  		rev := strings.Split(olddoc.DocRev, "-")[0]
   207  		n, _ := strconv.Atoi(rev)
   208  		version = n * 1000
   209  	}
   210  
   211  	newdoc.Mime = consts.NoteMimeType
   212  	newdoc.Class = "text"
   213  	newdoc.Metadata = vfs.Metadata{
   214  		"title":   strings.TrimSuffix(newdoc.DocName, ".cozy-note"),
   215  		"content": content.ToJSON(),
   216  		"version": version,
   217  		"schema":  schemaSpecs,
   218  	}
   219  }
   220  
   221  func parseFile(r io.Reader, schema *model.Schema) (*model.Node, error) {
   222  	buf, err := io.ReadAll(io.LimitReader(r, MaxMarkdownSize))
   223  	if err != nil {
   224  		return nil, err
   225  	}
   226  	parser := markdownParser()
   227  	funcs := markdownNodeMapper()
   228  	return markdown.ParseMarkdown(parser, funcs, buf, schema)
   229  }
   230  
   231  func isTar(buf []byte) bool {
   232  	if len(buf) < 263 {
   233  		return false
   234  	}
   235  	// https://en.wikipedia.org/wiki/Tar_(computing)#UStar_format
   236  	return buf[257] == 'u' && buf[258] == 's' && buf[259] == 't' &&
   237  		buf[260] == 'a' && buf[261] == 'r' && buf[262] == 0
   238  }