github.com/cozy/cozy-stack@v0.0.0-20240327093429-939e4a21320e/model/note/import.go (about) 1 package note 2 3 import ( 4 "archive/tar" 5 "bytes" 6 "errors" 7 "fmt" 8 "io" 9 "path" 10 "strconv" 11 "strings" 12 13 "github.com/cozy/cozy-stack/model/instance" 14 "github.com/cozy/cozy-stack/model/vfs" 15 "github.com/cozy/cozy-stack/pkg/consts" 16 "github.com/cozy/cozy-stack/pkg/filetype" 17 "github.com/cozy/prosemirror-go/markdown" 18 "github.com/cozy/prosemirror-go/model" 19 "github.com/gofrs/uuid/v5" 20 ) 21 22 // MaxMarkdownSize is the maximal size of a markdown that can be parsed. 23 const MaxMarkdownSize = 2 * 1024 * 1024 24 25 func ImportFile(inst *instance.Instance, newdoc, olddoc *vfs.FileDoc, body io.ReadCloser) error { 26 schemaSpecs := DefaultSchemaSpecs() 27 specs := model.SchemaSpecFromJSON(schemaSpecs) 28 schema, err := model.NewSchema(&specs) 29 if err != nil { 30 return err 31 } 32 33 // We need a fileID for saving images 34 if newdoc.ID() == "" { 35 uuidv7, _ := uuid.NewV7() 36 newdoc.SetID(uuidv7.String()) 37 } 38 images, _ := getImages(inst, newdoc.ID()) 39 40 fs := inst.VFS() 41 file, err := fs.CreateFile(newdoc, olddoc) 42 if err != nil { 43 return err 44 } 45 46 reader := io.TeeReader(body, file) 47 content, _, err := importReader(inst, newdoc, reader, schema) 48 49 if content != nil { 50 fillMetadata(newdoc, olddoc, schemaSpecs, content) 51 } else { 52 _, _ = io.Copy(io.Discard, reader) 53 inst.Logger().WithNamespace("notes"). 54 Warnf("Cannot import notes: %s", err) 55 } 56 if err := file.Close(); err != nil { 57 return err 58 } 59 60 if olddoc != nil { 61 purgeAllSteps(inst, olddoc.DocID) 62 } 63 for _, img := range images { 64 img.seen = false 65 img.ToRemove = true 66 } 67 cleanImages(inst, images) 68 return nil 69 } 70 71 func ImportImages(inst *instance.Instance, olddoc *vfs.FileDoc) error { 72 inst.Logger().WithNamespace("notes"). 73 Infof("importing images from note: %s", olddoc.ID()) 74 schemaSpecs := DefaultSchemaSpecs() 75 specs := model.SchemaSpecFromJSON(schemaSpecs) 76 schema, err := model.NewSchema(&specs) 77 if err != nil { 78 return fmt.Errorf("failed to read note schema: %w", err) 79 } 80 81 fs := inst.VFS() 82 file, err := fs.OpenFile(olddoc) 83 if err != nil { 84 return fmt.Errorf("failed to open file for note images import: %w", err) 85 } 86 87 content, images, err := importReader(inst, olddoc, file, schema) 88 cleanImages(inst, images) // XXX: remove images found in the archive but not in the markdown 89 if cerr := file.Close(); cerr != nil { 90 return fmt.Errorf("error while closing note file: %w", cerr) 91 } 92 if content == nil || !hasImages(images) { 93 inst.Logger().WithNamespace("notes"). 94 Infof("No images to import") 95 return nil 96 } 97 98 md := markdownSerializer(images).Serialize(content) 99 body, err := buildArchive(inst, []byte(md), images) 100 if err != nil { 101 return fmt.Errorf("failed to build note archive: %w", err) 102 } 103 newdoc := olddoc.Clone().(*vfs.FileDoc) 104 newdoc.ByteSize = int64(len(body)) 105 newdoc.MD5Sum = nil 106 fillMetadata(newdoc, olddoc, schemaSpecs, content) 107 108 file, err = inst.VFS().CreateFile(newdoc, olddoc) 109 if err != nil { 110 return fmt.Errorf("failed to create file for note images import: %w", err) 111 } 112 _, err = file.Write(body) 113 if err != nil { 114 err = fmt.Errorf("failed to write updated note: %w", err) 115 } 116 if cerr := file.Close(); cerr != nil && err == nil { 117 err = fmt.Errorf("failed to close updated note file: %w", cerr) 118 } 119 120 return err 121 } 122 123 func importReader(inst *instance.Instance, doc *vfs.FileDoc, reader io.Reader, schema *model.Schema) (*model.Node, []*Image, error) { 124 buf := &bytes.Buffer{} 125 var hasImages bool 126 if _, err := io.CopyN(buf, reader, 512); err != nil { 127 if !errors.Is(err, io.EOF) { 128 return nil, nil, fmt.Errorf("failed to buffer note content: %w", err) 129 } 130 hasImages = false 131 } else { 132 hasImages = isTar(buf.Bytes()) 133 } 134 135 if !hasImages { 136 if _, err := buf.ReadFrom(reader); err != nil { 137 return nil, nil, err 138 } 139 content, err := parseFile(buf, schema) 140 return content, nil, err 141 } 142 143 var content *model.Node 144 var err error 145 var images []*Image 146 defer func() { 147 if err == nil && images != nil { 148 fixURLForProsemirrorImages(content, images) 149 } 150 }() 151 152 tr := tar.NewReader(io.MultiReader(buf, reader)) 153 for { 154 header, errh := tr.Next() 155 if errh != nil { 156 return content, images, errh 157 } 158 if header.Typeflag != tar.TypeReg { 159 continue 160 } 161 if header.Name == "index.md" { 162 content, err = parseFile(tr, schema) 163 if err != nil { 164 return nil, nil, fmt.Errorf("failed to parse note markdown: %w", err) 165 } 166 } else { 167 ext := path.Ext(header.Name) 168 contentType := filetype.ByExtension(ext) 169 upload, erru := NewImageUpload(inst, doc, header.Name, contentType) 170 if erru != nil { 171 err = fmt.Errorf("failed to create image upload for %s: %w", header.Name, erru) 172 } else { 173 _, errc := io.Copy(upload, tr) 174 if cerr := upload.Close(); cerr != nil && (errc == nil || errc == io.ErrUnexpectedEOF) { 175 errc = fmt.Errorf("failed to upload image %s: %w", header.Name, cerr) 176 } 177 if errc != nil { 178 err = errc 179 } else { 180 images = append(images, upload.Image) 181 } 182 } 183 } 184 } 185 } 186 187 func fixURLForProsemirrorImages(node *model.Node, images []*Image) { 188 if node.Type.Name == "media" { 189 name, _ := node.Attrs["alt"].(string) 190 for _, img := range images { 191 if img.originalName == name { 192 node.Attrs["url"] = img.DocID 193 img.seen = true 194 } 195 } 196 } 197 198 node.ForEach(func(child *model.Node, _ int, _ int) { 199 fixURLForProsemirrorImages(child, images) 200 }) 201 } 202 203 func fillMetadata(newdoc, olddoc *vfs.FileDoc, schemaSpecs map[string]interface{}, content *model.Node) { 204 version := 1 205 if olddoc != nil { 206 rev := strings.Split(olddoc.DocRev, "-")[0] 207 n, _ := strconv.Atoi(rev) 208 version = n * 1000 209 } 210 211 newdoc.Mime = consts.NoteMimeType 212 newdoc.Class = "text" 213 newdoc.Metadata = vfs.Metadata{ 214 "title": strings.TrimSuffix(newdoc.DocName, ".cozy-note"), 215 "content": content.ToJSON(), 216 "version": version, 217 "schema": schemaSpecs, 218 } 219 } 220 221 func parseFile(r io.Reader, schema *model.Schema) (*model.Node, error) { 222 buf, err := io.ReadAll(io.LimitReader(r, MaxMarkdownSize)) 223 if err != nil { 224 return nil, err 225 } 226 parser := markdownParser() 227 funcs := markdownNodeMapper() 228 return markdown.ParseMarkdown(parser, funcs, buf, schema) 229 } 230 231 func isTar(buf []byte) bool { 232 if len(buf) < 263 { 233 return false 234 } 235 // https://en.wikipedia.org/wiki/Tar_(computing)#UStar_format 236 return buf[257] == 'u' && buf[258] == 's' && buf[259] == 't' && 237 buf[260] == 'a' && buf[261] == 'r' && buf[262] == 0 238 }