github.com/elliott5/community@v0.14.1-0.20160709191136-823126fb026a/documize/api/endpoint/conversion_endpoint.go (about) 1 // Copyright 2016 Documize Inc. <legal@documize.com>. All rights reserved. 2 // 3 // This software (Documize Community Edition) is licensed under 4 // GNU AGPL v3 http://www.gnu.org/licenses/agpl-3.0.en.html 5 // 6 // You can operate outside the AGPL restrictions by purchasing 7 // Documize Enterprise Edition and obtaining a commercial license 8 // by contacting <sales@documize.com>. 9 // 10 // https://documize.com 11 12 package endpoint 13 14 import ( 15 "bytes" 16 "encoding/json" 17 "errors" 18 "fmt" 19 "io" 20 "net/http" 21 "strings" 22 23 "github.com/documize/community/documize/api/endpoint/models" 24 "github.com/documize/community/documize/api/entity" 25 "github.com/documize/community/documize/api/request" 26 "github.com/documize/community/documize/api/store" 27 "github.com/documize/community/documize/api/util" 28 "github.com/documize/community/wordsmith/api" 29 "github.com/documize/community/wordsmith/log" 30 31 uuid "github.com/nu7hatch/gouuid" 32 33 "github.com/gorilla/mux" 34 ) 35 36 func uploadDocument(w http.ResponseWriter, r *http.Request) (string, string, string) { 37 method := "uploadDocument" 38 p := request.GetPersister(r) 39 40 params := mux.Vars(r) 41 folderID := params["folderID"] 42 43 if !p.CanUploadDocument(folderID) { 44 writeForbiddenError(w) 45 return "", "", "" 46 } 47 48 // grab file 49 filedata, filename, err := r.FormFile("attachment") 50 51 if err != nil { 52 writeMissingDataError(w, method, "attachment") 53 return "", "", "" 54 } 55 56 b := new(bytes.Buffer) 57 _, err = io.Copy(b, filedata) 58 59 if err != nil { 60 writeServerError(w, method, err) 61 return "", "", "" 62 } 63 64 // generate job id 65 var job = "some-uuid" 66 67 newUUID, err := uuid.NewV4() 68 69 if err != nil { 70 writeServerError(w, method, err) 71 return "", "", "" 72 } 73 74 job = newUUID.String() 75 76 err = storageProvider.Upload(job, filename.Filename, b.Bytes()) 77 78 if err != nil { 79 writeServerError(w, method, err) 80 return "", "", "" 81 } 82 83 log.Info(fmt.Sprintf("Org %s (%s) [Uploaded] %s", p.Context.OrgName, p.Context.OrgID, filename.Filename)) 84 85 return job, folderID, p.Context.OrgID 86 } 87 88 func convertDocument(w http.ResponseWriter, r *http.Request, job, folderID string, conversion api.ConversionJobRequest) { 89 method := "convertDocument" 90 p := request.GetPersister(r) 91 92 var fileResult *api.DocumentConversionResponse 93 var filename string 94 var err error 95 96 filename, fileResult, err = storageProvider.Convert(conversion) 97 98 if err != nil { 99 writePayloadError(w, method, err) 100 return 101 } 102 103 if fileResult.Err != "" { 104 writeGeneralSQLError(w, method, errors.New(fileResult.Err)) 105 return 106 } 107 108 // NOTE: empty .docx documents trigger this error 109 if len(fileResult.Pages) == 0 { 110 writeMissingDataError(w, method, "no pages in document") 111 return 112 } 113 114 // All the commented-out code below should be in following function call 115 116 newDocument, err := processDocument(p, filename, job, folderID, fileResult) 117 118 if err != nil { 119 writeServerError(w, method, err) 120 return 121 } 122 123 json, err := json.Marshal(newDocument) 124 125 if err != nil { 126 writeJSONMarshalError(w, method, "conversion", err) 127 return 128 } 129 130 writeSuccessBytes(w, json) 131 } 132 133 // UploadConvertDocument is an endpoint to both upload and convert a document 134 func UploadConvertDocument(w http.ResponseWriter, r *http.Request) { 135 job, folderID, orgID := uploadDocument(w, r) 136 if job == "" { 137 return // error already handled 138 } 139 convertDocument(w, r, job, folderID, api.ConversionJobRequest{ 140 Job: job, 141 IndexDepth: 4, 142 OrgID: orgID, 143 }) 144 } 145 146 func processDocument(p request.Persister, filename, job, folderID string, fileResult *api.DocumentConversionResponse) (newDocument entity.Document, err error) { 147 // Convert into database objects 148 document := store.ConvertFileResult(filename, fileResult) 149 document.Job = job 150 document.OrgID = p.Context.OrgID 151 document.LabelID = folderID 152 document.UserID = p.Context.UserID 153 documentID := util.UniqueID() 154 document.RefID = documentID 155 156 tx, err := request.Db.Beginx() 157 158 log.IfErr(err) 159 160 p.Context.Transaction = tx 161 162 err = p.AddDocument(document) 163 164 if err != nil { 165 log.IfErr(tx.Rollback()) 166 log.Error("Cannot insert new document", err) 167 return 168 } 169 170 //err = processPage(documentID, fileResult.PageFiles, fileResult.Pages.Children[0], 1, p) 171 172 for k, v := range fileResult.Pages { 173 var page entity.Page 174 page.OrgID = p.Context.OrgID 175 page.DocumentID = documentID 176 page.Level = v.Level 177 page.Title = v.Title 178 page.Body = string(v.Body) 179 page.Sequence = float64(k+1) * 1024.0 // need to start above 0 to allow insertion before the first item 180 pageID := util.UniqueID() 181 page.RefID = pageID 182 183 meta := entity.PageMeta{} 184 meta.PageID = pageID 185 meta.RawBody = page.Body 186 187 model := models.PageModel{} 188 model.Page = page 189 model.Meta = meta 190 191 err = p.AddPage(model) 192 193 if err != nil { 194 log.IfErr(tx.Rollback()) 195 log.Error("Cannot process page newly added document", err) 196 return 197 } 198 } 199 200 for _, e := range fileResult.EmbeddedFiles { 201 //fmt.Println("DEBUG embedded file info", document.OrgId, document.Job, e.Name, len(e.Data), e.ID) 202 var a entity.Attachment 203 a.DocumentID = documentID 204 a.Job = document.Job 205 a.FileID = e.ID 206 a.Filename = strings.Replace(e.Name, "embeddings/", "", 1) 207 a.Data = e.Data 208 refID := util.UniqueID() 209 a.RefID = refID 210 211 err = p.AddAttachment(a) 212 213 if err != nil { 214 log.IfErr(tx.Rollback()) 215 log.Error("Cannot add attachment for newly added document", err) 216 return 217 } 218 } 219 220 log.IfErr(tx.Commit()) 221 222 newDocument, err = p.GetDocument(documentID) 223 224 if err != nil { 225 log.Error("Cannot fetch newly added document", err) 226 return 227 } 228 229 // New code from normal conversion code 230 231 tx, err = request.Db.Beginx() 232 233 if err != nil { 234 log.Error("Cannot begin a transatcion", err) 235 return 236 } 237 238 p.Context.Transaction = tx 239 240 err = p.UpdateDocument(newDocument) // TODO review - this seems to write-back an unaltered record from that read above, but within that it calls searches.UpdateDocument() to reindex the doc. 241 242 if err != nil { 243 log.IfErr(tx.Rollback()) 244 log.Error("Cannot update an imported document", err) 245 return 246 } 247 248 log.IfErr(tx.Commit()) 249 250 // End new code 251 252 return 253 }