// Source: github.com/cozy/cozy-stack@v0.0.0-20240603063001-31110fa4cae1/pkg/couchdb/bulk.go

package couchdb

import (
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"
	"time"

	"github.com/cozy/cozy-stack/pkg/config/config"
	"github.com/cozy/cozy-stack/pkg/couchdb/revision"
	"github.com/cozy/cozy-stack/pkg/logger"
	"github.com/cozy/cozy-stack/pkg/prefixer"
	"github.com/cozy/cozy-stack/pkg/realtime"
	"github.com/google/go-querystring/query"
)

// AllDocsRequest is used to build a _all_docs request. Each field is sent as
// the query-string parameter named in its `url` tag, and is left out of the
// query string when it holds its zero value (omitempty).
type AllDocsRequest struct {
	Descending bool     `url:"descending,omitempty"`
	Limit      int      `url:"limit,omitempty"`
	Skip       int      `url:"skip,omitempty"`
	StartKey   string   `url:"startkey,omitempty"`
	EndKey     string   `url:"endkey,omitempty"`
	Keys       []string `url:"keys,omitempty"`
}

// AllDocsResponse is the response we receive from an _all_docs request. Only
// the offset, total_rows and rows members of the JSON payload are decoded.
type AllDocsResponse struct {
	Offset    int          `json:"offset"`
	TotalRows int          `json:"total_rows"`
	Rows      []AllDocsRow `json:"rows"`
}

// AllDocsRow is a row inside the _all_docs response. The document is kept as
// raw JSON, so that its decoding can be deferred to the caller.
type AllDocsRow struct {
	ID  string          `json:"id"`
	Doc json.RawMessage `json:"doc"`
}

// IDRev is used for the payload of POST _bulk_get. The rev member is omitted
// from the JSON when Rev is empty.
type IDRev struct {
	ID  string `json:"id"`
	Rev string `json:"rev,omitempty"`
}

// BulkGetResponse is the response we receive from a _bulk_get request. For
// each result, only the "ok" member of the listed docs is decoded.
type BulkGetResponse struct {
	Results []struct {
		Docs []struct {
			OK map[string]interface{} `json:"ok"`
		} `json:"docs"`
	} `json:"results"`
}

// CountAllDocs returns the number of documents of the given doctype.
60 func CountAllDocs(db prefixer.Prefixer, doctype string) (int, error) { 61 var response AllDocsResponse 62 url := "_all_docs?limit=0" 63 err := makeRequest(db, doctype, http.MethodGet, url, nil, &response) 64 if err != nil { 65 return 0, err 66 } 67 return response.TotalRows, nil 68 } 69 70 // CountNormalDocs returns the number of documents of the given doctype, 71 // and excludes the design docs from the count. 72 func CountNormalDocs(db prefixer.Prefixer, doctype string) (int, error) { 73 var designRes ViewResponse 74 err := makeRequest(db, doctype, http.MethodGet, "_design_docs", nil, &designRes) 75 if err != nil { 76 return 0, err 77 } 78 total := designRes.Total 79 // CouchDB response for the total_rows on the _design_docs endpoint: 80 // - is the total number of documents on CouchDB 2.2 (and before) 81 // - is the total number of design documents on CouchDB 2.3+ 82 // See https://github.com/apache/couchdb/issues/1603 83 if total == len(designRes.Rows) { 84 if total, err = CountAllDocs(db, doctype); err != nil { 85 return 0, err 86 } 87 } 88 return total - len(designRes.Rows), nil 89 } 90 91 // GetAllDocs returns all documents of a specified doctype. It filters 92 // out the possible _design document. 93 func GetAllDocs(db prefixer.Prefixer, doctype string, req *AllDocsRequest, results interface{}) (err error) { 94 return getAllDocs(db, doctype, req, results, false) 95 } 96 97 // GetDesignDocs does the same as GetAllDocs, but it keeps the design docs. 
98 func GetDesignDocs(db prefixer.Prefixer, doctype string, req *AllDocsRequest, results interface{}) (err error) { 99 return getAllDocs(db, doctype, req, results, true) 100 } 101 102 func getAllDocs(db prefixer.Prefixer, doctype string, req *AllDocsRequest, results interface{}, includeDesignDocs bool) (err error) { 103 var v url.Values 104 if req != nil { 105 v, err = req.Values() 106 if err != nil { 107 return err 108 } 109 } else { 110 v = make(url.Values) 111 } 112 v.Add("include_docs", "true") 113 var response AllDocsResponse 114 if req == nil || len(req.Keys) == 0 { 115 url := "_all_docs?" + v.Encode() 116 err = makeRequest(db, doctype, http.MethodGet, url, nil, &response) 117 } else { 118 v.Del("keys") 119 url := "_all_docs?" + v.Encode() 120 body := struct { 121 Keys []string `json:"keys"` 122 }{ 123 Keys: req.Keys, 124 } 125 err = makeRequest(db, doctype, http.MethodPost, url, body, &response) 126 } 127 if err != nil { 128 return err 129 } 130 131 var docs []json.RawMessage 132 for _, row := range response.Rows { 133 if includeDesignDocs || !strings.HasPrefix(row.ID, "_design") { 134 docs = append(docs, row.Doc) 135 } 136 } 137 data, err := json.Marshal(docs) 138 if err != nil { 139 return err 140 } 141 return json.Unmarshal(data, results) 142 } 143 144 func MakeAllDocsRequest(db prefixer.Prefixer, doctype string, params *AllDocsRequest) (io.ReadCloser, error) { 145 if len(params.Keys) > 0 { 146 return nil, errors.New("keys is not supported by MakeAllDocsRequest") 147 } 148 var v url.Values 149 var err error 150 if params != nil { 151 v, err = params.Values() 152 if err != nil { 153 return nil, err 154 } 155 } else { 156 v = make(url.Values) 157 } 158 v.Add("include_docs", "true") 159 path := "_all_docs?" 
+ v.Encode() 160 method := http.MethodGet 161 162 log := logger.WithDomain(db.DomainName()).WithNamespace("couchdb") 163 if log.IsDebug() { 164 log.Debugf("request: %s %s %s", method, path, "") 165 } 166 req, err := buildCouchRequest(db, doctype, method, path, nil, nil) 167 if err != nil { 168 log.Error(err.Error()) 169 return nil, err 170 } 171 172 start := time.Now() 173 resp, err := config.CouchClient().Do(req) 174 elapsed := time.Since(start) 175 // Possible err = mostly connection failure 176 if err != nil { 177 err = newConnectionError(err) 178 log.Error(err.Error()) 179 return nil, err 180 } 181 182 if elapsed.Seconds() >= 10 { 183 log.Infof("slow request on %s %s (%s)", method, path, elapsed) 184 } 185 186 if err = handleResponseError(db, resp); err != nil { 187 return nil, err 188 } 189 return resp.Body, nil 190 } 191 192 // ForeachDocs traverse all the documents from the given database with the 193 // specified doctype and calls a function for each document. 194 func ForeachDocs(db prefixer.Prefixer, doctype string, fn func(id string, doc json.RawMessage) error) error { 195 return ForeachDocsWithCustomPagination(db, doctype, 100, fn) 196 } 197 198 // ForeachDocsWithCustomPagination traverse all the documents from the given 199 // database, and calls a function for each document. The documents are fetched 200 // from CouchDB with a pagination with a custom number of items per page. 201 func ForeachDocsWithCustomPagination(db prefixer.Prefixer, doctype string, limit int, fn func(id string, doc json.RawMessage) error) error { 202 var startKey string 203 for { 204 skip := 0 205 if startKey != "" { 206 skip = 1 207 } 208 req := &AllDocsRequest{ 209 StartKey: `"` + startKey + `"`, 210 Skip: skip, 211 Limit: limit, 212 } 213 v, err := query.Values(req) 214 if err != nil { 215 return err 216 } 217 v.Add("include_docs", "true") 218 219 var res AllDocsResponse 220 url := "_all_docs?" 
+ v.Encode() 221 err = makeRequest(db, doctype, http.MethodGet, url, nil, &res) 222 if err != nil { 223 return err 224 } 225 226 startKey = "" 227 for _, row := range res.Rows { 228 if !strings.HasPrefix(row.ID, "_design") { 229 if err = fn(row.ID, row.Doc); err != nil { 230 return err 231 } 232 } 233 startKey = row.ID 234 } 235 if len(res.Rows) < limit { 236 break 237 } 238 } 239 240 return nil 241 } 242 243 // BulkGetDocs returns the documents with the given id at the given revision 244 func BulkGetDocs(db prefixer.Prefixer, doctype string, payload []IDRev) ([]map[string]interface{}, error) { 245 path := "_bulk_get?revs=true" 246 body := struct { 247 Docs []IDRev `json:"docs"` 248 }{ 249 Docs: payload, 250 } 251 var response BulkGetResponse 252 err := makeRequest(db, doctype, http.MethodPost, path, body, &response) 253 if err != nil { 254 return nil, err 255 } 256 results := make([]map[string]interface{}, 0, len(response.Results)) 257 for _, r := range response.Results { 258 for _, doc := range r.Docs { 259 if doc.OK != nil { 260 results = append(results, doc.OK) 261 } 262 } 263 } 264 return results, nil 265 } 266 267 // BulkUpdateDocs is used to update several docs in one call, as a bulk. 268 // olddocs parameter is used for realtime / event triggers. 
269 func BulkUpdateDocs(db prefixer.Prefixer, doctype string, docs, olddocs []interface{}) error { 270 if len(docs) == 0 { 271 return nil 272 } 273 274 remaining := docs 275 olds := olddocs 276 for len(remaining) > 0 { 277 n := 1000 278 if len(remaining) < n { 279 n = len(remaining) 280 } 281 bulkDocs := remaining[:n] 282 remaining = remaining[n:] 283 bulkOlds := olds[:n] 284 olds = olds[n:] 285 if err := bulkUpdateDocs(db, doctype, bulkDocs, bulkOlds); err != nil { 286 if IsNoDatabaseError(err) { 287 if err := EnsureDBExist(db, doctype); err != nil { 288 return err 289 } 290 } 291 // If it fails once, try again 292 time.Sleep(1 * time.Second) 293 if err := bulkUpdateDocs(db, doctype, bulkDocs, bulkOlds); err != nil { 294 return err 295 } 296 } 297 } 298 return nil 299 } 300 301 func bulkUpdateDocs(db prefixer.Prefixer, doctype string, docs, olddocs []interface{}) error { 302 body := struct { 303 Docs []interface{} `json:"docs"` 304 }{ 305 Docs: docs, 306 } 307 var res []UpdateResponse 308 if err := makeRequest(db, doctype, http.MethodPost, "_bulk_docs", body, &res); err != nil { 309 return err 310 } 311 if len(res) != len(docs) { 312 return errors.New("BulkUpdateDoc receive an unexpected number of responses") 313 } 314 logBulk(db, "BulkUpdateDocs", doctype, res) 315 for i, doc := range docs { 316 if d, ok := doc.(Doc); ok { 317 update := res[i] 318 if update.Error != "" { 319 logger.WithDomain(db.DomainName()).WithNamespace("couchdb"). 
320 Warnf("bulkUpdateDocs error for %s %s: %s - %s", doctype, update.ID, update.Error, update.Reason) 321 } 322 if update.ID == "" || update.Rev == "" || !update.Ok { 323 continue 324 } 325 event := realtime.EventUpdate 326 if d.Rev() == "" { 327 event = realtime.EventCreate 328 d.SetID(update.ID) 329 } 330 d.SetRev(update.Rev) 331 if old, ok := olddocs[i].(Doc); ok { 332 RTEvent(db, realtime.EventUpdate, d, old) 333 } else { 334 RTEvent(db, event, d, nil) 335 } 336 } 337 } 338 return nil 339 } 340 341 // BulkDeleteDocs is used to delete serveral documents in one call. 342 func BulkDeleteDocs(db prefixer.Prefixer, doctype string, docs []Doc) error { 343 if len(docs) == 0 { 344 return nil 345 } 346 body := struct { 347 Docs []json.RawMessage `json:"docs"` 348 }{ 349 Docs: make([]json.RawMessage, 0, len(docs)), 350 } 351 for _, doc := range docs { 352 body.Docs = append(body.Docs, json.RawMessage( 353 fmt.Sprintf(`{"_id":"%s","_rev":"%s","_deleted":true}`, doc.ID(), doc.Rev()), 354 )) 355 } 356 var res []UpdateResponse 357 if err := makeRequest(db, doctype, http.MethodPost, "_bulk_docs", body, &res); err != nil { 358 return err 359 } 360 for i, doc := range docs { 361 doc.SetRev(res[i].Rev) 362 RTEvent(db, realtime.EventDelete, doc, nil) 363 } 364 logBulk(db, "BulkDeleteDocs", doctype, docs) 365 return nil 366 } 367 368 // BulkForceUpdateDocs is used to update several docs in one call, and to force 369 // the revisions history. It is used by replications. 
370 func BulkForceUpdateDocs(db prefixer.Prefixer, doctype string, docs []map[string]interface{}) error { 371 if len(docs) == 0 { 372 return nil 373 } 374 body := struct { 375 NewEdits bool `json:"new_edits"` 376 Docs []map[string]interface{} `json:"docs"` 377 }{ 378 NewEdits: false, 379 Docs: docs, 380 } 381 logBulk(db, "BulkForceUpdateDocs", doctype, docs) 382 // XXX CouchDB returns just an empty array when new_edits is false, so we 383 // ignore the response 384 return makeRequest(db, doctype, http.MethodPost, "_bulk_docs", body, nil) 385 } 386 387 func logBulk(db prefixer.Prefixer, prefix, doctype string, docs interface{}) { 388 extracted := make([]string, 0, 1000) 389 if documents, ok := docs.([]Doc); ok { 390 for _, doc := range documents { 391 id := doc.ID() 392 rev := revision.Generation(doc.Rev()) 393 extracted = append(extracted, fmt.Sprintf("%s (%d)", id, rev)) 394 } 395 } else if updates, ok := docs.([]UpdateResponse); ok { 396 for _, update := range updates { 397 id := update.ID 398 rev := revision.Generation(update.Rev) 399 extracted = append(extracted, fmt.Sprintf("%s (%d)", id, rev)) 400 } 401 } else if maps, ok := docs.([]map[string]interface{}); ok { 402 for _, doc := range maps { 403 id, _ := doc["_id"].(string) 404 extracted = append(extracted, id) 405 } 406 } 407 408 var messages []string 409 for len(extracted) > 0 { 410 nb := len(extracted) 411 // We limit the number of ids per log to avoid the line width limit. 412 if nb > 50 { 413 nb = 50 414 } 415 msg := strings.Join(extracted[:nb], " ") 416 messages = append(messages, msg) 417 extracted = extracted[nb:] 418 } 419 420 for _, msg := range messages { 421 logger.WithDomain(db.DomainName()).WithNamespace("couchdb"). 422 Infof("%s for %s: %s", prefix, doctype, msg) 423 } 424 }