github.com/cozy/cozy-stack@v0.0.0-20240327093429-939e4a21320e/pkg/couchdb/bulk.go (about)

     1  package couchdb
     2  
     3  import (
     4  	"encoding/json"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"net/http"
     9  	"net/url"
    10  	"strings"
    11  	"time"
    12  
    13  	"github.com/cozy/cozy-stack/pkg/config/config"
    14  	"github.com/cozy/cozy-stack/pkg/couchdb/revision"
    15  	"github.com/cozy/cozy-stack/pkg/logger"
    16  	"github.com/cozy/cozy-stack/pkg/prefixer"
    17  	"github.com/cozy/cozy-stack/pkg/realtime"
    18  	"github.com/google/go-querystring/query"
    19  )
    20  
// AllDocsRequest is used to build a _all_docs request. Each field is tagged
// with the name of the query-string parameter it is encoded to, and the
// omitempty option leaves zero values out of the query.
//
// Keys gets a special treatment in this file: getAllDocs moves it to the JSON
// body of a POST request, and MakeAllDocsRequest rejects it.
type AllDocsRequest struct {
	Descending    bool     `url:"descending,omitempty"`
	Limit         int      `url:"limit,omitempty"`
	Skip          int      `url:"skip,omitempty"`
	StartKey      string   `url:"startkey,omitempty"`
	StartKeyDocID string   `url:"startkey_docid,omitempty"`
	EndKey        string   `url:"endkey,omitempty"`
	EndKeyDocID   string   `url:"endkey_docid,omitempty"`
	Keys          []string `url:"keys,omitempty"`
}
    32  
// AllDocsResponse is the response we receive from an _all_docs request. It is
// decoded from the "offset", "total_rows" and "rows" members of the JSON
// body; CountAllDocs only reads TotalRows, the other functions of this file
// iterate over Rows.
type AllDocsResponse struct {
	Offset    int          `json:"offset"`
	TotalRows int          `json:"total_rows"`
	Rows      []AllDocsRow `json:"rows"`
}
    39  
// AllDocsRow is a row inside the _all_docs response. Doc is kept as raw JSON
// so that the callers can decode it into the type they want; it is only
// filled when the request was made with include_docs=true.
type AllDocsRow struct {
	ID  string          `json:"id"`
	Doc json.RawMessage `json:"doc"`
}
    45  
// IDRev is used for the payload of POST _bulk_get: it identifies a document
// by its ID and, optionally, a revision. An empty Rev is left out of the
// encoded JSON.
type IDRev struct {
	ID  string `json:"id"`
	Rev string `json:"rev,omitempty"`
}
    51  
// BulkGetResponse is the response we receive from a _bulk_get request. Only
// the "ok" member of each entry of "docs" is decoded: BulkGetDocs keeps the
// entries where it is present and ignores the others.
type BulkGetResponse struct {
	Results []struct {
		Docs []struct {
			OK map[string]interface{} `json:"ok"`
		} `json:"docs"`
	} `json:"results"`
}
    60  
    61  // CountAllDocs returns the number of documents of the given doctype.
    62  func CountAllDocs(db prefixer.Prefixer, doctype string) (int, error) {
    63  	var response AllDocsResponse
    64  	url := "_all_docs?limit=0"
    65  	err := makeRequest(db, doctype, http.MethodGet, url, nil, &response)
    66  	if err != nil {
    67  		return 0, err
    68  	}
    69  	return response.TotalRows, nil
    70  }
    71  
    72  // CountNormalDocs returns the number of documents of the given doctype,
    73  // and excludes the design docs from the count.
    74  func CountNormalDocs(db prefixer.Prefixer, doctype string) (int, error) {
    75  	var designRes ViewResponse
    76  	err := makeRequest(db, doctype, http.MethodGet, "_design_docs", nil, &designRes)
    77  	if err != nil {
    78  		return 0, err
    79  	}
    80  	total := designRes.Total
    81  	// CouchDB response for the total_rows on the _design_docs endpoint:
    82  	// - is the total number of documents on CouchDB 2.2 (and before)
    83  	// - is the total number of design documents on CouchDB 2.3+
    84  	// See https://github.com/apache/couchdb/issues/1603
    85  	if total == len(designRes.Rows) {
    86  		if total, err = CountAllDocs(db, doctype); err != nil {
    87  			return 0, err
    88  		}
    89  	}
    90  	return total - len(designRes.Rows), nil
    91  }
    92  
// GetAllDocs returns all documents of a specified doctype. It filters
// out the possible _design document.
//
// req may be nil, in which case only include_docs=true is sent. results is
// filled by json.Unmarshal with a JSON array of the documents, so it must be
// a pointer to something that can receive such an array.
func GetAllDocs(db prefixer.Prefixer, doctype string, req *AllDocsRequest, results interface{}) (err error) {
	return getAllDocs(db, doctype, req, results, false)
}
    98  
// GetDesignDocs does the same as GetAllDocs, but it keeps the design docs
// (the rows whose ID starts with "_design") in the results.
func GetDesignDocs(db prefixer.Prefixer, doctype string, req *AllDocsRequest, results interface{}) (err error) {
	return getAllDocs(db, doctype, req, results, true)
}
   103  
   104  func getAllDocs(db prefixer.Prefixer, doctype string, req *AllDocsRequest, results interface{}, includeDesignDocs bool) (err error) {
   105  	var v url.Values
   106  	if req != nil {
   107  		v, err = req.Values()
   108  		if err != nil {
   109  			return err
   110  		}
   111  	} else {
   112  		v = make(url.Values)
   113  	}
   114  	v.Add("include_docs", "true")
   115  	var response AllDocsResponse
   116  	if req == nil || len(req.Keys) == 0 {
   117  		url := "_all_docs?" + v.Encode()
   118  		err = makeRequest(db, doctype, http.MethodGet, url, nil, &response)
   119  	} else {
   120  		v.Del("keys")
   121  		url := "_all_docs?" + v.Encode()
   122  		body := struct {
   123  			Keys []string `json:"keys"`
   124  		}{
   125  			Keys: req.Keys,
   126  		}
   127  		err = makeRequest(db, doctype, http.MethodPost, url, body, &response)
   128  	}
   129  	if err != nil {
   130  		return err
   131  	}
   132  
   133  	var docs []json.RawMessage
   134  	for _, row := range response.Rows {
   135  		if includeDesignDocs || !strings.HasPrefix(row.ID, "_design") {
   136  			docs = append(docs, row.Doc)
   137  		}
   138  	}
   139  	data, err := json.Marshal(docs)
   140  	if err != nil {
   141  		return err
   142  	}
   143  	return json.Unmarshal(data, results)
   144  }
   145  
   146  func MakeAllDocsRequest(db prefixer.Prefixer, doctype string, params *AllDocsRequest) (io.ReadCloser, error) {
   147  	if len(params.Keys) > 0 {
   148  		return nil, errors.New("keys is not supported by MakeAllDocsRequest")
   149  	}
   150  	var v url.Values
   151  	var err error
   152  	if params != nil {
   153  		v, err = params.Values()
   154  		if err != nil {
   155  			return nil, err
   156  		}
   157  	} else {
   158  		v = make(url.Values)
   159  	}
   160  	v.Add("include_docs", "true")
   161  	path := "_all_docs?" + v.Encode()
   162  	method := http.MethodGet
   163  
   164  	log := logger.WithDomain(db.DomainName()).WithNamespace("couchdb")
   165  	if log.IsDebug() {
   166  		log.Debugf("request: %s %s %s", method, path, "")
   167  	}
   168  	req, err := buildCouchRequest(db, doctype, method, path, nil, nil)
   169  	if err != nil {
   170  		log.Error(err.Error())
   171  		return nil, err
   172  	}
   173  
   174  	start := time.Now()
   175  	resp, err := config.CouchClient().Do(req)
   176  	elapsed := time.Since(start)
   177  	// Possible err = mostly connection failure
   178  	if err != nil {
   179  		err = newConnectionError(err)
   180  		log.Error(err.Error())
   181  		return nil, err
   182  	}
   183  
   184  	if elapsed.Seconds() >= 10 {
   185  		log.Infof("slow request on %s %s (%s)", method, path, elapsed)
   186  	}
   187  
   188  	if err = handleResponseError(db, resp); err != nil {
   189  		return nil, err
   190  	}
   191  	return resp.Body, nil
   192  }
   193  
// ForeachDocs traverse all the documents from the given database with the
// specified doctype and calls a function for each document.
//
// It is ForeachDocsWithCustomPagination with pages of 100 documents: the
// design docs are skipped, and the traversal stops at the first error
// returned by CouchDB or by fn.
func ForeachDocs(db prefixer.Prefixer, doctype string, fn func(id string, doc json.RawMessage) error) error {
	return ForeachDocsWithCustomPagination(db, doctype, 100, fn)
}
   199  
   200  // ForeachDocsWithCustomPagination traverse all the documents from the given
   201  // database, and calls a function for each document. The documents are fetched
   202  // from CouchDB with a pagination with a custom number of items per page.
   203  func ForeachDocsWithCustomPagination(db prefixer.Prefixer, doctype string, limit int, fn func(id string, doc json.RawMessage) error) error {
   204  	var startKey string
   205  	for {
   206  		skip := 0
   207  		if startKey != "" {
   208  			skip = 1
   209  		}
   210  		req := &AllDocsRequest{
   211  			StartKeyDocID: startKey,
   212  			Skip:          skip,
   213  			Limit:         limit,
   214  		}
   215  		v, err := query.Values(req)
   216  		if err != nil {
   217  			return err
   218  		}
   219  		v.Add("include_docs", "true")
   220  
   221  		var res AllDocsResponse
   222  		url := "_all_docs?" + v.Encode()
   223  		err = makeRequest(db, doctype, http.MethodGet, url, nil, &res)
   224  		if err != nil {
   225  			return err
   226  		}
   227  
   228  		startKey = ""
   229  		for _, row := range res.Rows {
   230  			if !strings.HasPrefix(row.ID, "_design") {
   231  				if err = fn(row.ID, row.Doc); err != nil {
   232  					return err
   233  				}
   234  			}
   235  			startKey = row.ID
   236  		}
   237  		if len(res.Rows) < limit {
   238  			break
   239  		}
   240  	}
   241  
   242  	return nil
   243  }
   244  
   245  // BulkGetDocs returns the documents with the given id at the given revision
   246  func BulkGetDocs(db prefixer.Prefixer, doctype string, payload []IDRev) ([]map[string]interface{}, error) {
   247  	path := "_bulk_get?revs=true"
   248  	body := struct {
   249  		Docs []IDRev `json:"docs"`
   250  	}{
   251  		Docs: payload,
   252  	}
   253  	var response BulkGetResponse
   254  	err := makeRequest(db, doctype, http.MethodPost, path, body, &response)
   255  	if err != nil {
   256  		return nil, err
   257  	}
   258  	results := make([]map[string]interface{}, 0, len(response.Results))
   259  	for _, r := range response.Results {
   260  		for _, doc := range r.Docs {
   261  			if doc.OK != nil {
   262  				results = append(results, doc.OK)
   263  			}
   264  		}
   265  	}
   266  	return results, nil
   267  }
   268  
   269  // BulkUpdateDocs is used to update several docs in one call, as a bulk.
   270  // olddocs parameter is used for realtime / event triggers.
   271  func BulkUpdateDocs(db prefixer.Prefixer, doctype string, docs, olddocs []interface{}) error {
   272  	if len(docs) == 0 {
   273  		return nil
   274  	}
   275  
   276  	remaining := docs
   277  	olds := olddocs
   278  	for len(remaining) > 0 {
   279  		n := 1000
   280  		if len(remaining) < n {
   281  			n = len(remaining)
   282  		}
   283  		bulkDocs := remaining[:n]
   284  		remaining = remaining[n:]
   285  		bulkOlds := olds[:n]
   286  		olds = olds[n:]
   287  		if err := bulkUpdateDocs(db, doctype, bulkDocs, bulkOlds); err != nil {
   288  			if IsNoDatabaseError(err) {
   289  				if err := EnsureDBExist(db, doctype); err != nil {
   290  					return err
   291  				}
   292  			}
   293  			// If it fails once, try again
   294  			time.Sleep(1 * time.Second)
   295  			if err := bulkUpdateDocs(db, doctype, bulkDocs, bulkOlds); err != nil {
   296  				return err
   297  			}
   298  		}
   299  	}
   300  	return nil
   301  }
   302  
   303  func bulkUpdateDocs(db prefixer.Prefixer, doctype string, docs, olddocs []interface{}) error {
   304  	body := struct {
   305  		Docs []interface{} `json:"docs"`
   306  	}{
   307  		Docs: docs,
   308  	}
   309  	var res []UpdateResponse
   310  	if err := makeRequest(db, doctype, http.MethodPost, "_bulk_docs", body, &res); err != nil {
   311  		return err
   312  	}
   313  	if len(res) != len(docs) {
   314  		return errors.New("BulkUpdateDoc receive an unexpected number of responses")
   315  	}
   316  	logBulk(db, "BulkUpdateDocs", doctype, res)
   317  	for i, doc := range docs {
   318  		if d, ok := doc.(Doc); ok {
   319  			update := res[i]
   320  			if update.Error != "" {
   321  				logger.WithDomain(db.DomainName()).WithNamespace("couchdb").
   322  					Warnf("bulkUpdateDocs error for %s %s: %s - %s", doctype, update.ID, update.Error, update.Reason)
   323  			}
   324  			if update.ID == "" || update.Rev == "" || !update.Ok {
   325  				continue
   326  			}
   327  			event := realtime.EventUpdate
   328  			if d.Rev() == "" {
   329  				event = realtime.EventCreate
   330  				d.SetID(update.ID)
   331  			}
   332  			d.SetRev(update.Rev)
   333  			if old, ok := olddocs[i].(Doc); ok {
   334  				RTEvent(db, realtime.EventUpdate, d, old)
   335  			} else {
   336  				RTEvent(db, event, d, nil)
   337  			}
   338  		}
   339  	}
   340  	return nil
   341  }
   342  
   343  // BulkDeleteDocs is used to delete serveral documents in one call.
   344  func BulkDeleteDocs(db prefixer.Prefixer, doctype string, docs []Doc) error {
   345  	if len(docs) == 0 {
   346  		return nil
   347  	}
   348  	body := struct {
   349  		Docs []json.RawMessage `json:"docs"`
   350  	}{
   351  		Docs: make([]json.RawMessage, 0, len(docs)),
   352  	}
   353  	for _, doc := range docs {
   354  		body.Docs = append(body.Docs, json.RawMessage(
   355  			fmt.Sprintf(`{"_id":"%s","_rev":"%s","_deleted":true}`, doc.ID(), doc.Rev()),
   356  		))
   357  	}
   358  	var res []UpdateResponse
   359  	if err := makeRequest(db, doctype, http.MethodPost, "_bulk_docs", body, &res); err != nil {
   360  		return err
   361  	}
   362  	for i, doc := range docs {
   363  		doc.SetRev(res[i].Rev)
   364  		RTEvent(db, realtime.EventDelete, doc, nil)
   365  	}
   366  	logBulk(db, "BulkDeleteDocs", doctype, docs)
   367  	return nil
   368  }
   369  
   370  // BulkForceUpdateDocs is used to update several docs in one call, and to force
   371  // the revisions history. It is used by replications.
   372  func BulkForceUpdateDocs(db prefixer.Prefixer, doctype string, docs []map[string]interface{}) error {
   373  	if len(docs) == 0 {
   374  		return nil
   375  	}
   376  	body := struct {
   377  		NewEdits bool                     `json:"new_edits"`
   378  		Docs     []map[string]interface{} `json:"docs"`
   379  	}{
   380  		NewEdits: false,
   381  		Docs:     docs,
   382  	}
   383  	logBulk(db, "BulkForceUpdateDocs", doctype, docs)
   384  	// XXX CouchDB returns just an empty array when new_edits is false, so we
   385  	// ignore the response
   386  	return makeRequest(db, doctype, http.MethodPost, "_bulk_docs", body, nil)
   387  }
   388  
   389  func logBulk(db prefixer.Prefixer, prefix, doctype string, docs interface{}) {
   390  	extracted := make([]string, 0, 1000)
   391  	if documents, ok := docs.([]Doc); ok {
   392  		for _, doc := range documents {
   393  			id := doc.ID()
   394  			rev := revision.Generation(doc.Rev())
   395  			extracted = append(extracted, fmt.Sprintf("%s (%d)", id, rev))
   396  		}
   397  	} else if updates, ok := docs.([]UpdateResponse); ok {
   398  		for _, update := range updates {
   399  			id := update.ID
   400  			rev := revision.Generation(update.Rev)
   401  			extracted = append(extracted, fmt.Sprintf("%s (%d)", id, rev))
   402  		}
   403  	} else if maps, ok := docs.([]map[string]interface{}); ok {
   404  		for _, doc := range maps {
   405  			id, _ := doc["_id"].(string)
   406  			extracted = append(extracted, id)
   407  		}
   408  	}
   409  
   410  	var messages []string
   411  	for len(extracted) > 0 {
   412  		nb := len(extracted)
   413  		// We limit the number of ids per log to avoid the line width limit.
   414  		if nb > 50 {
   415  			nb = 50
   416  		}
   417  		msg := strings.Join(extracted[:nb], " ")
   418  		messages = append(messages, msg)
   419  		extracted = extracted[nb:]
   420  	}
   421  
   422  	for _, msg := range messages {
   423  		logger.WithDomain(db.DomainName()).WithNamespace("couchdb").
   424  			Infof("%s for %s: %s", prefix, doctype, msg)
   425  	}
   426  }