github.com/cozy/cozy-stack@v0.0.0-20240603063001-31110fa4cae1/pkg/couchdb/bulk.go (about)

     1  package couchdb
     2  
     3  import (
     4  	"encoding/json"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"net/http"
     9  	"net/url"
    10  	"strings"
    11  	"time"
    12  
    13  	"github.com/cozy/cozy-stack/pkg/config/config"
    14  	"github.com/cozy/cozy-stack/pkg/couchdb/revision"
    15  	"github.com/cozy/cozy-stack/pkg/logger"
    16  	"github.com/cozy/cozy-stack/pkg/prefixer"
    17  	"github.com/cozy/cozy-stack/pkg/realtime"
    18  	"github.com/google/go-querystring/query"
    19  )
    20  
// AllDocsRequest is used to build a _all_docs request. The `url` struct tags
// give the query-string name of each parameter; zero values are omitted.
type AllDocsRequest struct {
	Descending bool   `url:"descending,omitempty"`
	Limit      int    `url:"limit,omitempty"`
	Skip       int    `url:"skip,omitempty"`
	StartKey   string `url:"startkey,omitempty"`
	EndKey     string `url:"endkey,omitempty"`
	// Keys restricts the request to the given document ids. When it is set,
	// getAllDocs sends the ids in the body of a POST request instead of the
	// query string, and MakeAllDocsRequest rejects the request.
	Keys []string `url:"keys,omitempty"`
}
    30  
// AllDocsResponse is the response we receive from an _all_docs request.
type AllDocsResponse struct {
	Offset int `json:"offset"`
	// TotalRows is the "total_rows" field of the response; CountAllDocs
	// returns it as the number of documents.
	TotalRows int          `json:"total_rows"`
	Rows      []AllDocsRow `json:"rows"`
}
    37  
// AllDocsRow is a row inside the _all_docs response.
type AllDocsRow struct {
	ID string `json:"id"`
	// Doc is kept as raw JSON so that callers can decode it into the type of
	// their choice.
	Doc json.RawMessage `json:"doc"`
}
    43  
// IDRev is used for the payload of POST _bulk_get. It identifies a document
// by its id and, optionally, a revision (the field is left out of the JSON
// when empty).
type IDRev struct {
	ID  string `json:"id"`
	Rev string `json:"rev,omitempty"`
}
    49  
// BulkGetResponse is the response we receive from a _bulk_get request. Only
// the "ok" entry of each returned doc is decoded; BulkGetDocs skips the
// entries for which it is missing.
type BulkGetResponse struct {
	Results []struct {
		Docs []struct {
			OK map[string]interface{} `json:"ok"`
		} `json:"docs"`
	} `json:"results"`
}
    58  
    59  // CountAllDocs returns the number of documents of the given doctype.
    60  func CountAllDocs(db prefixer.Prefixer, doctype string) (int, error) {
    61  	var response AllDocsResponse
    62  	url := "_all_docs?limit=0"
    63  	err := makeRequest(db, doctype, http.MethodGet, url, nil, &response)
    64  	if err != nil {
    65  		return 0, err
    66  	}
    67  	return response.TotalRows, nil
    68  }
    69  
    70  // CountNormalDocs returns the number of documents of the given doctype,
    71  // and excludes the design docs from the count.
    72  func CountNormalDocs(db prefixer.Prefixer, doctype string) (int, error) {
    73  	var designRes ViewResponse
    74  	err := makeRequest(db, doctype, http.MethodGet, "_design_docs", nil, &designRes)
    75  	if err != nil {
    76  		return 0, err
    77  	}
    78  	total := designRes.Total
    79  	// CouchDB response for the total_rows on the _design_docs endpoint:
    80  	// - is the total number of documents on CouchDB 2.2 (and before)
    81  	// - is the total number of design documents on CouchDB 2.3+
    82  	// See https://github.com/apache/couchdb/issues/1603
    83  	if total == len(designRes.Rows) {
    84  		if total, err = CountAllDocs(db, doctype); err != nil {
    85  			return 0, err
    86  		}
    87  	}
    88  	return total - len(designRes.Rows), nil
    89  }
    90  
// GetAllDocs returns all documents of a specified doctype. It filters
// out the possible _design document.
//
// req may be nil, in which case no extra parameter is sent. The documents are
// decoded as a JSON array into results, which should therefore be a pointer to
// a slice.
func GetAllDocs(db prefixer.Prefixer, doctype string, req *AllDocsRequest, results interface{}) (err error) {
	return getAllDocs(db, doctype, req, results, false)
}
    96  
// GetDesignDocs does the same as GetAllDocs, but it keeps the design docs:
// the rows whose id starts with "_design" are not filtered out of results.
func GetDesignDocs(db prefixer.Prefixer, doctype string, req *AllDocsRequest, results interface{}) (err error) {
	return getAllDocs(db, doctype, req, results, true)
}
   101  
   102  func getAllDocs(db prefixer.Prefixer, doctype string, req *AllDocsRequest, results interface{}, includeDesignDocs bool) (err error) {
   103  	var v url.Values
   104  	if req != nil {
   105  		v, err = req.Values()
   106  		if err != nil {
   107  			return err
   108  		}
   109  	} else {
   110  		v = make(url.Values)
   111  	}
   112  	v.Add("include_docs", "true")
   113  	var response AllDocsResponse
   114  	if req == nil || len(req.Keys) == 0 {
   115  		url := "_all_docs?" + v.Encode()
   116  		err = makeRequest(db, doctype, http.MethodGet, url, nil, &response)
   117  	} else {
   118  		v.Del("keys")
   119  		url := "_all_docs?" + v.Encode()
   120  		body := struct {
   121  			Keys []string `json:"keys"`
   122  		}{
   123  			Keys: req.Keys,
   124  		}
   125  		err = makeRequest(db, doctype, http.MethodPost, url, body, &response)
   126  	}
   127  	if err != nil {
   128  		return err
   129  	}
   130  
   131  	var docs []json.RawMessage
   132  	for _, row := range response.Rows {
   133  		if includeDesignDocs || !strings.HasPrefix(row.ID, "_design") {
   134  			docs = append(docs, row.Doc)
   135  		}
   136  	}
   137  	data, err := json.Marshal(docs)
   138  	if err != nil {
   139  		return err
   140  	}
   141  	return json.Unmarshal(data, results)
   142  }
   143  
   144  func MakeAllDocsRequest(db prefixer.Prefixer, doctype string, params *AllDocsRequest) (io.ReadCloser, error) {
   145  	if len(params.Keys) > 0 {
   146  		return nil, errors.New("keys is not supported by MakeAllDocsRequest")
   147  	}
   148  	var v url.Values
   149  	var err error
   150  	if params != nil {
   151  		v, err = params.Values()
   152  		if err != nil {
   153  			return nil, err
   154  		}
   155  	} else {
   156  		v = make(url.Values)
   157  	}
   158  	v.Add("include_docs", "true")
   159  	path := "_all_docs?" + v.Encode()
   160  	method := http.MethodGet
   161  
   162  	log := logger.WithDomain(db.DomainName()).WithNamespace("couchdb")
   163  	if log.IsDebug() {
   164  		log.Debugf("request: %s %s %s", method, path, "")
   165  	}
   166  	req, err := buildCouchRequest(db, doctype, method, path, nil, nil)
   167  	if err != nil {
   168  		log.Error(err.Error())
   169  		return nil, err
   170  	}
   171  
   172  	start := time.Now()
   173  	resp, err := config.CouchClient().Do(req)
   174  	elapsed := time.Since(start)
   175  	// Possible err = mostly connection failure
   176  	if err != nil {
   177  		err = newConnectionError(err)
   178  		log.Error(err.Error())
   179  		return nil, err
   180  	}
   181  
   182  	if elapsed.Seconds() >= 10 {
   183  		log.Infof("slow request on %s %s (%s)", method, path, elapsed)
   184  	}
   185  
   186  	if err = handleResponseError(db, resp); err != nil {
   187  		return nil, err
   188  	}
   189  	return resp.Body, nil
   190  }
   191  
// ForeachDocs traverses all the documents from the given database with the
// specified doctype and calls a function for each document. It delegates to
// ForeachDocsWithCustomPagination with a page size of 100 documents.
func ForeachDocs(db prefixer.Prefixer, doctype string, fn func(id string, doc json.RawMessage) error) error {
	return ForeachDocsWithCustomPagination(db, doctype, 100, fn)
}
   197  
   198  // ForeachDocsWithCustomPagination traverse all the documents from the given
   199  // database, and calls a function for each document. The documents are fetched
   200  // from CouchDB with a pagination with a custom number of items per page.
   201  func ForeachDocsWithCustomPagination(db prefixer.Prefixer, doctype string, limit int, fn func(id string, doc json.RawMessage) error) error {
   202  	var startKey string
   203  	for {
   204  		skip := 0
   205  		if startKey != "" {
   206  			skip = 1
   207  		}
   208  		req := &AllDocsRequest{
   209  			StartKey: `"` + startKey + `"`,
   210  			Skip:     skip,
   211  			Limit:    limit,
   212  		}
   213  		v, err := query.Values(req)
   214  		if err != nil {
   215  			return err
   216  		}
   217  		v.Add("include_docs", "true")
   218  
   219  		var res AllDocsResponse
   220  		url := "_all_docs?" + v.Encode()
   221  		err = makeRequest(db, doctype, http.MethodGet, url, nil, &res)
   222  		if err != nil {
   223  			return err
   224  		}
   225  
   226  		startKey = ""
   227  		for _, row := range res.Rows {
   228  			if !strings.HasPrefix(row.ID, "_design") {
   229  				if err = fn(row.ID, row.Doc); err != nil {
   230  					return err
   231  				}
   232  			}
   233  			startKey = row.ID
   234  		}
   235  		if len(res.Rows) < limit {
   236  			break
   237  		}
   238  	}
   239  
   240  	return nil
   241  }
   242  
   243  // BulkGetDocs returns the documents with the given id at the given revision
   244  func BulkGetDocs(db prefixer.Prefixer, doctype string, payload []IDRev) ([]map[string]interface{}, error) {
   245  	path := "_bulk_get?revs=true"
   246  	body := struct {
   247  		Docs []IDRev `json:"docs"`
   248  	}{
   249  		Docs: payload,
   250  	}
   251  	var response BulkGetResponse
   252  	err := makeRequest(db, doctype, http.MethodPost, path, body, &response)
   253  	if err != nil {
   254  		return nil, err
   255  	}
   256  	results := make([]map[string]interface{}, 0, len(response.Results))
   257  	for _, r := range response.Results {
   258  		for _, doc := range r.Docs {
   259  			if doc.OK != nil {
   260  				results = append(results, doc.OK)
   261  			}
   262  		}
   263  	}
   264  	return results, nil
   265  }
   266  
   267  // BulkUpdateDocs is used to update several docs in one call, as a bulk.
   268  // olddocs parameter is used for realtime / event triggers.
   269  func BulkUpdateDocs(db prefixer.Prefixer, doctype string, docs, olddocs []interface{}) error {
   270  	if len(docs) == 0 {
   271  		return nil
   272  	}
   273  
   274  	remaining := docs
   275  	olds := olddocs
   276  	for len(remaining) > 0 {
   277  		n := 1000
   278  		if len(remaining) < n {
   279  			n = len(remaining)
   280  		}
   281  		bulkDocs := remaining[:n]
   282  		remaining = remaining[n:]
   283  		bulkOlds := olds[:n]
   284  		olds = olds[n:]
   285  		if err := bulkUpdateDocs(db, doctype, bulkDocs, bulkOlds); err != nil {
   286  			if IsNoDatabaseError(err) {
   287  				if err := EnsureDBExist(db, doctype); err != nil {
   288  					return err
   289  				}
   290  			}
   291  			// If it fails once, try again
   292  			time.Sleep(1 * time.Second)
   293  			if err := bulkUpdateDocs(db, doctype, bulkDocs, bulkOlds); err != nil {
   294  				return err
   295  			}
   296  		}
   297  	}
   298  	return nil
   299  }
   300  
   301  func bulkUpdateDocs(db prefixer.Prefixer, doctype string, docs, olddocs []interface{}) error {
   302  	body := struct {
   303  		Docs []interface{} `json:"docs"`
   304  	}{
   305  		Docs: docs,
   306  	}
   307  	var res []UpdateResponse
   308  	if err := makeRequest(db, doctype, http.MethodPost, "_bulk_docs", body, &res); err != nil {
   309  		return err
   310  	}
   311  	if len(res) != len(docs) {
   312  		return errors.New("BulkUpdateDoc receive an unexpected number of responses")
   313  	}
   314  	logBulk(db, "BulkUpdateDocs", doctype, res)
   315  	for i, doc := range docs {
   316  		if d, ok := doc.(Doc); ok {
   317  			update := res[i]
   318  			if update.Error != "" {
   319  				logger.WithDomain(db.DomainName()).WithNamespace("couchdb").
   320  					Warnf("bulkUpdateDocs error for %s %s: %s - %s", doctype, update.ID, update.Error, update.Reason)
   321  			}
   322  			if update.ID == "" || update.Rev == "" || !update.Ok {
   323  				continue
   324  			}
   325  			event := realtime.EventUpdate
   326  			if d.Rev() == "" {
   327  				event = realtime.EventCreate
   328  				d.SetID(update.ID)
   329  			}
   330  			d.SetRev(update.Rev)
   331  			if old, ok := olddocs[i].(Doc); ok {
   332  				RTEvent(db, realtime.EventUpdate, d, old)
   333  			} else {
   334  				RTEvent(db, event, d, nil)
   335  			}
   336  		}
   337  	}
   338  	return nil
   339  }
   340  
   341  // BulkDeleteDocs is used to delete serveral documents in one call.
   342  func BulkDeleteDocs(db prefixer.Prefixer, doctype string, docs []Doc) error {
   343  	if len(docs) == 0 {
   344  		return nil
   345  	}
   346  	body := struct {
   347  		Docs []json.RawMessage `json:"docs"`
   348  	}{
   349  		Docs: make([]json.RawMessage, 0, len(docs)),
   350  	}
   351  	for _, doc := range docs {
   352  		body.Docs = append(body.Docs, json.RawMessage(
   353  			fmt.Sprintf(`{"_id":"%s","_rev":"%s","_deleted":true}`, doc.ID(), doc.Rev()),
   354  		))
   355  	}
   356  	var res []UpdateResponse
   357  	if err := makeRequest(db, doctype, http.MethodPost, "_bulk_docs", body, &res); err != nil {
   358  		return err
   359  	}
   360  	for i, doc := range docs {
   361  		doc.SetRev(res[i].Rev)
   362  		RTEvent(db, realtime.EventDelete, doc, nil)
   363  	}
   364  	logBulk(db, "BulkDeleteDocs", doctype, docs)
   365  	return nil
   366  }
   367  
   368  // BulkForceUpdateDocs is used to update several docs in one call, and to force
   369  // the revisions history. It is used by replications.
   370  func BulkForceUpdateDocs(db prefixer.Prefixer, doctype string, docs []map[string]interface{}) error {
   371  	if len(docs) == 0 {
   372  		return nil
   373  	}
   374  	body := struct {
   375  		NewEdits bool                     `json:"new_edits"`
   376  		Docs     []map[string]interface{} `json:"docs"`
   377  	}{
   378  		NewEdits: false,
   379  		Docs:     docs,
   380  	}
   381  	logBulk(db, "BulkForceUpdateDocs", doctype, docs)
   382  	// XXX CouchDB returns just an empty array when new_edits is false, so we
   383  	// ignore the response
   384  	return makeRequest(db, doctype, http.MethodPost, "_bulk_docs", body, nil)
   385  }
   386  
   387  func logBulk(db prefixer.Prefixer, prefix, doctype string, docs interface{}) {
   388  	extracted := make([]string, 0, 1000)
   389  	if documents, ok := docs.([]Doc); ok {
   390  		for _, doc := range documents {
   391  			id := doc.ID()
   392  			rev := revision.Generation(doc.Rev())
   393  			extracted = append(extracted, fmt.Sprintf("%s (%d)", id, rev))
   394  		}
   395  	} else if updates, ok := docs.([]UpdateResponse); ok {
   396  		for _, update := range updates {
   397  			id := update.ID
   398  			rev := revision.Generation(update.Rev)
   399  			extracted = append(extracted, fmt.Sprintf("%s (%d)", id, rev))
   400  		}
   401  	} else if maps, ok := docs.([]map[string]interface{}); ok {
   402  		for _, doc := range maps {
   403  			id, _ := doc["_id"].(string)
   404  			extracted = append(extracted, id)
   405  		}
   406  	}
   407  
   408  	var messages []string
   409  	for len(extracted) > 0 {
   410  		nb := len(extracted)
   411  		// We limit the number of ids per log to avoid the line width limit.
   412  		if nb > 50 {
   413  			nb = 50
   414  		}
   415  		msg := strings.Join(extracted[:nb], " ")
   416  		messages = append(messages, msg)
   417  		extracted = extracted[nb:]
   418  	}
   419  
   420  	for _, msg := range messages {
   421  		logger.WithDomain(db.DomainName()).WithNamespace("couchdb").
   422  			Infof("%s for %s: %s", prefix, doctype, msg)
   423  	}
   424  }