github.com/cozy/cozy-stack@v0.0.0-20240603063001-31110fa4cae1/pkg/couchdb/stream/all_docs.go (about)

     1  // The stream package can be used for streaming CouchDB responses in JSON
     2  // format from the CouchDB cluster to a client, with the stack doing stuff like
     3  // filtering some fields. It is way faster than doing a full parsing of the
     4  // JSON response, doing stuff, and then reserialize to JSON for large payloads.
     5  package stream
     6  
     7  import (
     8  	"bytes"
     9  	"errors"
    10  	"fmt"
    11  	"io"
    12  	"strings"
    13  
    14  	"github.com/ohler55/ojg/oj"
    15  )
    16  
    17  type allDocsFilter struct {
    18  	// config
    19  	fields   [][]byte
    20  	skipDDoc bool
    21  
    22  	// state
    23  	w          io.Writer
    24  	row        oj.Builder // The current row without the filtered fields
    25  	rowIsDDoc  bool       // The current row is a design doc
    26  	inDoc      bool       // The current value is inside the "doc" part of a row
    27  	path       []byte     // The JSON object keys leading to the current position, joined with `.` (inside a doc)
    28  	depth      int        // The number of `{` and `[` minus the number of `}` and `]`
    29  	matchedAt  int        // The depth of an exact match on a field, or -1
    30  	rejectedAt int        // The depth where no fields can match (partial or exact), or -1
    31  	total      int        // The number of rows kept
    32  	err        error
    33  }
    34  
    35  // NewAllDocsFilter creates an object that can be used to remove some fields
    36  // from a response to the all_docs endpoint of CouchDB.
    37  func NewAllDocsFilter(fields []string) *allDocsFilter {
    38  	slices := make([][]byte, 0, len(fields))
    39  	for _, field := range fields {
    40  		slices = append(slices, []byte(field))
    41  	}
    42  	return &allDocsFilter{fields: slices}
    43  }
    44  
    45  // SkipDesignDocs must be called to configure the filter to also remove the
    46  // design docs.
    47  func (f *allDocsFilter) SkipDesignDocs() {
    48  	f.skipDDoc = true
    49  }
    50  
    51  // Stream will read the JSON response from CouchDB as the r reader, and will
    52  // write the filtered JSON to the w writer to be sent to the client.
    53  func (f *allDocsFilter) Stream(r io.Reader, w io.Writer) error {
    54  	f.w = w
    55  	f.path = make([]byte, 0, 128)
    56  	f.depth = 0
    57  	f.matchedAt = -1
    58  	f.rejectedAt = -1
    59  	f.total = 0
    60  	f.err = nil
    61  
    62  	if err := oj.TokenizeLoad(r, f); err != nil {
    63  		return err
    64  	}
    65  	return f.err
    66  }
    67  
    68  var (
    69  	keySlice   = []byte("key")
    70  	arraySlice = []byte("[]")
    71  	idSlice    = []byte("id")
    72  	docSlice   = []byte("doc")
    73  )
    74  
    75  func (f *allDocsFilter) isKeptField() bool {
    76  	// Decision has already been made at an higher level
    77  	if f.matchedAt >= 0 {
    78  		return true
    79  	}
    80  	if f.rejectedAt >= 0 {
    81  		return false
    82  	}
    83  
    84  	// Special cases
    85  	if len(f.fields) == 0 {
    86  		return true
    87  	}
    88  	if f.depth <= 3 || !f.inDoc {
    89  		// keys at global level: offset, rows, and total_rows
    90  		// keys at row level: id, key, value, and doc
    91  		// keys at row.value level: rev
    92  		// -> we can remove key (same as id) to gain a few kbs in the response
    93  		return !bytes.Equal(f.path, keySlice)
    94  	}
    95  
    96  	// Looks at fields to decide
    97  	for _, field := range f.fields {
    98  		if bytes.Equal(field, f.path) {
    99  			return true
   100  		}
   101  	}
   102  	return false
   103  }
   104  
   105  // currentKey returns the last object key we have seen.
   106  func (f *allDocsFilter) currentKey() []byte {
   107  	idx := bytes.LastIndexByte(f.path, '.')
   108  	if idx == -1 {
   109  		return f.path
   110  	}
   111  	return f.path[idx+1:]
   112  }
   113  
   114  // popKey removes the given key from the path after we have finished processing
   115  // its value.
   116  func (f *allDocsFilter) popKey(key []byte) {
   117  	pos := len(f.path) - len(key) - 1
   118  	if pos > 0 {
   119  		f.path = f.path[:pos]
   120  	} else {
   121  		f.path = f.path[:0]
   122  	}
   123  }
   124  
   125  // value is used for basic values in JSON: nulls, booleans, numbers and strings.
   126  func (f *allDocsFilter) value(value interface{}) {
   127  	var err error
   128  	key := f.currentKey()
   129  	if bytes.Equal(key, arraySlice) {
   130  		if f.rejectedAt < 0 {
   131  			err = f.row.Value(value)
   132  		}
   133  	} else {
   134  		if f.isKeptField() {
   135  			err = f.row.Value(value, string(key))
   136  		}
   137  		f.popKey(key)
   138  	}
   139  	if err != nil && f.err == nil {
   140  		f.err = err
   141  	}
   142  }
   143  
   144  func (f *allDocsFilter) Null() {
   145  	f.value(nil)
   146  }
   147  
   148  func (f *allDocsFilter) Bool(b bool) {
   149  	f.value(b)
   150  }
   151  
   152  func (f *allDocsFilter) Int(i int64) {
   153  	if f.depth > 2 { // total_rows and offset are not kept from the reader
   154  		f.value(i)
   155  	}
   156  }
   157  
   158  func (f *allDocsFilter) Float(x float64) {
   159  	f.value(x)
   160  }
   161  
   162  func (f *allDocsFilter) Number(n string) {
   163  	if f.err == nil {
   164  		f.err = fmt.Errorf("number %q is not supported", n)
   165  	}
   166  }
   167  
   168  func (f *allDocsFilter) String(s string) {
   169  	if f.skipDDoc && f.depth == 3 &&
   170  		bytes.Equal(f.path, idSlice) && strings.HasPrefix(s, "_design") {
   171  		// skip design docs
   172  		f.rowIsDDoc = true
   173  		f.path = f.path[:0]
   174  	} else {
   175  		f.value(s)
   176  	}
   177  }
   178  
   179  func (f *allDocsFilter) Key(s string) {
   180  	if len(f.path) != 0 {
   181  		f.path = append(f.path, '.')
   182  	}
   183  	f.path = append(f.path, s...)
   184  }
   185  
   186  func (f *allDocsFilter) ObjectStart() {
   187  	var err error
   188  	switch f.depth {
   189  	case 0: // global
   190  		// nothing
   191  	case 1: // rows array
   192  		err = errors.New("unexpected case")
   193  	case 2: // a row
   194  		f.rowIsDDoc = false
   195  		f.path = f.path[:0]
   196  		err = f.row.Object()
   197  	case 3: // doc or value
   198  		if bytes.Equal(f.path, docSlice) {
   199  			f.inDoc = true
   200  		}
   201  		if len(f.fields) == 0 || f.inDoc {
   202  			err = f.row.Object(string(f.path))
   203  		}
   204  		f.path = f.path[:0]
   205  	default: // inside doc
   206  		err = f.objectStartInDoc()
   207  	}
   208  	if err != nil && f.err == nil {
   209  		f.err = err
   210  	}
   211  	f.depth++
   212  }
   213  
   214  func (f *allDocsFilter) objectStartInDoc() error {
   215  	// We are inside an object that won't be copied to the response
   216  	if f.rejectedAt >= 0 {
   217  		return nil
   218  	}
   219  
   220  	// Objects inside an array are always kept
   221  	key := f.currentKey()
   222  	if bytes.Equal(key, arraySlice) {
   223  		return f.row.Object()
   224  	}
   225  
   226  	// We keep every attribute of an included field and we keep everything if
   227  	// fields is empty.
   228  	// e.g. we keep `cozyMetadata.uploadedBy` if fields include `cozyMetadata`,
   229  	if f.matchedAt >= 0 || len(f.fields) == 0 {
   230  		return f.row.Object(string(key))
   231  	}
   232  
   233  	// Exact match
   234  	for _, field := range f.fields {
   235  		if bytes.Equal(field, f.path) {
   236  			f.matchedAt = f.depth
   237  			return f.row.Object(string(key))
   238  		}
   239  	}
   240  
   241  	// We keep parent attributes of included fields.
   242  	// e.g. we keep `metadata` if fields include `metadata.datetime`.
   243  	withDot := make([]byte, len(f.path)+1)
   244  	copy(withDot, f.path)
   245  	withDot[len(f.path)] = '.'
   246  	for _, field := range f.fields {
   247  		if bytes.HasPrefix(field, withDot) {
   248  			return f.row.Object(string(key))
   249  		}
   250  	}
   251  
   252  	// We can remove this object from the response
   253  	f.rejectedAt = f.depth
   254  	return nil
   255  }
   256  
   257  func (f *allDocsFilter) ObjectEnd() {
   258  	f.depth--
   259  
   260  	switch f.depth {
   261  	case 0: // global
   262  		// nothing
   263  	case 1: // rows array
   264  		if f.err == nil {
   265  			f.err = errors.New("unexpected case")
   266  		}
   267  	case 2: // a row
   268  		if f.rowIsDDoc {
   269  			f.row.Reset()
   270  		} else {
   271  			f.flushRow()
   272  		}
   273  	case 3: // doc or value
   274  		if len(f.fields) == 0 || f.inDoc {
   275  			f.row.Pop()
   276  		}
   277  		f.path = f.path[:0]
   278  		f.inDoc = false
   279  	default: // inside doc
   280  		f.objectEndInDoc()
   281  	}
   282  }
   283  
   284  func (f *allDocsFilter) objectEndInDoc() {
   285  	if key := f.currentKey(); !bytes.Equal(key, arraySlice) {
   286  		f.popKey(key)
   287  	}
   288  
   289  	if f.rejectedAt >= 0 {
   290  		if f.rejectedAt == f.depth {
   291  			f.rejectedAt = -1
   292  		}
   293  		return
   294  	}
   295  	if f.matchedAt == f.depth {
   296  		f.matchedAt = -1
   297  	}
   298  
   299  	f.row.Pop()
   300  }
   301  
   302  func (f *allDocsFilter) flushRow() {
   303  	prefix := ""
   304  	if f.total != 0 {
   305  		prefix = ","
   306  	}
   307  	row := prefix + oj.JSON(f.row.Result()) + "\n"
   308  	f.row.Reset()
   309  	if _, err := f.w.Write([]byte(row)); err != nil && f.err != nil {
   310  		f.err = err
   311  	}
   312  	f.total++
   313  }
   314  
   315  func (f *allDocsFilter) ArrayStart() {
   316  	f.depth++
   317  
   318  	if f.depth <= 2 {
   319  		// Special case for the rows array
   320  		if _, err := f.w.Write([]byte(`{"rows":[`)); err != nil && f.err == nil {
   321  			f.err = err
   322  		}
   323  		return
   324  	}
   325  
   326  	key := f.currentKey()
   327  	f.path = append(f.path, '.', '[', ']')
   328  
   329  	if f.rejectedAt >= 0 {
   330  		return
   331  	}
   332  
   333  	var err error
   334  	if bytes.Equal(key, arraySlice) {
   335  		err = f.row.Array()
   336  	} else if f.isKeptField() {
   337  		err = f.row.Array(string(key))
   338  	} else {
   339  		f.rejectedAt = f.depth - 1
   340  	}
   341  	if err != nil && f.err == nil {
   342  		f.err = err
   343  	}
   344  }
   345  
   346  func (f *allDocsFilter) ArrayEnd() {
   347  	f.depth--
   348  
   349  	if f.depth <= 2 {
   350  		// Special case for the rows array
   351  		buf := fmt.Sprintf(`],"offset":0,"total_rows":%d}`, f.total)
   352  		if _, err := f.w.Write([]byte(buf)); err != nil && f.err == nil {
   353  			f.err = err
   354  		}
   355  		return
   356  	}
   357  
   358  	f.popKey(arraySlice)
   359  	if key := f.currentKey(); !bytes.Equal(key, arraySlice) {
   360  		f.popKey(key)
   361  	}
   362  
   363  	if f.rejectedAt >= 0 {
   364  		if f.rejectedAt == f.depth {
   365  			f.rejectedAt = -1
   366  		}
   367  		return
   368  	}
   369  
   370  	f.row.Pop()
   371  }