github.com/cozy/cozy-stack@v0.0.0-20240603063001-31110fa4cae1/pkg/couchdb/stream/all_docs.go (about) 1 // The stream package can be used for streaming CouchDB responses in JSON 2 // format from the CouchDB cluster to a client, with the stack doing stuff like 3 // filtering some fields. It is way faster that doing a full parsing of the 4 // JSON response, doing stuff, and then reserialize to JSON for large payloads. 5 package stream 6 7 import ( 8 "bytes" 9 "errors" 10 "fmt" 11 "io" 12 "strings" 13 14 "github.com/ohler55/ojg/oj" 15 ) 16 17 type allDocsFilter struct { 18 // config 19 fields [][]byte 20 skipDDoc bool 21 22 // state 23 w io.Writer 24 row oj.Builder // The current row without the filtered fields 25 rowIsDDoc bool // The current row is a design doc 26 inDoc bool // The current value is inside the "doc" part of a row 27 path []byte // The JSON object keys leading to the current position, joined with `.` (inside a doc) 28 depth int // The number of `{` and `[` minus the number of `}` and `]` 29 matchedAt int // The depth of an exact match on a field, or -1 30 rejectedAt int // The depth where no fields can match (partial or exact), or -1 31 total int // The number of rows kept 32 err error 33 } 34 35 // NewAllDocsFilter creates an object that can be used to remove some fields 36 // from a response to the all_docs endpoint of CouchDB. 37 func NewAllDocsFilter(fields []string) *allDocsFilter { 38 slices := make([][]byte, 0, len(fields)) 39 for _, field := range fields { 40 slices = append(slices, []byte(field)) 41 } 42 return &allDocsFilter{fields: slices} 43 } 44 45 // SkipDesignDocs must be called to configure the filter to also remove the 46 // design docs. 47 func (f *allDocsFilter) SkipDesignDocs() { 48 f.skipDDoc = true 49 } 50 51 // Stream will read the JSON response from CouchDB as the r reader, and will 52 // write the filtered JSON to the w writer to be sent to the client. 53 func (f *allDocsFilter) Stream(r io.Reader, w io.Writer) error { 54 f.w = w 55 f.path = make([]byte, 0, 128) 56 f.depth = 0 57 f.matchedAt = -1 58 f.rejectedAt = -1 59 f.total = 0 60 f.err = nil 61 62 if err := oj.TokenizeLoad(r, f); err != nil { 63 return err 64 } 65 return f.err 66 } 67 68 var ( 69 keySlice = []byte("key") 70 arraySlice = []byte("[]") 71 idSlice = []byte("id") 72 docSlice = []byte("doc") 73 ) 74 75 func (f *allDocsFilter) isKeptField() bool { 76 // Decision has already been made at an higher level 77 if f.matchedAt >= 0 { 78 return true 79 } 80 if f.rejectedAt >= 0 { 81 return false 82 } 83 84 // Special cases 85 if len(f.fields) == 0 { 86 return true 87 } 88 if f.depth <= 3 || !f.inDoc { 89 // keys at global level: offset, rows, and total_rows 90 // keys at row level: id, key, value, and doc 91 // keys at row.value level: rev 92 // -> we can remove key (same as id) to gain a few kbs in the response 93 return !bytes.Equal(f.path, keySlice) 94 } 95 96 // Looks at fields to decide 97 for _, field := range f.fields { 98 if bytes.Equal(field, f.path) { 99 return true 100 } 101 } 102 return false 103 } 104 105 // currentKey returns the last object key we have seen. 106 func (f *allDocsFilter) currentKey() []byte { 107 idx := bytes.LastIndexByte(f.path, '.') 108 if idx == -1 { 109 return f.path 110 } 111 return f.path[idx+1:] 112 } 113 114 // popKey removes the given key from the path after we have finished processing 115 // its value. 116 func (f *allDocsFilter) popKey(key []byte) { 117 pos := len(f.path) - len(key) - 1 118 if pos > 0 { 119 f.path = f.path[:pos] 120 } else { 121 f.path = f.path[:0] 122 } 123 } 124 125 // value is used for basic values in JSON: nulls, booleans, numbers and strings. 126 func (f *allDocsFilter) value(value interface{}) { 127 var err error 128 key := f.currentKey() 129 if bytes.Equal(key, arraySlice) { 130 if f.rejectedAt < 0 { 131 err = f.row.Value(value) 132 } 133 } else { 134 if f.isKeptField() { 135 err = f.row.Value(value, string(key)) 136 } 137 f.popKey(key) 138 } 139 if err != nil && f.err == nil { 140 f.err = err 141 } 142 } 143 144 func (f *allDocsFilter) Null() { 145 f.value(nil) 146 } 147 148 func (f *allDocsFilter) Bool(b bool) { 149 f.value(b) 150 } 151 152 func (f *allDocsFilter) Int(i int64) { 153 if f.depth > 2 { // total_rows and offset are not kept from the reader 154 f.value(i) 155 } 156 } 157 158 func (f *allDocsFilter) Float(x float64) { 159 f.value(x) 160 } 161 162 func (f *allDocsFilter) Number(n string) { 163 if f.err == nil { 164 f.err = fmt.Errorf("number %q is not supported", n) 165 } 166 } 167 168 func (f *allDocsFilter) String(s string) { 169 if f.skipDDoc && f.depth == 3 && 170 bytes.Equal(f.path, idSlice) && strings.HasPrefix(s, "_design") { 171 // skip design docs 172 f.rowIsDDoc = true 173 f.path = f.path[:0] 174 } else { 175 f.value(s) 176 } 177 } 178 179 func (f *allDocsFilter) Key(s string) { 180 if len(f.path) != 0 { 181 f.path = append(f.path, '.') 182 } 183 f.path = append(f.path, s...) 184 } 185 186 func (f *allDocsFilter) ObjectStart() { 187 var err error 188 switch f.depth { 189 case 0: // global 190 // nothing 191 case 1: // rows array 192 err = errors.New("unexpected case") 193 case 2: // a row 194 f.rowIsDDoc = false 195 f.path = f.path[:0] 196 err = f.row.Object() 197 case 3: // doc or value 198 if bytes.Equal(f.path, docSlice) { 199 f.inDoc = true 200 } 201 if len(f.fields) == 0 || f.inDoc { 202 err = f.row.Object(string(f.path)) 203 } 204 f.path = f.path[:0] 205 default: // inside doc 206 err = f.objectStartInDoc() 207 } 208 if err != nil && f.err == nil { 209 f.err = err 210 } 211 f.depth++ 212 } 213 214 func (f *allDocsFilter) objectStartInDoc() error { 215 // We are inside an object that won't be copied to the response 216 if f.rejectedAt >= 0 { 217 return nil 218 } 219 220 // Objects inside an array are always kept 221 key := f.currentKey() 222 if bytes.Equal(key, arraySlice) { 223 return f.row.Object() 224 } 225 226 // We keep every attribute of an included field and we keep everything if 227 // fields is empty. 228 // e.g. we keep `cozyMetadata.uploadedBy` if fields include `cozyMetadata`, 229 if f.matchedAt >= 0 || len(f.fields) == 0 { 230 return f.row.Object(string(key)) 231 } 232 233 // Exact match 234 for _, field := range f.fields { 235 if bytes.Equal(field, f.path) { 236 f.matchedAt = f.depth 237 return f.row.Object(string(key)) 238 } 239 } 240 241 // We keep parent attributes of included fields. 242 // e.g. we keep `metadata` if fields include `metadata.datetime`. 243 withDot := make([]byte, len(f.path)+1) 244 copy(withDot, f.path) 245 withDot[len(f.path)] = '.' 246 for _, field := range f.fields { 247 if bytes.HasPrefix(field, withDot) { 248 return f.row.Object(string(key)) 249 } 250 } 251 252 // We can remove this object from the response 253 f.rejectedAt = f.depth 254 return nil 255 } 256 257 func (f *allDocsFilter) ObjectEnd() { 258 f.depth-- 259 260 switch f.depth { 261 case 0: // global 262 // nothing 263 case 1: // rows array 264 if f.err == nil { 265 f.err = errors.New("unexpected case") 266 } 267 case 2: // a row 268 if f.rowIsDDoc { 269 f.row.Reset() 270 } else { 271 f.flushRow() 272 } 273 case 3: // doc or value 274 if len(f.fields) == 0 || f.inDoc { 275 f.row.Pop() 276 } 277 f.path = f.path[:0] 278 f.inDoc = false 279 default: // inside doc 280 f.objectEndInDoc() 281 } 282 } 283 284 func (f *allDocsFilter) objectEndInDoc() { 285 if key := f.currentKey(); !bytes.Equal(key, arraySlice) { 286 f.popKey(key) 287 } 288 289 if f.rejectedAt >= 0 { 290 if f.rejectedAt == f.depth { 291 f.rejectedAt = -1 292 } 293 return 294 } 295 if f.matchedAt == f.depth { 296 f.matchedAt = -1 297 } 298 299 f.row.Pop() 300 } 301 302 func (f *allDocsFilter) flushRow() { 303 prefix := "" 304 if f.total != 0 { 305 prefix = "," 306 } 307 row := prefix + oj.JSON(f.row.Result()) + "\n" 308 f.row.Reset() 309 if _, err := f.w.Write([]byte(row)); err != nil && f.err != nil { 310 f.err = err 311 } 312 f.total++ 313 } 314 315 func (f *allDocsFilter) ArrayStart() { 316 f.depth++ 317 318 if f.depth <= 2 { 319 // Special case for the rows array 320 if _, err := f.w.Write([]byte(`{"rows":[`)); err != nil && f.err == nil { 321 f.err = err 322 } 323 return 324 } 325 326 key := f.currentKey() 327 f.path = append(f.path, '.', '[', ']') 328 329 if f.rejectedAt >= 0 { 330 return 331 } 332 333 var err error 334 if bytes.Equal(key, arraySlice) { 335 err = f.row.Array() 336 } else if f.isKeptField() { 337 err = f.row.Array(string(key)) 338 } else { 339 f.rejectedAt = f.depth - 1 340 } 341 if err != nil && f.err == nil { 342 f.err = err 343 } 344 } 345 346 func (f *allDocsFilter) ArrayEnd() { 347 f.depth-- 348 349 if f.depth <= 2 { 350 // Special case for the rows array 351 buf := fmt.Sprintf(`],"offset":0,"total_rows":%d}`, f.total) 352 if _, err := f.w.Write([]byte(buf)); err != nil && f.err == nil { 353 f.err = err 354 } 355 return 356 } 357 358 f.popKey(arraySlice) 359 if key := f.currentKey(); !bytes.Equal(key, arraySlice) { 360 f.popKey(key) 361 } 362 363 if f.rejectedAt >= 0 { 364 if f.rejectedAt == f.depth { 365 f.rejectedAt = -1 366 } 367 return 368 } 369 370 f.row.Pop() 371 }