// Copyright 2019 The Go Cloud Development Kit Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package fields provides a view of the fields of a struct that follows the Go
// rules, amended to consider tags and case insensitivity.
//
// Usage
//
// First define a function that interprets tags:
//
//	func parseTag(st reflect.StructTag) (name string, keep bool, other interface{}, err error) { ... }
//
// The function's return values describe whether to ignore the field
// completely or provide an alternate name, as well as other data from the
// parse that is stored to avoid re-parsing.
//
// Then define a function to validate the type:
//
//	func validate(t reflect.Type) error { ... }
//
// Then, if necessary, define a function to specify leaf types - types
// which should be considered one field and not be recursed into:
//
//	func isLeafType(t reflect.Type) bool { ... }
//
// For example:
//
//	func isLeafType(t reflect.Type) bool {
//		return t == reflect.TypeOf(time.Time{})
//	}
//
// Next, construct a Cache, passing your functions. As its name suggests, a
// Cache remembers validation and field information for a type, so subsequent
// calls with the same type are very fast.
//
//	cache := fields.NewCache(parseTag, validate, isLeafType)
//
// To get the fields of a struct type as determined by the above rules, call
// the Fields method:
//
//	fields, err := cache.Fields(reflect.TypeOf(MyStruct{}))
//
// The return value can be treated as a slice of Fields.
//
// Given a string, such as a key or column name obtained during unmarshalling,
// call MatchFold on the list of fields to find a field whose name is the best
// match:
//
//	field := fields.MatchFold(name)
//
// MatchFold looks for an exact match first, then falls back to a
// case-insensitive comparison. Use MatchExact when only an exact match is
// acceptable.
package fields

import (
	"bytes"
	"reflect"
	"sort"
	"strings"
	"sync"
)

// A Field records information about a struct field.
type Field struct {
	Name        string       // effective field name
	NameFromTag bool         // did Name come from a tag?
	Type        reflect.Type // field type
	Index       []int        // index sequence, for reflect.Value.FieldByIndex
	ParsedTag   interface{}  // third return value of the parseTag function

	// nameBytes holds Name as a byte slice so that the Match* methods can
	// compare against []byte arguments without converting on every call.
	nameBytes []byte
	// equalFold is the comparison used by MatchFoldBytes for the
	// non-exact (folded) match; it is chosen per field when the Field is built.
	equalFold func(s, t []byte) bool
}

// ParseTagFunc is a function that accepts a struct tag and returns four values: an alternative name for the field
// extracted from the tag, a boolean saying whether to keep the field or ignore it, additional data that is stored
// with the field information to avoid having to parse the tag again, and an error.
type ParseTagFunc func(reflect.StructTag) (name string, keep bool, other interface{}, err error)

// ValidateFunc is a function that accepts a reflect.Type and returns an error if the struct type is invalid in any
// way.
type ValidateFunc func(reflect.Type) error

// LeafTypesFunc is a function that accepts a reflect.Type and reports whether the type is a leaf. A field whose
// type is a leaf is recorded as a single field; the field scan does not look inside it, even if it is an
// embedded struct.
97 type LeafTypesFunc func(reflect.Type) bool 98 99 // A Cache records information about the fields of struct types. 100 // 101 // A Cache is safe for use by multiple goroutines. 102 type Cache struct { 103 parseTag ParseTagFunc 104 validate ValidateFunc 105 leafTypes LeafTypesFunc 106 cache sync.Map // from reflect.Type to cacheValue 107 } 108 109 // NewCache constructs a Cache. 110 // 111 // Its first argument should be a function that accepts 112 // a struct tag and returns four values: an alternative name for the field 113 // extracted from the tag, a boolean saying whether to keep the field or ignore 114 // it, additional data that is stored with the field information to avoid 115 // having to parse the tag again, and an error. 116 // 117 // Its second argument should be a function that accepts a reflect.Type and 118 // returns an error if the struct type is invalid in any way. For example, it 119 // may check that all of the struct field tags are valid, or that all fields 120 // are of an appropriate type. 121 func NewCache(parseTag ParseTagFunc, validate ValidateFunc, leafTypes LeafTypesFunc) *Cache { 122 if parseTag == nil { 123 parseTag = func(reflect.StructTag) (string, bool, interface{}, error) { 124 return "", true, nil, nil 125 } 126 } 127 if validate == nil { 128 validate = func(reflect.Type) error { 129 return nil 130 } 131 } 132 if leafTypes == nil { 133 leafTypes = func(reflect.Type) bool { 134 return false 135 } 136 } 137 138 return &Cache{ 139 parseTag: parseTag, 140 validate: validate, 141 leafTypes: leafTypes, 142 } 143 } 144 145 // A fieldScan represents an item on the fieldByNameFunc scan work list. 146 type fieldScan struct { 147 typ reflect.Type 148 index []int 149 } 150 151 // Fields returns all the exported fields of t, which must be a struct type. It 152 // follows the standard Go rules for embedded fields, modified by the presence 153 // of tags. The result is sorted lexicographically by index. 
154 // 155 // These rules apply in the absence of tags: 156 // Anonymous struct fields are treated as if their inner exported fields were 157 // fields in the outer struct (embedding). The result includes all fields that 158 // aren't shadowed by fields at higher level of embedding. If more than one 159 // field with the same name exists at the same level of embedding, it is 160 // excluded. An anonymous field that is not of struct type is treated as having 161 // its type as its name. 162 // 163 // Tags modify these rules as follows: 164 // A field's tag is used as its name. 165 // An anonymous struct field with a name given in its tag is treated as 166 // a field having that name, rather than an embedded struct (the struct's 167 // fields will not be returned). 168 // If more than one field with the same name exists at the same level of embedding, 169 // but exactly one of them is tagged, then the tagged field is reported and the others 170 // are ignored. 171 func (c *Cache) Fields(t reflect.Type) (List, error) { 172 if t.Kind() != reflect.Struct { 173 panic("fields: Fields of non-struct type") 174 } 175 return c.cachedTypeFields(t) 176 } 177 178 // A List is a list of Fields. 179 type List []Field 180 181 // MatchExact returns the field in the list with the given name, or nil if there is 182 // none. 183 func (l List) MatchExact(name string) *Field { 184 return l.MatchExactBytes([]byte(name)) 185 } 186 187 // MatchExactBytes is identical to MatchExact, except that the argument is a byte slice. 188 func (l List) MatchExactBytes(name []byte) *Field { 189 for _, f := range l { 190 if bytes.Equal(f.nameBytes, name) { 191 return &f 192 } 193 } 194 return nil 195 } 196 197 // MatchFold returns the field in the list whose name best matches the supplied 198 // name, nor nil if no field does. If there is a field with the exact name, it 199 // is returned. Otherwise the first field (sorted by index) whose name matches 200 // case-insensitively is returned. 
func (l List) MatchFold(name string) *Field {
	return l.MatchFoldBytes([]byte(name))
}

// MatchFoldBytes is identical to MatchFold, except that the argument is a byte slice.
//
// The returned pointer refers to the element inside the list.
func (l List) MatchFoldBytes(name []byte) *Field {
	// f remembers the first folded match; an exact match found later in the
	// list still takes precedence and is returned immediately.
	var f *Field
	for i := range l {
		ff := &l[i]
		if bytes.Equal(ff.nameBytes, name) {
			return ff
		}
		if f == nil && ff.equalFold(ff.nameBytes, name) {
			f = ff
		}
	}
	return f
}

// cacheValue is what a Cache stores per type: either the computed field list
// or the error that prevented computing it.
type cacheValue struct {
	fields List
	err    error
}

// cachedTypeFields is like typeFields but uses a cache to avoid repeated work.
// This code has been copied and modified from
// https://go.googlesource.com/go/+/go1.7.3/src/encoding/json/encode.go.
//
// On a cache miss the type is validated first; a validation error is cached
// and returned without computing the fields. Errors from typeFields are cached
// as well, so a failing type fails the same way on every later call.
func (c *Cache) cachedTypeFields(t reflect.Type) (List, error) {
	var cv cacheValue
	x, ok := c.cache.Load(t)
	if ok {
		cv = x.(cacheValue)
	} else {
		if err := c.validate(t); err != nil {
			cv = cacheValue{nil, err}
		} else {
			f, err := c.typeFields(t)
			cv = cacheValue{List(f), err}
		}
		// Load and Store are separate steps, so concurrent first calls for
		// the same type may each compute a value; the last Store is kept.
		c.cache.Store(t, cv)
	}
	return cv.fields, cv.err
}

// typeFields computes the visible fields of struct type t: it collects every
// candidate with listFields, keeps at most one field per name according to
// dominantField, and returns the survivors sorted by index sequence.
func (c *Cache) typeFields(t reflect.Type) ([]Field, error) {
	fields, err := c.listFields(t)
	if err != nil {
		return nil, err
	}
	sort.Sort(byName(fields))
	// Delete all fields that are hidden by the Go rules for embedded fields.

	// The fields are sorted in primary order of name, secondary order of field
	// index length. So the first field with a given name is the dominant one.
	var out []Field
	for advance, i := 0, 0; i < len(fields); i += advance {
		// One iteration per name.
		// Find the sequence of fields with the name of this first field.
		fi := fields[i]
		name := fi.Name
		for advance = 1; i+advance < len(fields); advance++ {
			fj := fields[i+advance]
			if fj.Name != name {
				break
			}
		}
		// Find the dominant field, if any, out of all fields that have the same name.
		dominant, ok := dominantField(fields[i : i+advance])
		if ok {
			out = append(out, dominant)
		}
	}
	sort.Sort(byIndex(out))
	return out, nil
}

// listFields walks t and the struct types embedded in it, breadth first, and
// returns every candidate field it finds. The result may contain several
// fields with the same name (including deliberate duplicates, see below);
// typeFields resolves those. The first error returned by the Cache's parseTag
// function aborts the walk.
func (c *Cache) listFields(t reflect.Type) ([]Field, error) {
	// This uses the same condition that the Go language does: there must be a unique instance
	// of the match at a given depth level. If there are multiple instances of a match at the
	// same depth, they annihilate each other and inhibit any possible match at a lower level.
	// The algorithm is breadth first search, one depth level at a time.

	// The current and next slices are work queues:
	// current lists the fields to visit on this depth level,
	// and next lists the fields on the next lower level.
	current := []fieldScan{}
	next := []fieldScan{{typ: t}}

	// nextCount records the number of times an embedded type has been
	// encountered and considered for queueing in the 'next' slice.
	// We only queue the first one, but we increment the count on each.
	// If a struct type T can be reached more than once at a given depth level,
	// then it annihilates itself and need not be considered at all when we
	// process that next depth level.
	var nextCount map[reflect.Type]int

	// visited records the structs that have been considered already.
	// Embedded pointer fields can create cycles in the graph of
	// reachable embedded types; visited avoids following those cycles.
	// It also avoids duplicated effort: if we didn't find the field in an
	// embedded type T at level 2, we won't find it in one at level 4 either.
	visited := map[reflect.Type]bool{}

	var fields []Field // Fields found.

	for len(next) > 0 {
		// Swap the queues, reusing the old current's backing array for next.
		current, next = next, current[:0]
		count := nextCount
		nextCount = nil

		// Process all the fields at this depth, now listed in 'current'.
		// The loop queues embedded fields found in 'next', for processing during the next
		// iteration. The multiplicity of the 'current' field counts is recorded
		// in 'count'; the multiplicity of the 'next' field counts is recorded in 'nextCount'.
		for _, scan := range current {
			t := scan.typ
			if visited[t] {
				// We've looked through this type before, at a higher level.
				// That higher level would shadow the lower level we're now at,
				// so this one can't be useful to us. Ignore it.
				continue
			}
			visited[t] = true
			for i := 0; i < t.NumField(); i++ {
				f := t.Field(i)

				exported := (f.PkgPath == "")

				// If a named field is unexported, ignore it. An anonymous
				// unexported field is processed, because it may contain
				// exported fields, which are visible.
				if !exported && !f.Anonymous {
					continue
				}

				// Examine the tag.
				tagName, keep, other, err := c.parseTag(f.Tag)
				if err != nil {
					return nil, err
				}
				if !keep {
					continue
				}
				// Leaf types are recorded as a single field and never recursed into.
				// NOTE(review): unlike the branch below, this path does not check
				// 'exported', so an unexported embedded leaf type is recorded too;
				// confirm that is intended.
				if c.leafTypes(f.Type) {
					fields = append(fields, newField(f, tagName, other, scan.index, i))
					continue
				}

				// ntyp stays nil for named (non-anonymous) fields.
				var ntyp reflect.Type
				if f.Anonymous {
					// Anonymous field of type T or *T.
					ntyp = f.Type
					if ntyp.Kind() == reflect.Ptr {
						ntyp = ntyp.Elem()
					}
				}

				// Record fields with a tag name, non-anonymous fields, or
				// anonymous non-struct fields.
				if tagName != "" || ntyp == nil || ntyp.Kind() != reflect.Struct {
					if !exported {
						continue
					}
					fields = append(fields, newField(f, tagName, other, scan.index, i))
					if count[t] > 1 {
						// If there were multiple instances, add a second,
						// so that the annihilation code will see a duplicate.
						fields = append(fields, fields[len(fields)-1])
					}
					continue
				}

				// Queue embedded struct fields for processing with next level,
				// but only if the embedded types haven't already been queued.
				// (Reading from a nil nextCount is fine; it is allocated just below.)
				if nextCount[ntyp] > 0 {
					nextCount[ntyp] = 2 // exact multiple doesn't matter
					continue
				}
				if nextCount == nil {
					nextCount = map[reflect.Type]int{}
				}
				nextCount[ntyp] = 1
				if count[t] > 1 {
					nextCount[ntyp] = 2 // exact multiple doesn't matter
				}
				// Build a fresh index slice so queued entries never share a
				// backing array with scan.index.
				var index []int
				index = append(index, scan.index...)
				index = append(index, i)
				next = append(next, fieldScan{ntyp, index})
			}
		}
	}
	return fields, nil
}

// newField builds the Field for struct field f, which sits at position i in
// the struct reached through index. The tag name is used as the field name
// when it is non-empty; otherwise the Go field name is used. The Field gets
// its own copy of the index sequence with i appended.
func newField(f reflect.StructField, tagName string, other interface{}, index []int, i int) Field {
	name := tagName
	if name == "" {
		name = f.Name
	}
	sf := Field{
		Name:        name,
		NameFromTag: tagName != "",
		Type:        f.Type,
		ParsedTag:   other,
		nameBytes:   []byte(name),
	}
	// foldFunc is declared outside this file's visible source; it selects the
	// comparison function later used by MatchFoldBytes.
	sf.equalFold = foldFunc(sf.nameBytes)
	sf.Index = append(sf.Index, index...)
	sf.Index = append(sf.Index, i)
	return sf
}

// byName sorts fields using the following criteria, in order:
//  1. name
//  2. embedding depth
//  3. tag presence (preferring a tagged field)
//  4. index sequence.
type byName []Field

func (x byName) Len() int { return len(x) }

func (x byName) Swap(i, j int) { x[i], x[j] = x[j], x[i] }

func (x byName) Less(i, j int) bool {
	if x[i].Name != x[j].Name {
		return x[i].Name < x[j].Name
	}
	// The length of the index sequence is the embedding depth.
	if len(x[i].Index) != len(x[j].Index) {
		return len(x[i].Index) < len(x[j].Index)
	}
	if x[i].NameFromTag != x[j].NameFromTag {
		return x[i].NameFromTag
	}
	return byIndex(x).Less(i, j)
}

// byIndex sorts fields by index sequence.
439 type byIndex []Field 440 441 func (x byIndex) Len() int { return len(x) } 442 443 func (x byIndex) Swap(i, j int) { x[i], x[j] = x[j], x[i] } 444 445 func (x byIndex) Less(i, j int) bool { 446 xi := x[i].Index 447 xj := x[j].Index 448 ln := len(xi) 449 if l := len(xj); l < ln { 450 ln = l 451 } 452 for k := 0; k < ln; k++ { 453 if xi[k] != xj[k] { 454 return xi[k] < xj[k] 455 } 456 } 457 return len(xi) < len(xj) 458 } 459 460 // dominantField looks through the fields, all of which are known to have the 461 // same name, to find the single field that dominates the others using Go's 462 // embedding rules, modified by the presence of tags. If there are multiple 463 // top-level fields, the boolean will be false: This condition is an error in 464 // Go and we skip all the fields. 465 func dominantField(fs []Field) (Field, bool) { 466 // The fields are sorted in increasing index-length order, then by presence of tag. 467 // That means that the first field is the dominant one. We need only check 468 // for error cases: two fields at top level, either both tagged or neither tagged. 469 if len(fs) > 1 && len(fs[0].Index) == len(fs[1].Index) && fs[0].NameFromTag == fs[1].NameFromTag { 470 return Field{}, false 471 } 472 return fs[0], true 473 } 474 475 // ParseStandardTag extracts the sub-tag named by key, then parses it using the 476 // de facto standard format introduced in encoding/json: 477 // "-" means "ignore this tag", unless it has options (that is, is followed by a comma), 478 // in which case it is treated a name. 479 // "<name>" provides an alternative name for the field 480 // "<name>,opt1,opt2,..." specifies options after the name. 481 // The options are returned as a []string. 
func ParseStandardTag(key string, t reflect.StructTag) (name string, keep bool, options []string) {
	// A missing sub-tag yields the empty string, which splits into a single
	// empty part: the field is kept under its own name with no options.
	pieces := strings.Split(t.Get(key), ",")
	name = pieces[0]
	switch {
	case len(pieces) == 1 && name == "-":
		// A bare "-" drops the field; "-," (a dash with options) falls
		// through and names the field "-".
		return "", false, nil
	case len(pieces) > 1:
		options = pieces[1:]
	}
	return name, true, options
}