github.com/thiagoyeds/go-cloud@v0.26.0/docstore/internal/fields/fields.go (about)

     1  // Copyright 2019 The Go Cloud Development Kit Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package fields provides a view of the fields of a struct that follows the Go
    16  // rules, amended to consider tags and case insensitivity.
    17  //
    18  // Usage
    19  //
    20  // First define a function that interprets tags:
    21  //
    22  //   func parseTag(st reflect.StructTag) (name string, keep bool, other interface{}, err error) { ... }
    23  //
    24  // The function's return values describe whether to ignore the field
    25  // completely or provide an alternate name, as well as other data from the
    26  // parse that is stored to avoid re-parsing.
    27  //
    28  // Then define a function to validate the type:
    29  //
    30  //   func validate(t reflect.Type) error { ... }
    31  //
    32  // Then, if necessary, define a function to specify leaf types - types
    33  // which should be considered one field and not be recursed into:
    34  //
    35  //   func isLeafType(t reflect.Type) bool { ... }
    36  //
    37  // eg:
    38  //
    39  //   func isLeafType(t reflect.Type) bool {
    40  //      return t == reflect.TypeOf(time.Time{})
    41  //   }
    42  //
    43  // Next, construct a Cache, passing your functions. As its name suggests, a
    44  // Cache remembers validation and field information for a type, so subsequent
    45  // calls with the same type are very fast.
    46  //
    47  //    cache := fields.NewCache(parseTag, validate, isLeafType)
    48  //
    49  // To get the fields of a struct type as determined by the above rules, call
    50  // the Fields method:
    51  //
    52  //    fields, err := cache.Fields(reflect.TypeOf(MyStruct{}))
    53  //
    54  // The return value can be treated as a slice of Fields.
    55  //
    56  // Given a string, such as a key or column name obtained during unmarshalling,
    57  // call MatchFold on the list of fields to find a field whose name is the best
    58  // match:
    59  //
    60  //   field := fields.MatchFold(name)
    61  //
    62  // MatchFold looks for an exact match first, then falls back to a
    63  // case-insensitive comparison; use MatchExact to require an exact match.
    64  package fields
    65  
    66  import (
    67  	"bytes"
    68  	"reflect"
    69  	"sort"
    70  	"strings"
    71  	"sync"
    72  )
    73  
// A Field records information about a struct field.
//
// The exported members describe the field to callers. The unexported members
// are filled in by newField so that the List matching methods can compare
// names without converting the name on every lookup.
type Field struct {
	Name        string       // effective field name
	NameFromTag bool         // did Name come from a tag?
	Type        reflect.Type // field type
	Index       []int        // index sequence, for reflect.Value.FieldByIndex
	ParsedTag   interface{}  // third return value of the parseTag function

	// nameBytes is Name as a byte slice; the Match*Bytes methods compare
	// against it directly.
	nameBytes []byte
	// equalFold is the comparison function obtained from foldFunc(nameBytes).
	// MatchFoldBytes uses it for the case-insensitive fallback.
	equalFold func(s, t []byte) bool
}
    85  
// ParseTagFunc is a function that accepts a struct tag and returns four values:
//   - name: an alternative name for the field extracted from the tag, or ""
//     to use the Go field name;
//   - keep: whether to keep the field (true) or ignore it (false);
//   - other: additional data that is stored with the field information
//     (as Field.ParsedTag) to avoid having to parse the tag again;
//   - err: a non-nil error aborts the listing of the struct's fields.
type ParseTagFunc func(reflect.StructTag) (name string, keep bool, other interface{}, err error)
    90  
// ValidateFunc is a function that accepts a reflect.Type and returns an error
// if the struct type is invalid in any way. A Cache calls it once per type,
// before listing the type's fields, and remembers the outcome.
type ValidateFunc func(reflect.Type) error
    94  
// LeafTypesFunc is a function that accepts the reflect.Type of a struct field
// and reports whether that type is a leaf: a type that is recorded as a single
// field and is never recursed into, even when it is an embedded struct.
type LeafTypesFunc func(reflect.Type) bool
    98  
// A Cache records information about the fields of struct types.
//
// The three callbacks are supplied to NewCache and are consulted only the
// first time a type is seen; afterwards the stored result (field list or
// error) is returned directly.
//
// A Cache is safe for use by multiple goroutines.
type Cache struct {
	parseTag  ParseTagFunc
	validate  ValidateFunc
	leafTypes LeafTypesFunc
	cache     sync.Map // from reflect.Type to cacheValue
}
   108  
   109  // NewCache constructs a Cache.
   110  //
   111  // Its first argument should be a function that accepts
   112  // a struct tag and returns four values: an alternative name for the field
   113  // extracted from the tag, a boolean saying whether to keep the field or ignore
   114  // it, additional data that is stored with the field information to avoid
   115  // having to parse the tag again, and an error.
   116  //
   117  // Its second argument should be a function that accepts a reflect.Type and
   118  // returns an error if the struct type is invalid in any way. For example, it
   119  // may check that all of the struct field tags are valid, or that all fields
   120  // are of an appropriate type.
   121  func NewCache(parseTag ParseTagFunc, validate ValidateFunc, leafTypes LeafTypesFunc) *Cache {
   122  	if parseTag == nil {
   123  		parseTag = func(reflect.StructTag) (string, bool, interface{}, error) {
   124  			return "", true, nil, nil
   125  		}
   126  	}
   127  	if validate == nil {
   128  		validate = func(reflect.Type) error {
   129  			return nil
   130  		}
   131  	}
   132  	if leafTypes == nil {
   133  		leafTypes = func(reflect.Type) bool {
   134  			return false
   135  		}
   136  	}
   137  
   138  	return &Cache{
   139  		parseTag:  parseTag,
   140  		validate:  validate,
   141  		leafTypes: leafTypes,
   142  	}
   143  }
   144  
// A fieldScan represents an item on the listFields scan work list: a struct
// type still to be examined, together with how it was reached.
type fieldScan struct {
	// typ is the struct type whose fields are to be scanned.
	typ reflect.Type
	// index is the index sequence leading from the root struct to the
	// embedded field of type typ; it is empty for the root struct itself.
	index []int
}
   150  
   151  // Fields returns all the exported fields of t, which must be a struct type. It
   152  // follows the standard Go rules for embedded fields, modified by the presence
   153  // of tags. The result is sorted lexicographically by index.
   154  //
   155  // These rules apply in the absence of tags:
   156  // Anonymous struct fields are treated as if their inner exported fields were
   157  // fields in the outer struct (embedding). The result includes all fields that
   158  // aren't shadowed by fields at higher level of embedding. If more than one
   159  // field with the same name exists at the same level of embedding, it is
   160  // excluded. An anonymous field that is not of struct type is treated as having
   161  // its type as its name.
   162  //
   163  // Tags modify these rules as follows:
   164  // A field's tag is used as its name.
   165  // An anonymous struct field with a name given in its tag is treated as
   166  // a field having that name, rather than an embedded struct (the struct's
   167  // fields will not be returned).
   168  // If more than one field with the same name exists at the same level of embedding,
   169  // but exactly one of them is tagged, then the tagged field is reported and the others
   170  // are ignored.
   171  func (c *Cache) Fields(t reflect.Type) (List, error) {
   172  	if t.Kind() != reflect.Struct {
   173  		panic("fields: Fields of non-struct type")
   174  	}
   175  	return c.cachedTypeFields(t)
   176  }
   177  
   178  // A List is a list of Fields.
   179  type List []Field
   180  
   181  // MatchExact returns the field in the list with the given name, or nil if there is
   182  // none.
   183  func (l List) MatchExact(name string) *Field {
   184  	return l.MatchExactBytes([]byte(name))
   185  }
   186  
   187  // MatchExactBytes is identical to MatchExact, except that the argument is a byte slice.
   188  func (l List) MatchExactBytes(name []byte) *Field {
   189  	for _, f := range l {
   190  		if bytes.Equal(f.nameBytes, name) {
   191  			return &f
   192  		}
   193  	}
   194  	return nil
   195  }
   196  
   197  // MatchFold returns the field in the list whose name best matches the supplied
   198  // name, nor nil if no field does. If there is a field with the exact name, it
   199  // is returned. Otherwise the first field (sorted by index) whose name matches
   200  // case-insensitively is returned.
   201  func (l List) MatchFold(name string) *Field {
   202  	return l.MatchFoldBytes([]byte(name))
   203  }
   204  
   205  // MatchFoldBytes is identical to MatchFold, except that the argument is a byte slice.
   206  func (l List) MatchFoldBytes(name []byte) *Field {
   207  	var f *Field
   208  	for i := range l {
   209  		ff := &l[i]
   210  		if bytes.Equal(ff.nameBytes, name) {
   211  			return ff
   212  		}
   213  		if f == nil && ff.equalFold(ff.nameBytes, name) {
   214  			f = ff
   215  		}
   216  	}
   217  	return f
   218  }
   219  
// cacheValue is the entry stored in Cache.cache for a type: either the field
// list computed for it, or the error produced while validating the type or
// listing its fields. Errors are stored as well, so a failing type is not
// examined again.
type cacheValue struct {
	fields List
	err    error
}
   224  
   225  // cachedTypeFields is like typeFields but uses a cache to avoid repeated work.
   226  // This code has been copied and modified from
   227  // https://go.googlesource.com/go/+/go1.7.3/src/encoding/json/encode.go.
   228  func (c *Cache) cachedTypeFields(t reflect.Type) (List, error) {
   229  	var cv cacheValue
   230  	x, ok := c.cache.Load(t)
   231  	if ok {
   232  		cv = x.(cacheValue)
   233  	} else {
   234  		if err := c.validate(t); err != nil {
   235  			cv = cacheValue{nil, err}
   236  		} else {
   237  			f, err := c.typeFields(t)
   238  			cv = cacheValue{List(f), err}
   239  		}
   240  		c.cache.Store(t, cv)
   241  	}
   242  	return cv.fields, cv.err
   243  }
   244  
   245  func (c *Cache) typeFields(t reflect.Type) ([]Field, error) {
   246  	fields, err := c.listFields(t)
   247  	if err != nil {
   248  		return nil, err
   249  	}
   250  	sort.Sort(byName(fields))
   251  	// Delete all fields that are hidden by the Go rules for embedded fields.
   252  
   253  	// The fields are sorted in primary order of name, secondary order of field
   254  	// index length. So the first field with a given name is the dominant one.
   255  	var out []Field
   256  	for advance, i := 0, 0; i < len(fields); i += advance {
   257  		// One iteration per name.
   258  		// Find the sequence of fields with the name of this first field.
   259  		fi := fields[i]
   260  		name := fi.Name
   261  		for advance = 1; i+advance < len(fields); advance++ {
   262  			fj := fields[i+advance]
   263  			if fj.Name != name {
   264  				break
   265  			}
   266  		}
   267  		// Find the dominant field, if any, out of all fields that have the same name.
   268  		dominant, ok := dominantField(fields[i : i+advance])
   269  		if ok {
   270  			out = append(out, dominant)
   271  		}
   272  	}
   273  	sort.Sort(byIndex(out))
   274  	return out, nil
   275  }
   276  
   277  func (c *Cache) listFields(t reflect.Type) ([]Field, error) {
   278  	// This uses the same condition that the Go language does: there must be a unique instance
   279  	// of the match at a given depth level. If there are multiple instances of a match at the
   280  	// same depth, they annihilate each other and inhibit any possible match at a lower level.
   281  	// The algorithm is breadth first search, one depth level at a time.
   282  
   283  	// The current and next slices are work queues:
   284  	// current lists the fields to visit on this depth level,
   285  	// and next lists the fields on the next lower level.
   286  	current := []fieldScan{}
   287  	next := []fieldScan{{typ: t}}
   288  
   289  	// nextCount records the number of times an embedded type has been
   290  	// encountered and considered for queueing in the 'next' slice.
   291  	// We only queue the first one, but we increment the count on each.
   292  	// If a struct type T can be reached more than once at a given depth level,
   293  	// then it annihilates itself and need not be considered at all when we
   294  	// process that next depth level.
   295  	var nextCount map[reflect.Type]int
   296  
   297  	// visited records the structs that have been considered already.
   298  	// Embedded pointer fields can create cycles in the graph of
   299  	// reachable embedded types; visited avoids following those cycles.
   300  	// It also avoids duplicated effort: if we didn't find the field in an
   301  	// embedded type T at level 2, we won't find it in one at level 4 either.
   302  	visited := map[reflect.Type]bool{}
   303  
   304  	var fields []Field // Fields found.
   305  
   306  	for len(next) > 0 {
   307  		current, next = next, current[:0]
   308  		count := nextCount
   309  		nextCount = nil
   310  
   311  		// Process all the fields at this depth, now listed in 'current'.
   312  		// The loop queues embedded fields found in 'next', for processing during the next
   313  		// iteration. The multiplicity of the 'current' field counts is recorded
   314  		// in 'count'; the multiplicity of the 'next' field counts is recorded in 'nextCount'.
   315  		for _, scan := range current {
   316  			t := scan.typ
   317  			if visited[t] {
   318  				// We've looked through this type before, at a higher level.
   319  				// That higher level would shadow the lower level we're now at,
   320  				// so this one can't be useful to us. Ignore it.
   321  				continue
   322  			}
   323  			visited[t] = true
   324  			for i := 0; i < t.NumField(); i++ {
   325  				f := t.Field(i)
   326  
   327  				exported := (f.PkgPath == "")
   328  
   329  				// If a named field is unexported, ignore it. An anonymous
   330  				// unexported field is processed, because it may contain
   331  				// exported fields, which are visible.
   332  				if !exported && !f.Anonymous {
   333  					continue
   334  				}
   335  
   336  				// Examine the tag.
   337  				tagName, keep, other, err := c.parseTag(f.Tag)
   338  				if err != nil {
   339  					return nil, err
   340  				}
   341  				if !keep {
   342  					continue
   343  				}
   344  				if c.leafTypes(f.Type) {
   345  					fields = append(fields, newField(f, tagName, other, scan.index, i))
   346  					continue
   347  				}
   348  
   349  				var ntyp reflect.Type
   350  				if f.Anonymous {
   351  					// Anonymous field of type T or *T.
   352  					ntyp = f.Type
   353  					if ntyp.Kind() == reflect.Ptr {
   354  						ntyp = ntyp.Elem()
   355  					}
   356  				}
   357  
   358  				// Record fields with a tag name, non-anonymous fields, or
   359  				// anonymous non-struct fields.
   360  				if tagName != "" || ntyp == nil || ntyp.Kind() != reflect.Struct {
   361  					if !exported {
   362  						continue
   363  					}
   364  					fields = append(fields, newField(f, tagName, other, scan.index, i))
   365  					if count[t] > 1 {
   366  						// If there were multiple instances, add a second,
   367  						// so that the annihilation code will see a duplicate.
   368  						fields = append(fields, fields[len(fields)-1])
   369  					}
   370  					continue
   371  				}
   372  
   373  				// Queue embedded struct fields for processing with next level,
   374  				// but only if the embedded types haven't already been queued.
   375  				if nextCount[ntyp] > 0 {
   376  					nextCount[ntyp] = 2 // exact multiple doesn't matter
   377  					continue
   378  				}
   379  				if nextCount == nil {
   380  					nextCount = map[reflect.Type]int{}
   381  				}
   382  				nextCount[ntyp] = 1
   383  				if count[t] > 1 {
   384  					nextCount[ntyp] = 2 // exact multiple doesn't matter
   385  				}
   386  				var index []int
   387  				index = append(index, scan.index...)
   388  				index = append(index, i)
   389  				next = append(next, fieldScan{ntyp, index})
   390  			}
   391  		}
   392  	}
   393  	return fields, nil
   394  }
   395  
   396  func newField(f reflect.StructField, tagName string, other interface{}, index []int, i int) Field {
   397  	name := tagName
   398  	if name == "" {
   399  		name = f.Name
   400  	}
   401  	sf := Field{
   402  		Name:        name,
   403  		NameFromTag: tagName != "",
   404  		Type:        f.Type,
   405  		ParsedTag:   other,
   406  		nameBytes:   []byte(name),
   407  	}
   408  	sf.equalFold = foldFunc(sf.nameBytes)
   409  	sf.Index = append(sf.Index, index...)
   410  	sf.Index = append(sf.Index, i)
   411  	return sf
   412  }
   413  
   414  // byName sorts fields using the following criteria, in order:
   415  // 1. name
   416  // 2. embedding depth
   417  // 3. tag presence (preferring a tagged field)
   418  // 4. index sequence.
   419  type byName []Field
   420  
   421  func (x byName) Len() int { return len(x) }
   422  
   423  func (x byName) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
   424  
   425  func (x byName) Less(i, j int) bool {
   426  	if x[i].Name != x[j].Name {
   427  		return x[i].Name < x[j].Name
   428  	}
   429  	if len(x[i].Index) != len(x[j].Index) {
   430  		return len(x[i].Index) < len(x[j].Index)
   431  	}
   432  	if x[i].NameFromTag != x[j].NameFromTag {
   433  		return x[i].NameFromTag
   434  	}
   435  	return byIndex(x).Less(i, j)
   436  }
   437  
   438  // byIndex sorts field by index sequence.
   439  type byIndex []Field
   440  
   441  func (x byIndex) Len() int { return len(x) }
   442  
   443  func (x byIndex) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
   444  
   445  func (x byIndex) Less(i, j int) bool {
   446  	xi := x[i].Index
   447  	xj := x[j].Index
   448  	ln := len(xi)
   449  	if l := len(xj); l < ln {
   450  		ln = l
   451  	}
   452  	for k := 0; k < ln; k++ {
   453  		if xi[k] != xj[k] {
   454  			return xi[k] < xj[k]
   455  		}
   456  	}
   457  	return len(xi) < len(xj)
   458  }
   459  
   460  // dominantField looks through the fields, all of which are known to have the
   461  // same name, to find the single field that dominates the others using Go's
   462  // embedding rules, modified by the presence of tags. If there are multiple
   463  // top-level fields, the boolean will be false: This condition is an error in
   464  // Go and we skip all the fields.
   465  func dominantField(fs []Field) (Field, bool) {
   466  	// The fields are sorted in increasing index-length order, then by presence of tag.
   467  	// That means that the first field is the dominant one. We need only check
   468  	// for error cases: two fields at top level, either both tagged or neither tagged.
   469  	if len(fs) > 1 && len(fs[0].Index) == len(fs[1].Index) && fs[0].NameFromTag == fs[1].NameFromTag {
   470  		return Field{}, false
   471  	}
   472  	return fs[0], true
   473  }
   474  
   475  // ParseStandardTag extracts the sub-tag named by key, then parses it using the
   476  // de facto standard format introduced in encoding/json:
   477  //   "-" means "ignore this tag", unless it has options (that is, is followed by a comma),
   478  //       in which case it is treated a name.
   479  //   "<name>" provides an alternative name for the field
   480  //   "<name>,opt1,opt2,..." specifies options after the name.
   481  // The options are returned as a []string.
   482  func ParseStandardTag(key string, t reflect.StructTag) (name string, keep bool, options []string) {
   483  	s := t.Get(key)
   484  	parts := strings.Split(s, ",")
   485  	if parts[0] == "-" && len(parts) == 1 {
   486  		return "", false, nil
   487  	}
   488  	if len(parts) > 1 {
   489  		options = parts[1:]
   490  	}
   491  	return parts[0], true, options
   492  }