go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/common/proto/structmask/structmask.go (about)

     1  // Copyright 2021 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package structmask implements a functionality similar to
    16  // google.protobuf.FieldMask, but which applies only to google.protobuf.Struct.
    17  //
    18  // A google.protobuf.FieldMask can refer only to valid protobuf fields and
    19  // "google.golang.org/protobuf" asserts that when serializing the field mask.
    20  // It makes this mechanism unusable for targeting "unusual" struct fields
    21  // (for example ones containing '.'). Additionally, google.protobuf.FieldMask
    22  // doesn't support wildcard matches (with '*', since it is not a valid proto
    23  // field name).
    24  package structmask
    25  
    26  import (
    27  	"fmt"
    28  
    29  	"google.golang.org/protobuf/types/known/structpb"
    30  )
    31  
// Filter knows how to use StructMask to filter google.protobuf.Struct.
//
// Construct it using NewFilter.
type Filter struct {
	// root is the top of the parsed mask tree. Apply returns its input
	// unchanged when root is nil or leafNode.
	root *node
}
    38  
    39  // NewFilter returns a filter that filters structs according to the struct mask.
    40  //
    41  // Returns an error if the struct mask is malformed. If `mask` is empty, returns
    42  // a filter that doesn't actually filter anything.
    43  func NewFilter(mask []*StructMask) (*Filter, error) {
    44  	root, err := parseMask(mask)
    45  	if err != nil {
    46  		return nil, err
    47  	}
    48  	return &Filter{root}, nil
    49  }
    50  
    51  // Apply returns a shallow copy of the struct, selecting only elements matching
    52  // the mask.
    53  //
    54  // The result may reuse fields of the original struct (i.e. it copies pointers,
    55  // whenever possible, not actual objects). In extreme case of mask `*` it will
    56  // return `s` as is.
    57  //
    58  // If you need to modify the result, consider explicitly making a deep copy with
    59  // proto.Clone first.
    60  //
    61  // If given `nil`, returns `nil` as well.
    62  func (f *Filter) Apply(s *structpb.Struct) *structpb.Struct {
    63  	if s == nil || f.root == nil || f.root == leafNode {
    64  		return s
    65  	}
    66  	filtered := f.root.filterStruct(s)
    67  	if filtered == nil {
    68  		return &structpb.Struct{}
    69  	}
    70  	// During merging we use `nil` as a stand in for "empty set after filtering",
    71  	// in lists to make it distinct from NullValue representing the real `null`.
    72  	// `nil` is not allowed in *structpb.Struct. Convert them all to real Nulls.
    73  	fillNulls(filtered)
    74  	return filtered.GetStructValue()
    75  }
    76  
    77  ////////////////////////////////////////////////////////////////////////////////
    78  
// leafNode is a sentinel node meaning "grab the rest of the value unfiltered".
//
// It is recognized by pointer identity (e.g. `n == leafNode`), not by its
// contents.
var leafNode = &node{}
    81  
// node contains a filter tree applying to some struct path element and its
// children.
//
// *node pointers have two special values:
//
//	nil - no filter is present (e.g. if `star == nil`, then do not recurse).
//	leafNode - a filter that grabs all remaining values unfiltered.
type node struct {
	star   *node            // a filter to apply to all dict fields or list indexes, if any
	fields map[string]*node // a filter for individual dict fields
}
    94  
    95  // filter recursively applies the filter tree `n` to an input value.
    96  //
    97  // It returns a filtered value with possible "gaps" in lists (represented by
    98  // nils in structpb.ListValue.Values slice). These gaps appear when the filter
    99  // filters out the entire list element. They are needed because the parent
   100  // node may still fill them in. It needs to know where gaps are to do so safely.
   101  // Note that representing them with structpb.NullValue is dangerous, since
   102  // structs can have genuine `null`s in them.
   103  //
   104  // For example, a filter `a.*.x` applied to a `{"a": [{"x": 1}, {"y": 2}]}`
   105  // results in `{"a": [{"x": 1}, <gap>]}`. Similarly `*.*.y` applied to the same
   106  // input results in `{"a": [<gap>, {"y": 2}]}`. When we join these filters, we
   107  // get the result with all gaps filled in: `{"a": [{"x": 2}, {"y": 2}]}`. Note
   108  // that since filter paths start with different tokens (`*` vs `a`) the filter
   109  // nodes that actually produce gaps reside in different branches of the tree,
   110  // separated by multiple layers. This necessitates the merging and gap filling
   111  // to be recursive (see `merge`).
   112  //
   113  // Since a correctly constructed *structpb.Value isn't allowed to have `nil`s,
   114  // all gaps left in the final result are converted to `null` at the very end of
   115  // the filtering by `fillNulls`. This is documented in the StructMask proto doc
   116  // in the section that talks about "exceptional conditions".
   117  func (n *node) filter(val *structpb.Value) *structpb.Value {
   118  	if n == leafNode {
   119  		return val
   120  	}
   121  
   122  	// Since `n` is not a leafNode, it is actually `.<something>`, i.e. it needs
   123  	// to filter inner guts of `val`. We can "dive" only into dicts and lists.
   124  	// Trying to "explore" a scalar value results in "no match" result,
   125  	// represented by `nil`. Note that it is distinct from NullValue.
   126  	//
   127  	// Also if `n` is the last `.*` of the mask, return `val` unchanged as is
   128  	// without even diving into it or checking additional masks in `fields`. This
   129  	// avoids useless memory allocation of structpb.Struct/structpb.ListValue
   130  	// wrappers.
   131  	switch v := val.Kind.(type) {
   132  	case *structpb.Value_StructValue:
   133  		if n.star == leafNode {
   134  			return val
   135  		}
   136  		return n.filterStruct(v.StructValue)
   137  	case *structpb.Value_ListValue:
   138  		if n.star == leafNode {
   139  			return val
   140  		}
   141  		return n.filterList(v.ListValue)
   142  	default:
   143  		return nil
   144  	}
   145  }
   146  
   147  func (n *node) filterStruct(val *structpb.Struct) *structpb.Value {
   148  	// Apply `*` mask first (if any).
   149  	out := &structpb.Struct{}
   150  	if n.star != nil {
   151  		out.Fields = make(map[string]*structpb.Value, len(val.Fields))
   152  		for k, v := range val.Fields {
   153  			if filtered := n.star.filter(v); filtered != nil {
   154  				out.Fields[k] = filtered
   155  			}
   156  		}
   157  	} else {
   158  		out.Fields = make(map[string]*structpb.Value, len(n.fields))
   159  	}
   160  
   161  	// Merge any additional values picked by field masks targeting individual
   162  	// dict keys.
   163  	for key, filter := range n.fields {
   164  		if input, ok := val.Fields[key]; ok {
   165  			if filtered := filter.filter(input); filtered != nil {
   166  				out.Fields[key] = merge(out.Fields[key], filtered)
   167  			}
   168  		}
   169  	}
   170  
   171  	// If filtered out all keys, return an "empty set" value represented by nil.
   172  	// Note that this drops genuinely empty dicts as well. Oh, well... This is
   173  	// somewhat negated by the early return on leaf nodes in `filter`.
   174  	if len(out.Fields) == 0 {
   175  		return nil
   176  	}
   177  
   178  	return structpb.NewStructValue(out)
   179  }
   180  
   181  func (n *node) filterList(val *structpb.ListValue) *structpb.Value {
   182  	// Only `*` is supported. Picking individual list indexes is not implemented
   183  	// yet since it is not clear how to do merging step when there are different
   184  	// masks that use `*` and concrete indexes at the same time. To do the correct
   185  	// merging we need to "remember" original indexes of items in the filtered
   186  	// list and there's no place for it in *structpb.ListValue.
   187  	if n.star == nil {
   188  		return nil
   189  	}
   190  	out := &structpb.ListValue{Values: make([]*structpb.Value, len(val.Values))}
   191  	for i, v := range val.Values {
   192  		// Note that this leaves a `nil` gap if the list element was completely
   193  		// filtered out. It is important for `merge` to know where they are to do
   194  		// merging correctly. These gaps are converted to nulls by `fillNulls` at
   195  		// the very end when all merges are done.
   196  		out.Values[i] = n.star.filter(v)
   197  	}
   198  	return structpb.NewListValue(out)
   199  }
   200  
   201  // merge merges `b` into `a`, returning a shallowly combined copy of both.
   202  //
   203  // Both `a` and `b` should be results of a filtering of the same value, thus
   204  // they (if not nil) must have the same type and (if lists) have the same
   205  // length. If they are scalars, they assumed to be equal already.
   206  //
   207  // `nil` represents "empty sets", i.e. if `a` is nil, `b` will be returned as
   208  // is and vice-versa. If both are `nil`, returns `nil` as well.
   209  func merge(a, b *structpb.Value) *structpb.Value {
   210  	switch {
   211  	case a == nil:
   212  		return b
   213  	case b == nil:
   214  		return a
   215  	case a == b:
   216  		return a
   217  	}
   218  	switch a := a.Kind.(type) {
   219  	case *structpb.Value_StructValue:
   220  		// `b` *must* be a Struct here. Panic if not, it means there's a bug.
   221  		return structpb.NewStructValue(mergeStruct(a.StructValue, b.Kind.(*structpb.Value_StructValue).StructValue))
   222  	case *structpb.Value_ListValue:
   223  		// `b` *must* be a List here. Panic if not, it means there's a bug.
   224  		return structpb.NewListValue(mergeList(a.ListValue, b.Kind.(*structpb.Value_ListValue).ListValue))
   225  	default:
   226  		return b
   227  	}
   228  }
   229  
   230  func mergeStruct(a, b *structpb.Struct) *structpb.Struct {
   231  	l := len(a.Fields)
   232  	if len(b.Fields) > l {
   233  		l = len(b.Fields)
   234  	}
   235  
   236  	out := &structpb.Struct{
   237  		Fields: make(map[string]*structpb.Value, l),
   238  	}
   239  
   240  	for key, aval := range a.Fields {
   241  		out.Fields[key] = merge(aval, b.Fields[key])
   242  	}
   243  
   244  	for key, bval := range b.Fields {
   245  		// Already dealt with A&B case above. Pick only B-A.
   246  		if _, ok := a.Fields[key]; !ok {
   247  			out.Fields[key] = bval
   248  		}
   249  	}
   250  
   251  	return out
   252  }
   253  
   254  func mergeList(a, b *structpb.ListValue) *structpb.ListValue {
   255  	if len(a.Values) != len(b.Values) {
   256  		panic(fmt.Sprintf("unexpected list lengths %d != %d", len(a.Values), len(b.Values)))
   257  	}
   258  	out := &structpb.ListValue{
   259  		Values: make([]*structpb.Value, len(a.Values)),
   260  	}
   261  	for idx := range a.Values {
   262  		out.Values[idx] = merge(a.Values[idx], b.Values[idx])
   263  	}
   264  	return out
   265  }
   266  
   267  func fillNulls(v *structpb.Value) {
   268  	switch v := v.Kind.(type) {
   269  	case *structpb.Value_StructValue:
   270  		for key, elem := range v.StructValue.Fields {
   271  			if elem == nil {
   272  				// We leave nils only in lists, not in structs.
   273  				panic(fmt.Sprintf("unexpected nil for struct key %q", key))
   274  			} else {
   275  				fillNulls(elem)
   276  			}
   277  		}
   278  	case *structpb.Value_ListValue:
   279  		for idx, elem := range v.ListValue.Values {
   280  			if elem == nil {
   281  				v.ListValue.Values[idx] = structpb.NewNullValue()
   282  			} else {
   283  				fillNulls(elem)
   284  			}
   285  		}
   286  	}
   287  }