sigs.k8s.io/prow@v0.0.0-20240503223140-c5e374dc7eb1/pkg/genyaml/genyaml.go (about)

     1  /*
     2  Copyright 2019 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  // Package genyaml can generate an example YAML snippet from
    18  // an initialized struct and decorate it with godoc comments parsed
    19  // from the AST of a given file.
    20  //
    21  // Example:
    22  //
//	cm, err := NewCommentMap(nil, "example_config.go")
    24  //
    25  //	yamlSnippet, err := cm.GenYaml(&plugins.Configuration{
    26  //		Approve: []plugins.Approve{
    27  //			{
    28  //				Repos: []string{
    29  //					"ORGANIZATION",
    30  //					"ORGANIZATION/REPOSITORY",
    31  //				},
    32  //				IssueRequired:       false,
    33  //				RequireSelfApproval: new(bool),
    34  //				LgtmActsAsApprove:   false,
    35  //				IgnoreReviewState:   new(bool),
    36  //			},
    37  //		},
    38  //	})
    39  //
    40  // Alternatively, you can also use `PopulateStruct` to recursively fill all pointer fields, slices and maps of a struct via reflection:
    41  //
//	yamlSnippet, err := cm.GenYaml(PopulateStruct(&plugins.Configuration{}))
//
// yamlSnippet will be assigned a string containing the following YAML:
    45  //
    46  //	# Approve is the configuration for the Approve plugin.
    47  //	approve:
    48  //	  - # Repos is either of the form org/repos or just org.
    49  //		repos:
    50  //		  - ORGANIZATION
    51  //		  - ORGANIZATION/REPOSITORY
    52  //
    53  //		# IssueRequired indicates if an associated issue is required for approval in the specified repos.
    54  //		issue_required: true
    55  //
    56  //		# RequireSelfApproval requires PR authors to explicitly approve their PRs. Otherwise the plugin assumes the author of the PR approves the changes in the PR.
    57  //		require_self_approval: false
    58  //
    59  //		# LgtmActsAsApprove indicates that the lgtm command should be used to indicate approval
    60  //		lgtm_acts_as_approve: true
    61  //
    62  //		# IgnoreReviewState causes the approve plugin to ignore the GitHub review state. Otherwise: * an APPROVE github review is equivalent to leaving an \"/approve\" message. * A REQUEST_CHANGES github review is equivalent to leaving an /approve cancel\" message.
    63  //		ignore_review_state: false
    64  package genyaml
    65  
import (
	"bytes"
	"errors"
	"fmt"
	"go/ast"
	"go/doc"
	"go/parser"
	"go/token"
	"path/filepath"
	"reflect"
	"regexp"
	"strconv"
	"strings"
	"sync"

	"github.com/clarketm/json"
	yaml3 "gopkg.in/yaml.v3"
	"k8s.io/apimachinery/pkg/util/sets"
)
    84  
const (
	// jsonTag is the struct tag key that is inspected to derive a field's
	// serialized name and its options (such as "inline").
	jsonTag = "json"
)
    88  
// CommentMap is an abstract structure for storing mapped types to comments.
type CommentMap struct {
	// comments is a map of string(typeSpecName) -> string(tagName) -> Comment.
	comments map[string]map[string]Comment
	// RWMutex is a read/write mutex. addPackage takes the write lock while
	// populating comments; PrintComments and EncodeYaml take the read lock
	// while consulting it.
	sync.RWMutex
}
    96  
    97  // NewCommentMap is the constructor for CommentMap accepting a variadic number
    98  // of path and raw files contents.
    99  func NewCommentMap(rawFiles map[string][]byte, paths ...string) (*CommentMap, error) {
   100  	cm := &CommentMap{
   101  		comments: make(map[string]map[string]Comment),
   102  	}
   103  
   104  	// Group files in dir assuming they are from the same package, this
   105  	// technically doesn't hold true all the time, but is the best effort to
   106  	// ensure that generated yamls are from the same package.
   107  	type group struct {
   108  		paths       []string
   109  		rawContents map[string][]byte
   110  	}
   111  
   112  	packageFiles := map[string]*group{}
   113  	for _, path := range paths {
   114  		dir := filepath.Dir(path)
   115  		if _, ok := packageFiles[dir]; !ok {
   116  			packageFiles[dir] = &group{}
   117  		}
   118  		packageFiles[dir].paths = append(packageFiles[dir].paths, path)
   119  	}
   120  
   121  	for path, content := range rawFiles {
   122  		dir := filepath.Dir(path)
   123  		if _, ok := packageFiles[dir]; !ok {
   124  			packageFiles[dir] = &group{}
   125  		}
   126  		packageFiles[dir].rawContents = map[string][]byte{path: content}
   127  	}
   128  
   129  	for pkg, files := range packageFiles {
   130  		if err := cm.addPackage(files.paths, files.rawContents); err != nil {
   131  			return nil, fmt.Errorf("failed to add files in %s: %w", pkg, err)
   132  		}
   133  	}
   134  
   135  	return cm, nil
   136  }
   137  
// Comment is an abstract structure for storing parsed AST comments decorated with contextual information.
//
// NOTE: genDocMap builds values of this type with an unkeyed composite
// literal, so the order of the fields below must not be changed.
type Comment struct {
	// Type is the underlying type of the identifier associated with the comment.
	Type string
	// IsObj determines if the underlying type is a object type (e.g. struct) or primitive type (e.g. string).
	IsObj bool
	// Doc is a comment string parsed from the AST of a node.
	Doc string
}
   147  
   148  // marshal marshals the object into JSON then converts JSON to YAML and returns the YAML.
   149  func marshal(o interface{}) ([]byte, error) {
   150  	j, err := json.Marshal(o)
   151  	if err != nil {
   152  		return nil, fmt.Errorf("error marshaling into JSON: %w", err)
   153  	}
   154  
   155  	y, err := jsonToYaml(j)
   156  	if err != nil {
   157  		return nil, fmt.Errorf("error converting JSON to YAML: %w", err)
   158  	}
   159  
   160  	return y, nil
   161  }
   162  
   163  // jsonToYaml Converts JSON to YAML.
   164  func jsonToYaml(j []byte) ([]byte, error) {
   165  	// Convert the JSON to an object.
   166  	var jsonObj interface{}
   167  	// We are using yaml.Unmarshal here (instead of json.Unmarshal) because the
   168  	// Go JSON library doesn't try to pick the right number type (int, float,
   169  	// etc.) when unmarshalling to interface{}, it just picks float64
   170  	// universally. go-yaml does go through the effort of picking the right
   171  	// number type, so we can preserve number type throughout this process.
   172  	err := yaml3.Unmarshal(j, &jsonObj)
   173  	if err != nil {
   174  		return nil, err
   175  	}
   176  
   177  	// marshal this object into YAML.
   178  	return yaml3.Marshal(jsonObj)
   179  }
   180  
   181  // astFrom takes paths of Go files, or the content of Go files,
   182  // returns the abstract syntax tree (AST) for that file.
   183  func astFrom(paths []string, rawFiles map[string][]byte) (*doc.Package, error) {
   184  	fset := token.NewFileSet()
   185  	m := make(map[string]*ast.File)
   186  
   187  	for _, file := range paths {
   188  		f, err := parser.ParseFile(fset, file, nil, parser.ParseComments)
   189  		if err != nil {
   190  			return nil, fmt.Errorf("unable to parse file to AST from path %s: %w", file, err)
   191  		}
   192  		m[file] = f
   193  	}
   194  	for fn, content := range rawFiles {
   195  		f, err := parser.ParseFile(fset, fn, content, parser.ParseComments)
   196  		if err != nil {
   197  			return nil, fmt.Errorf("unable to parse file to AST from raw content %s: %w", fn, err)
   198  		}
   199  		m[fn] = f
   200  	}
   201  
   202  	// Copied from the go doc command: https://github.com/golang/go/blob/fc116b69e2004c159d0f2563c6e91ac75a79f872/src/go/doc/doc.go#L203
   203  	apkg, _ := ast.NewPackage(fset, m, simpleImporter, nil)
   204  
   205  	astDoc := doc.New(apkg, "", 0)
   206  	if astDoc == nil {
   207  		return nil, fmt.Errorf("unable to parse AST documentation from paths %v: got no doc", paths)
   208  	}
   209  
   210  	return astDoc, nil
   211  }
   212  
   213  func simpleImporter(imports map[string]*ast.Object, path string) (*ast.Object, error) {
   214  	pkg := imports[path]
   215  	if pkg == nil {
   216  		// note that strings.LastIndex returns -1 if there is no "/"
   217  		pkg = ast.NewObj(ast.Pkg, path[strings.LastIndex(path, "/")+1:])
   218  		pkg.Data = ast.NewScope(nil) // required by ast.NewPackage for dot-import
   219  		imports[path] = pkg
   220  	}
   221  	return pkg, nil
   222  }
   223  
   224  // fmtRawDoc formats/sanitizes a Go doc string removing TODOs, newlines, whitespace, and various other characters from the resultant string.
   225  func fmtRawDoc(rawDoc string) string {
   226  	var buffer bytes.Buffer
   227  
   228  	// Ignore all lines after ---.
   229  	rawDoc = strings.Split(rawDoc, "---")[0]
   230  
   231  	for _, line := range strings.Split(rawDoc, "\n") {
   232  		line = strings.TrimSpace(line) // Trim leading and trailing whitespace.
   233  		switch {
   234  		case strings.HasPrefix(line, "TODO"): // Ignore one line TODOs.
   235  		case strings.HasPrefix(line, "+"): // Ignore instructions to the generators.
   236  		default:
   237  			line += "\n"
   238  			buffer.WriteString(line)
   239  		}
   240  	}
   241  
   242  	postDoc := strings.TrimRight(buffer.String(), "\n")               // Remove last newline.
   243  	postDoc = strings.Replace(postDoc, "\t", " ", -1)                 // Replace tabs with spaces.
   244  	postDoc = regexp.MustCompile(` +`).ReplaceAllString(postDoc, " ") // Compress multiple spaces to a single space.
   245  
   246  	return postDoc
   247  }
   248  
   249  // fieldTag extracts the given tag or returns an empty string if the tag is not defined.
   250  func fieldTag(field *ast.Field, tag string) string {
   251  	if field.Tag == nil {
   252  		return ""
   253  	}
   254  
   255  	return reflect.StructTag(field.Tag.Value[1 : len(field.Tag.Value)-1]).Get(tag)
   256  }
   257  
   258  // fieldName extracts the name of the field as it should appear in YAML format and returns the resultant string.
   259  // "-" indicates that this field is not part of the YAML representation and is thus excluded.
   260  func fieldName(field *ast.Field, tag string) string {
   261  	tagVal := strings.Split(fieldTag(field, tag), ",")[0] // This can return "-".
   262  	if tagVal == "" {
   263  		// Set field name to the defined name in struct if defined.
   264  		if field.Names != nil {
   265  			return field.Names[0].Name
   266  		}
   267  		// Fallback field name to the immediate field type.
   268  		name, _ := fieldType(field, false)
   269  		return name
   270  	}
   271  	return tagVal
   272  }
   273  
   274  // fieldIsInlined returns true if the field is tagged with ",inline"
   275  func fieldIsInlined(field *ast.Field, tag string) bool {
   276  	values := sets.NewString(strings.Split(fieldTag(field, tag), ",")...)
   277  
   278  	return values.Has("inline")
   279  }
   280  
   281  // fieldType extracts the type of the field and returns the resultant string type and a bool indicating if it is an object type.
   282  func fieldType(field *ast.Field, recurse bool) (string, bool) {
   283  	typeName := ""
   284  	isObj, isSelect := false, false
   285  
   286  	// Find leaf node.
   287  	ast.Inspect(field, func(n ast.Node) bool {
   288  		switch x := n.(type) {
   289  		case *ast.Field:
   290  			// First node is always a field; skip.
   291  			return true
   292  		case *ast.Ident:
   293  			// Encountered a type, overwrite typeName and isObj.
   294  			typeName = x.Name
   295  			isObj = x.Obj != nil || isSelect
   296  		case *ast.SelectorExpr:
   297  			// SelectorExpr are not object types yet reference one, thus continue with DFS.
   298  			isSelect = true
   299  		}
   300  
   301  		return recurse || isSelect
   302  	})
   303  
   304  	return typeName, isObj
   305  }
   306  
   307  // getType returns the type's name within its package for a defined type. For other (non-defined) types it returns the empty string.
   308  func getType(typ interface{}) string {
   309  	t := reflect.TypeOf(typ)
   310  	if t.Kind() == reflect.Ptr {
   311  		return t.Elem().Name()
   312  	}
   313  	return t.Name()
   314  }
   315  
   316  // genDocMap extracts the name of the field as it should appear in YAML format and returns the resultant string.
   317  func (cm *CommentMap) genDocMap(packageFiles []string, rawFiles map[string][]byte) error {
   318  	pkg, err := astFrom(packageFiles, rawFiles)
   319  	if err != nil {
   320  		return fmt.Errorf("unable to generate AST documentation map: %w", err)
   321  	}
   322  
   323  	inlineFields := map[string][]string{}
   324  
   325  	for _, t := range pkg.Types {
   326  		if typeSpec, ok := t.Decl.Specs[0].(*ast.TypeSpec); ok {
   327  
   328  			var lst []*ast.Field
   329  
   330  			// Support struct type, interface type, and type alias.
   331  			switch typ := typeSpec.Type.(type) {
   332  			case *ast.InterfaceType:
   333  				lst = typ.Methods.List
   334  			case *ast.StructType:
   335  				lst = typ.Fields.List
   336  			case *ast.Ident:
   337  				// ensure that aliases for non-struct/interface types continue to work
   338  				if typ.Obj != nil {
   339  					if alias, ok := typ.Obj.Decl.(*ast.TypeSpec).Type.(*ast.InterfaceType); ok {
   340  						lst = alias.Methods.List
   341  					} else if alias, ok := typ.Obj.Decl.(*ast.TypeSpec).Type.(*ast.StructType); ok {
   342  						lst = alias.Fields.List
   343  					}
   344  				}
   345  			}
   346  
   347  			typeSpecName := typeSpec.Name.Name
   348  			cm.comments[typeSpecName] = make(map[string]Comment)
   349  
   350  			for _, field := range lst {
   351  
   352  				if tagName := fieldName(field, jsonTag); tagName != "-" {
   353  					typeName, isObj := fieldType(field, true)
   354  					docString := fmtRawDoc(field.Doc.Text())
   355  					cm.comments[typeSpecName][tagName] = Comment{typeName, isObj, docString}
   356  
   357  					if fieldIsInlined(field, jsonTag) {
   358  						existing, ok := inlineFields[typeSpecName]
   359  						if !ok {
   360  							existing = []string{}
   361  						}
   362  						inlineFields[typeSpecName] = append(existing, tagName)
   363  					}
   364  				}
   365  			}
   366  		}
   367  	}
   368  
   369  	// copy comments for inline fields from their original parent structures; this is needed
   370  	// because when walking the generated YAML, the step to switch to the "correct" parent
   371  	// struct is missing
   372  	for typeSpecName, inlined := range inlineFields {
   373  		for _, inlinedType := range inlined {
   374  			for tagName, comment := range cm.comments[inlinedType] {
   375  				cm.comments[typeSpecName][tagName] = comment
   376  			}
   377  		}
   378  	}
   379  
   380  	return nil
   381  }
   382  
// injectComment reads a YAML node and injects a head comment based on its value and typeSpec.
//
// parent is the node to decorate; its children are processed recursively.
// typeSpec is the stack of type names collected on the way down from the
// root, and depth is the index into typeSpec of the type that applies to
// parent. Nothing happens if parent is nil or depth is out of range.
//
// cm.comments is read without taking the lock here; EncodeYaml calls this
// method with the read lock held.
func (cm *CommentMap) injectComment(parent *yaml3.Node, typeSpec []string, depth int) {
	if parent == nil || depth >= len(typeSpec) {
		return
	}

	typ := typeSpec[depth]
	isArray := parent.Kind == yaml3.SequenceNode

	// Decorate YAML node with comment.
	// The lookup uses the node's own value as the field name within the current type.
	if v, ok := cm.comments[typ][parent.Value]; ok {
		parent.HeadComment = v.Doc
	}

	if parent.Content != nil {
		for i, child := range parent.Content {

			// Default type for node is current (i.e. most recent) type.
			nxtTyp := typeSpec[len(typeSpec)-1]

			if i > 0 {
				prevSibling := parent.Content[i-1]

				// Skip value nodes.
				// NOTE(review): presumably relies on mapping nodes storing keys at even and
				// values at odd indexes of Content — confirm against the yaml.v3 Node docs.
				if prevSibling.Kind == yaml3.ScalarNode && child.Kind == yaml3.ScalarNode && i%2 == 1 {
					continue
				}

				// New type detected; add type of key (i.e. prevSibling) to stack.
				if parent.Kind == yaml3.MappingNode && prevSibling.Kind == yaml3.ScalarNode {
					if subTypeSpec, ok := cm.comments[typ][prevSibling.Value]; ok && subTypeSpec.IsObj {
						nxtTyp = subTypeSpec.Type
					}
				}
			}

			// only recurse into the first element of an array, as documenting all further
			// array items would be redundant
			if !isArray || i == 0 {
				// Recurse to inject comments on nested YAML nodes.
				// The extended stack is only used for the duration of this call.
				cm.injectComment(child, append(typeSpec, nxtTyp), depth+1)
			}
		}
	}

}
   429  
   430  // PrintComments pretty prints comments.
   431  func (cm *CommentMap) PrintComments() {
   432  	cm.RLock()
   433  	defer cm.RUnlock()
   434  
   435  	data, err := json.MarshalIndent(cm.comments, "", "  ")
   436  	if err == nil {
   437  		fmt.Print(string(data))
   438  	}
   439  }
   440  
   441  // addPackage allow for adding to the CommentMap via a list of paths to go files in the same package
   442  func (cm *CommentMap) addPackage(paths []string, rawFiles map[string][]byte) error {
   443  	cm.Lock()
   444  	defer cm.Unlock()
   445  
   446  	err := cm.genDocMap(paths, rawFiles)
   447  	if err != nil {
   448  		return err
   449  	}
   450  
   451  	return nil
   452  }
   453  
   454  // GenYaml generates a fully commented YAML snippet for a given plugin configuration.
   455  func (cm *CommentMap) GenYaml(config interface{}) (string, error) {
   456  	var buffer bytes.Buffer
   457  
   458  	encoder := yaml3.NewEncoder(&buffer)
   459  
   460  	err := cm.EncodeYaml(config, encoder)
   461  	if err != nil {
   462  		return "", fmt.Errorf("failed to encode config as YAML: %w", err)
   463  	}
   464  
   465  	return buffer.String(), nil
   466  }
   467  
   468  // EncodeYaml encodes a fully commented YAML snippet for a given plugin configuration
   469  // using the given encoder.
   470  func (cm *CommentMap) EncodeYaml(config interface{}, encoder *yaml3.Encoder) error {
   471  	cm.RLock()
   472  	defer cm.RUnlock()
   473  
   474  	var baseTypeSpec = getType(config)
   475  
   476  	// Convert Config object to an abstract YAML node.
   477  	y1, err := marshal(&config)
   478  	if err != nil {
   479  		return fmt.Errorf("failed to marshal config to yaml: %w", err)
   480  	}
   481  
   482  	node := yaml3.Node{}
   483  	err = yaml3.Unmarshal([]byte(y1), &node)
   484  	if err != nil {
   485  		return errors.New("failed to unmarshal yaml to yaml node")
   486  	}
   487  
   488  	// Inject comments
   489  	cm.injectComment(&node, []string{baseTypeSpec}, 0)
   490  
   491  	return encoder.Encode(&node)
   492  }