github.com/anchore/syft@v1.38.2/internal/jsonschema/main.go (about)

     1  package main
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/json"
     6  	"fmt"
     7  	"io"
     8  	"os"
     9  	"path/filepath"
    10  	"reflect"
    11  	"sort"
    12  	"strings"
    13  
    14  	"github.com/iancoleman/strcase"
    15  	"github.com/invopop/jsonschema"
    16  
    17  	"github.com/anchore/syft/internal"
    18  	"github.com/anchore/syft/internal/packagemetadata"
    19  	syftJsonModel "github.com/anchore/syft/syft/format/syftjson/model"
    20  )
    21  
    22  /*
    23  This method of creating the JSON schema only captures strongly typed fields for the purpose of integrations between syft
    24  JSON output and integrations. The downside to this approach is that any values and types used on weakly typed fields
    25  are not captured (empty interfaces). This means that pkg.Package.Metadata is not validated at this time. This approach
    26  can be extended to include specific package metadata struct shapes in the future.
    27  */
    28  
// repoRoot is the absolute path to the repository root, resolved once in init();
// all generated schema paths are built relative to it.
var repoRoot string
    30  
    31  func init() {
    32  	var err error
    33  	repoRoot, err = packagemetadata.RepoRoot()
    34  	if err != nil {
    35  		fmt.Println("unable to determine repo root")
    36  		os.Exit(1)
    37  	}
    38  }
    39  
    40  func main() {
    41  	write(encode(build()))
    42  }
    43  
    44  func schemaID() jsonschema.ID {
    45  	// Today we do not host the schemas at this address, but per the JSON schema spec we should be referencing
    46  	// the schema by a URL in a domain we control. This is a placeholder for now.
    47  	return jsonschema.ID(fmt.Sprintf("anchore.io/schema/syft/json/%s", internal.JSONSchemaVersion))
    48  }
    49  
    50  func assembleTypeContainer(items []any) (any, map[string]string) {
    51  	structFields := make([]reflect.StructField, len(items))
    52  	mapping := make(map[string]string, len(items))
    53  	typesMissingNames := make([]reflect.Type, 0)
    54  	for i, item := range items {
    55  		itemType := reflect.TypeOf(item)
    56  
    57  		jsonName := packagemetadata.JSONName(item)
    58  		fieldName := strcase.ToCamel(jsonName)
    59  
    60  		if jsonName == "" {
    61  			typesMissingNames = append(typesMissingNames, itemType)
    62  			continue
    63  		}
    64  
    65  		mapping[itemType.Name()] = fieldName
    66  
    67  		structFields[i] = reflect.StructField{
    68  			Name: fieldName,
    69  			Type: itemType,
    70  		}
    71  	}
    72  
    73  	if len(typesMissingNames) > 0 {
    74  		fmt.Println("the following types are missing JSON names (manually curated in ./internal/packagemetadata/names.go):")
    75  		for _, t := range typesMissingNames {
    76  			fmt.Println("  - ", t.Name())
    77  		}
    78  		os.Exit(1)
    79  	}
    80  
    81  	structType := reflect.StructOf(structFields)
    82  	return reflect.New(structType).Elem().Interface(), mapping
    83  }
    84  
// build assembles the complete JSON schema for the syft JSON document format:
// it reflects the Document model, enriches definitions with comments extracted
// from Go sources, injects a definition for every known package metadata type,
// and sets Package.Metadata to an anyOf over all of those types (or null).
func build() *jsonschema.Schema {
	reflector := &jsonschema.Reflector{
		BaseSchemaID:              schemaID(),
		AllowAdditionalProperties: true,
		Namer: func(r reflect.Type) string {
			// strip the "JSON" prefix from model type names (e.g. JSONPackage -> Package)
			return strings.TrimPrefix(r.Name(), "JSON")
		},
		CommentMap: make(map[string]string),
	}

	// extract comments from Go source files to enrich schema descriptions
	//
	// note: AddGoComments parses from the module root and creates keys like "syft/pkg.TypeName",
	// but the reflector expects fully qualified paths like "github.com/anchore/syft/syft/pkg.TypeName".
	// We fix up the keys after extraction to match the expected format.
	if err := reflector.AddGoComments("github.com/anchore/syft", repoRoot); err != nil {
		// comment extraction is best-effort: the schema is still valid without descriptions
		fmt.Fprintf(os.Stderr, "warning: failed to extract Go comments: %v\n", err)
	} else {
		// fix up comment map keys to use fully qualified import paths
		// note: AddGoComments includes the absolute repo path WITHOUT the leading slash
		repoRootNoSlash := strings.TrimPrefix(repoRoot, "/")
		fixedMap := make(map[string]string)
		for k, v := range reflector.CommentMap {
			newKey := k
			if !strings.HasPrefix(k, "github.com/") {
				// key doesn't have module prefix, add it
				newKey = "github.com/anchore/syft/" + k
			} else if strings.Contains(k, repoRootNoSlash) {
				// key has the absolute repo path embedded, strip it
				// format: github.com/anchore/syft/Users/wagoodman/code/syft-manual/syft/pkg.Type
				// should be: github.com/anchore/syft/syft/pkg.Type
				newKey = strings.Replace(k, repoRootNoSlash+"/", "", 1)
			}
			fixedMap[newKey] = v
		}
		reflector.CommentMap = fixedMap

		// copy field comments for type aliases (e.g., type RpmArchive RpmDBEntry)
		copyAliasFieldComments(reflector.CommentMap, repoRoot)
	}

	// a synthetic struct with one field per metadata type lets us reflect all
	// metadata definitions in a single pass
	pkgMetadataContainer, pkgMetadataMapping := assembleTypeContainer(packagemetadata.AllTypes())
	pkgMetadataContainerType := reflect.TypeOf(pkgMetadataContainer)

	// srcMetadataContainer := assembleTypeContainer(sourcemetadata.AllTypes())
	// srcMetadataContainerType := reflect.TypeOf(srcMetadataContainer)

	documentSchema := reflector.ReflectFromType(reflect.TypeOf(&syftJsonModel.Document{}))
	pkgMetadataSchema := reflector.ReflectFromType(reflect.TypeOf(pkgMetadataContainer))
	// srcMetadataSchema := reflector.ReflectFromType(reflect.TypeOf(srcMetadataContainer))

	// TODO: add source metadata types

	// inject the definitions of all packages metadata into the schema definitions

	var metadataNames []string
	for typeName, definition := range pkgMetadataSchema.Definitions {
		if typeName == pkgMetadataContainerType.Name() {
			// ignore the definition for the fake container
			continue
		}

		displayName, ok := pkgMetadataMapping[typeName]
		if ok {
			// this is a package metadata type...
			documentSchema.Definitions[displayName] = definition
			metadataNames = append(metadataNames, displayName)
		} else {
			// this is a type that the metadata type uses (e.g. DpkgFileRecord)
			documentSchema.Definitions[typeName] = definition
		}
	}

	// ensure the generated list of names is stable between runs
	// (map iteration order above is random)
	sort.Strings(metadataNames)

	metadataTypes := []map[string]string{
		// allow for no metadata to be provided
		{"type": "null"},
	}
	for _, name := range metadataNames {
		metadataTypes = append(metadataTypes, map[string]string{
			"$ref": fmt.Sprintf("#/$defs/%s", name),
		})
	}

	// set the "anyOf" field for Package.Metadata to be a conjunction of several types
	documentSchema.Definitions["Package"].Properties.Set("metadata", map[string][]map[string]string{
		"anyOf": metadataTypes,
	})

	// warn about missing descriptions
	warnMissingDescriptions(documentSchema, metadataNames)

	return documentSchema
}
   181  
   182  func encode(schema *jsonschema.Schema) []byte {
   183  	newSchemaBuffer := new(bytes.Buffer)
   184  	enc := json.NewEncoder(newSchemaBuffer)
   185  	// prevent > and < from being escaped in the payload
   186  	enc.SetEscapeHTML(false)
   187  	enc.SetIndent("", "  ")
   188  	err := enc.Encode(&schema)
   189  	if err != nil {
   190  		panic(err)
   191  	}
   192  
   193  	return newSchemaBuffer.Bytes()
   194  }
   195  
   196  func write(schema []byte) {
   197  	schemaPath := filepath.Join(repoRoot, "schema", "json", fmt.Sprintf("schema-%s.json", internal.JSONSchemaVersion))
   198  	latestSchemaPath := filepath.Join(repoRoot, "schema", "json", "schema-latest.json")
   199  
   200  	if _, err := os.Stat(schemaPath); !os.IsNotExist(err) {
   201  		// check if the schema is the same...
   202  		existingFh, err := os.Open(schemaPath)
   203  		if err != nil {
   204  			panic(err)
   205  		}
   206  
   207  		existingSchemaBytes, err := io.ReadAll(existingFh)
   208  		if err != nil {
   209  			panic(err)
   210  		}
   211  
   212  		if bytes.Equal(existingSchemaBytes, schema) {
   213  			// the generated schema is the same, bail with no error :)
   214  			fmt.Println("No change to the existing schema!")
   215  			os.Exit(0)
   216  		}
   217  
   218  		// the generated schema is different, bail with error :(
   219  		fmt.Printf("Cowardly refusing to overwrite existing schema (%s)!\nSee the schema/json/README.md for how to increment\n", schemaPath)
   220  		os.Exit(1)
   221  	}
   222  
   223  	fh, err := os.Create(schemaPath)
   224  	if err != nil {
   225  		panic(err)
   226  	}
   227  	defer fh.Close()
   228  
   229  	_, err = fh.Write(schema)
   230  	if err != nil {
   231  		panic(err)
   232  	}
   233  
   234  	latestFile, err := os.Create(latestSchemaPath)
   235  	if err != nil {
   236  		panic(err)
   237  	}
   238  	defer latestFile.Close()
   239  
   240  	_, err = latestFile.Write(schema)
   241  	if err != nil {
   242  		panic(err)
   243  	}
   244  
   245  	fmt.Printf("Wrote new schema to %q\n", schemaPath)
   246  }