github.com/CycloneDX/sbom-utility@v0.16.0/cmd/query.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  /*
     3   * Licensed to the Apache Software Foundation (ASF) under one or more
     4   * contributor license agreements.  See the NOTICE file distributed with
     5   * this work for additional information regarding copyright ownership.
     6   * The ASF licenses this file to You under the Apache License, Version 2.0
     7   * (the "License"); you may not use this file except in compliance with
     8   * the License.  You may obtain a copy of the License at
     9   *
    10   *     http://www.apache.org/licenses/LICENSE-2.0
    11   *
    12   * Unless required by applicable law or agreed to in writing, software
    13   * distributed under the License is distributed on an "AS IS" BASIS,
    14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15   * See the License for the specific language governing permissions and
    16   * limitations under the License.
    17   */
    18  
    19  package cmd
    20  
    21  import (
    22  	"bytes"
    23  	"encoding/gob"
    24  	"fmt"
    25  	"io"
    26  	"strconv"
    27  	"strings"
    28  
    29  	"github.com/CycloneDX/sbom-utility/common"
    30  	"github.com/CycloneDX/sbom-utility/schema"
    31  	"github.com/CycloneDX/sbom-utility/utils"
    32  	"github.com/spf13/cobra"
    33  )
    34  
// Query command flags
// These are the long flag names registered on the "query" command by
// initCommandQuery. The select/from/where values are read back by name in
// readQueryFlags; the "orderby" flag is registered but its value is not yet
// consumed (the read is still commented out as a TODO there).
const (
	FLAG_OUTPUT_FORMAT  = "format"
	FLAG_QUERY_SELECT   = "select"
	FLAG_QUERY_FROM     = "from"
	FLAG_QUERY_WHERE    = "where"
	FLAG_QUERY_ORDER_BY = "orderby"
)
    43  
// Query command flag help messages
// Each constant is the usage text passed when the matching flag is registered
// in initCommandQuery.
// NOTE(review): pflag extracts the first back-quoted span of a usage string
// and displays it as the flag's value placeholder (see pflag.UnquoteUsage).
// The back-quoted `--from` and `*` below are therefore presumably what makes
// the help output show an unexpected "type" for those flags - TODO confirm
// and, if so, switch these to plain quotes.
const (
	FLAG_QUERY_OUTPUT_FORMAT_HELP = "format output using the specified type"
	FLAG_QUERY_SELECT_HELP        = "comma-separated list of JSON key names used to select fields within the object designated by the FROM flag" +
		"\n- the wildcard character `*` can be used to denote inclusion of all found key-values"
	FLAG_QUERY_FROM_HELP = "dot-separated list of JSON key names used to dereference into the JSON document" +
		"\n - if not present, the query assumes document \"root\" as the `--from` object"
	FLAG_QUERY_WHERE_HELP    = "comma-separated list of key=<regex> of clauses used to filter the SELECT result set"
	FLAG_QUERY_ORDER_BY_HELP = "key name that appears in the SELECT result set used to order the result records"
)
    54  
// QUERY_SUPPORTED_FORMATS is the suffix appended to the "--format" flag's help
// text (see initCommandQuery). It lists the output formats the query command
// advertises; at present that list contains only FORMAT_JSON.
var QUERY_SUPPORTED_FORMATS = MSG_SUPPORTED_OUTPUT_FORMATS_HELP +
	strings.Join([]string{FORMAT_JSON}, ", ")
    57  
    58  func NewCommandQuery() *cobra.Command {
    59  	var command = new(cobra.Command)
    60  	command.Use = CMD_USAGE_QUERY
    61  	command.Short = "Query objects and key-values from SBOM (JSON) document"
    62  	command.Long = "SQL-like query (i.e. SELECT x,y FROM a.b.c WHERE x=<regex>) of JSON objects and specified fields from SBOM (JSON) document."
    63  	command.RunE = queryCmdImpl
    64  	command.PreRunE = func(cmd *cobra.Command, args []string) error {
    65  		return preRunTestForInputFile(args)
    66  	}
    67  	initCommandQuery(command)
    68  	return command
    69  }
    70  
    71  func initCommandQuery(command *cobra.Command) {
    72  	getLogger().Enter()
    73  	defer getLogger().Exit()
    74  
    75  	// Add local flags to command
    76  	command.PersistentFlags().StringVar(&utils.GlobalFlags.PersistentFlags.OutputFormat, FLAG_OUTPUT_FORMAT, FORMAT_JSON,
    77  		FLAG_QUERY_OUTPUT_FORMAT_HELP+QUERY_SUPPORTED_FORMATS)
    78  	command.Flags().StringP(FLAG_QUERY_SELECT, "", common.QUERY_TOKEN_WILDCARD, FLAG_QUERY_SELECT_HELP)
    79  	// NOTE: TODO: There appears to be a bug in Cobra where the type of the `from`` flag is `--from` (i.e., not string)
    80  	// This bug does not exhibit on any other flags
    81  	command.Flags().StringP(FLAG_QUERY_FROM, "", "", FLAG_QUERY_FROM_HELP)
    82  	command.Flags().StringP(FLAG_QUERY_WHERE, "", "", FLAG_QUERY_WHERE_HELP)
    83  	command.Flags().StringP(FLAG_QUERY_ORDER_BY, "", "", FLAG_QUERY_ORDER_BY_HELP)
    84  }
    85  
    86  // TODO: Support the --output <file> flag
    87  // TODO: are there other output formats besides JSON (default)?
    88  func queryCmdImpl(cmd *cobra.Command, args []string) (err error) {
    89  	getLogger().Enter()
    90  	defer getLogger().Exit(err)
    91  
    92  	// Create output writer
    93  	outputFilename := utils.GlobalFlags.PersistentFlags.OutputFile
    94  	outputFile, writer, err := createOutputFile(outputFilename)
    95  	getLogger().Tracef("outputFile: `%v`; writer: `%v`", outputFilename, writer)
    96  
    97  	// use function closure to assure consistent error output based upon error type
    98  	defer func() {
    99  		// always close the output file
   100  		if outputFile != nil {
   101  			outputFile.Close()
   102  			getLogger().Infof("Closed output file: `%s`", outputFilename)
   103  		}
   104  	}()
   105  
   106  	// Parse flags into a query request struct
   107  	var queryRequest *common.QueryRequest
   108  	queryRequest, err = readQueryFlags(cmd)
   109  	if err != nil {
   110  		return
   111  	}
   112  
   113  	// allocate the result structure
   114  	var queryResult *common.QueryResponse = new(common.QueryResponse)
   115  
   116  	// Query using the request/response structures
   117  	_, errQuery := Query(writer, queryRequest, queryResult)
   118  
   119  	if errQuery != nil {
   120  		return errQuery
   121  	}
   122  
   123  	return
   124  }
   125  
   126  func readQueryFlags(cmd *cobra.Command) (qr *common.QueryRequest, err error) {
   127  	getLogger().Enter()
   128  	defer getLogger().Exit()
   129  
   130  	// Read '--select' flag second as it is the next highly likely field (used to
   131  	// reduce the result set from querying the "FROM" JSON object)
   132  	rawSelect, errGetString := cmd.Flags().GetString(FLAG_QUERY_SELECT)
   133  	getLogger().Tracef("Query: '%s' flag: %s, err: %s", FLAG_QUERY_SELECT, rawSelect, errGetString)
   134  
   135  	// Read '--from` flag first as its result is required for any other field to operate on
   136  	rawFrom, errGetString := cmd.Flags().GetString(FLAG_QUERY_FROM)
   137  	getLogger().Tracef("Query: '%s' flag: %s, err: %s", FLAG_QUERY_FROM, rawFrom, errGetString)
   138  
   139  	// Read '--where' flag second as it is the next likely field
   140  	// (used to further reduce the set of results from field value "matches"
   141  	// as part of the SELECT processing)
   142  	rawWhere, errGetString := cmd.Flags().GetString(FLAG_QUERY_WHERE)
   143  	getLogger().Tracef("Query: '%s' flag: %s, err: %s", FLAG_QUERY_WHERE, rawWhere, errGetString)
   144  
   145  	// TODO: Read '--orderby' flag to be used to order by field (keys) data in the "output" phase
   146  	//rawOrderBy, errGetString := cmd.Flags().GetString(FLAG_QUERY_ORDER_BY)
   147  	//getLogger().Tracef("Query: '%s' flag: %s, err: %s", FLAG_QUERY_ORDER_BY, rawOrderBy, errGetString)
   148  
   149  	qr, err = common.NewQueryRequestSelectFromWhere(rawSelect, rawFrom, rawWhere)
   150  
   151  	return
   152  }
   153  
   154  func processQueryResults(err error) {
   155  	if err != nil {
   156  		getLogger().Error(err)
   157  	}
   158  }
   159  
   160  // Query JSON map and return selected subset
   161  // i.e., use QueryRequest (syntax) to implement the Query into the JSON document
   162  func Query(writer io.Writer, request *common.QueryRequest, response *common.QueryResponse) (resultJson interface{}, err error) {
   163  	getLogger().Enter()
   164  	defer getLogger().Exit()
   165  	// use function closure to assure consistent error output based upon error type
   166  	defer func() {
   167  		if err != nil {
   168  			processQueryResults(err)
   169  		}
   170  	}()
   171  
   172  	// Note: returns error if either file load or unmarshal to JSON map fails
   173  	var document *schema.BOM
   174  	document, err = LoadInputBOMFileAndDetectSchema()
   175  
   176  	if err != nil {
   177  		return
   178  	}
   179  
   180  	// At this time, fail SPDX format SBOMs as "unsupported"
   181  	if !document.FormatInfo.IsCycloneDx() {
   182  		err = schema.NewUnsupportedFormatForCommandError(
   183  			document.GetFilename(),
   184  			document.FormatInfo.CanonicalName,
   185  			CMD_QUERY, FORMAT_ANY)
   186  		return
   187  	}
   188  
   189  	// Assure we have a map to dereference
   190  	if document.GetJSONMap() == nil {
   191  		err = fmt.Errorf(ERR_TYPE_INVALID_JSON_MAP)
   192  		return
   193  	}
   194  
   195  	// Validate we have query request/response structs
   196  	if request == nil {
   197  		err = fmt.Errorf(common.MSG_QUERY_INVALID_REQUEST)
   198  		return
   199  	}
   200  
   201  	if response == nil {
   202  		err = fmt.Errorf(common.MSG_QUERY_INVALID_RESPONSE)
   203  		return
   204  	}
   205  
   206  	if resultJson, err = QueryJSONMap(document.GetJSONMap(), request); err != nil {
   207  		return
   208  	}
   209  
   210  	// Use the selected output device (e.g., default stdout or the specified --output-file)
   211  	// Note: JSON data files MUST ends in a newline as this is a POSIX standard
   212  	// which is already accounted for by the JSON encoder.
   213  	// NOTE: the "--format" flag is ignored for query which always outputs in JSON
   214  	_, err = utils.WriteAnyAsEncodedJSONInt(writer, resultJson,
   215  		utils.GlobalFlags.PersistentFlags.GetOutputIndentInt())
   216  
   217  	// NOTE: previously, query results defaulted to an indent of 2 spaces which could be done
   218  	// just for this command as follows:
   219  	// flags := rootCmd.PersistentFlags()
   220  	// flags.Set(FLAG_OUTPUT_INDENT, "2")
   221  
   222  	return
   223  }
   224  
   225  func QueryJSONMap(jsonMap map[string]interface{}, request *common.QueryRequest) (resultJson interface{}, err error) {
   226  	// Query set of FROM objects
   227  	// if a FROM select object is not provided, assume "root" search
   228  	if len(request.GetFromKeys()) == 0 {
   229  		getLogger().Tracef("request object FROM selector empty; assume query uses document \"root\".")
   230  	}
   231  
   232  	if resultJson, err = findFromObject(request, jsonMap); err != nil {
   233  		return
   234  	}
   235  
   236  	// SELECT specific fields from the FROM object(s)
   237  	// logic varies depending on data type of FROM object (i.e., map or slice)
   238  	switch typedResult := resultJson.(type) {
   239  	case map[string]interface{}:
   240  		// use this (map) output instead of the one from the "find" stage
   241  		resultJson, err = selectFieldsFromMap(request, typedResult)
   242  		if err != nil {
   243  			getLogger().Debugf("selectFieldsFromMap() failed. QueryRequest: %s", request.String())
   244  			return
   245  		}
   246  		// Warn WHERE clause cannot be applied to a single map object; it was
   247  		// intended only for slices of objects... ignore and return ALL fields
   248  		whereFilters, _ := request.GetWhereFilters()
   249  		if len(whereFilters) > 0 {
   250  			getLogger().Warningf("Cannot apply WHERE filter (%v) to a singleton FROM object (%v)",
   251  				whereFilters,
   252  				request.GetFromKeys())
   253  		}
   254  	case []interface{}:
   255  		fromObjectSlice, _ := resultJson.([]interface{})
   256  		resultJson, err = selectFieldsFromSlice(request, fromObjectSlice)
   257  		if err != nil {
   258  			getLogger().Debugf("selectFieldsFromSlice() failed. QueryRequest: %s", request.String())
   259  			return
   260  		}
   261  	default:
   262  		// NOTE: this SHOULD never be reached from the "query" command
   263  		// as the FROM should always reference a map or []interface{}
   264  		// which is required for JSON output results.
   265  		err = common.NewQueryResultInvalidTypeError(request, typedResult)
   266  		return
   267  	}
   268  
   269  	if err != nil {
   270  		// TODO: use %w once supported by logging package
   271  		getLogger().Debugf("unhandled error: %s, QueryRequest: %s", err, request.String())
   272  		return
   273  	}
   274  	return
   275  }
   276  
   277  func findFromObject(request *common.QueryRequest, jsonMap map[string]interface{}) (pResults interface{}, err error) {
   278  	getLogger().Enter()
   279  	defer getLogger().Exit()
   280  
   281  	// initialize local map pointer and its return value to starting JSON map
   282  	var tempMap map[string]interface{} = jsonMap
   283  	pResults = jsonMap
   284  
   285  	getLogger().Tracef("Finding JSON object using path key(s): %v\n", request.GetFromKeys())
   286  
   287  	for i, key := range request.GetFromKeys() {
   288  		pResults = tempMap[key]
   289  
   290  		// if we find a nil value, this means we failed to find the object
   291  		if pResults == nil {
   292  			err = common.NewQueryFromClauseError(
   293  				request,
   294  				fmt.Sprintf("%s: (%s)", MSG_QUERY_ERROR_FROM_KEY_NOT_FOUND, key))
   295  			return
   296  		}
   297  
   298  		switch typedResult := pResults.(type) {
   299  		case map[string]interface{}:
   300  			// If the resulting value is indeed another map type, we expect for a Json Map
   301  			// we preserve that pointer for the next iteration
   302  			tempMap = typedResult //pResults.(map[string]interface{})
   303  		case []interface{}:
   304  			// TODO: We only support a slice (i.e., []interface{}) as the last selector
   305  			// in theory, we could support arrays (perhaps array notation) in the FROM clause
   306  			// at any point (e.g., "metadata.component.properties[0]").
   307  			// TODO: we should still be able to support implicit arrays as well.
   308  
   309  			// We no longer have a map to dereference into
   310  			// So if there are more keys left as selectors it is an error
   311  			if len(request.GetFromKeys()) > i+1 {
   312  				err = common.NewQueryFromClauseError(request,
   313  					fmt.Sprintf("%s: (%s)", MSG_QUERY_ERROR_FROM_KEY_SLICE_DEREFERENCE, key))
   314  				return
   315  			}
   316  		default:
   317  			// NOTE: this SHOULD never be reached from the "query" command
   318  			// as the FROM should always reference a map or []interface{}
   319  			// which is required for JSON output results.
   320  			err = common.NewQueryResultInvalidTypeError(request, typedResult)
   321  			return
   322  		}
   323  	}
   324  	return
   325  }
   326  
   327  // NOTE: it is the caller's responsibility to convert to other output formats
   328  // based upon other flag values
   329  func selectFieldsFromMap(request *common.QueryRequest, jsonMap map[string]interface{}) (mapSelectedFields map[string]interface{}, err error) {
   330  	getLogger().Enter()
   331  	defer getLogger().Exit()
   332  
   333  	selectors := request.GetSelectKeys()
   334  
   335  	// Default to wildcard behavior (i.e., if request had no "select keys")
   336  	// NOTE: The default set by the CLI framework SHOULD be QUERY_TOKEN_WILDCARD
   337  	if len(selectors) == 0 {
   338  		getLogger().Tracef("no query selectors found (i.e., length=0); defaulting to wildcard behavior. ")
   339  		return jsonMap, nil
   340  	}
   341  
   342  	// Check for wildcard; if it is the only selector, return the original map
   343  	if len(selectors) == 1 && selectors[0] == common.QUERY_TOKEN_WILDCARD {
   344  		getLogger().Tracef("wildcard only selector found; returning entire map.")
   345  		return jsonMap, nil
   346  	}
   347  
   348  	// allocate map to hold selected fields
   349  	mapSelectedFields = make(map[string]interface{})
   350  
   351  	// Copy selected fields into output map
   352  	// NOTE: wildcard "short-circuit" returns original map above
   353  	for _, fieldKey := range selectors {
   354  		// validate wildcard not used with other fields; if so, that is a conflict
   355  		if fieldKey == common.QUERY_TOKEN_WILDCARD {
   356  			err = common.NewQuerySelectClauseError(
   357  				request,
   358  				MSG_QUERY_ERROR_SELECT_WILDCARD)
   359  			getLogger().Trace(err)
   360  			return
   361  		}
   362  
   363  		mapSelectedFields[fieldKey] = jsonMap[fieldKey]
   364  	}
   365  
   366  	return
   367  }
   368  
   369  // NOTE: it is the caller's responsibility to convert to other output formats
   370  // based upon other flag values
   371  func selectFieldsFromSlice(request *common.QueryRequest, jsonSlice []interface{}) (sliceSelectedFields []interface{}, err error) {
   372  	getLogger().Enter()
   373  	defer getLogger().Exit()
   374  
   375  	var whereFilters []common.WhereFilter
   376  	whereFilters, err = request.GetWhereFilters()
   377  	getLogger().Debugf("whereFilters: %v", whereFilters)
   378  	if err != nil {
   379  		return
   380  	}
   381  
   382  	// Add only those objects whose field values match provided WhereFilters
   383  	// and add them to a new "result" slice for further SELECT operations.
   384  	// If no WhereFilters were provided, then add the object to the "result" slice.
   385  	var match bool
   386  	for _, iObject := range jsonSlice {
   387  		mapObject, ok := iObject.(map[string]interface{})
   388  
   389  		if !ok {
   390  			err = getLogger().Errorf("Unable to convert object: %v, to map[string]interface{}", iObject)
   391  			return
   392  		}
   393  
   394  		// If where filters exist, apply them to the map object
   395  		// to see if it should be included in the result map
   396  		if whereFilters != nil {
   397  			if match, err = whereFilterMatch(mapObject, whereFilters); err != nil {
   398  				return
   399  			}
   400  		}
   401  
   402  		// If no WHERE filters were provided OR we matched all the regex comparisons,
   403  		// against the original map object, then add a new map object with only the
   404  		// SELECT(ed) fields requested.
   405  		if whereFilters == nil || match {
   406  			if mapObject, err = selectFieldsFromMap(request, mapObject); err != nil {
   407  				return
   408  			}
   409  			// Reduce result object to only the requested SELECT fields
   410  			sliceSelectedFields = append(sliceSelectedFields, mapObject)
   411  		}
   412  	}
   413  
   414  	return
   415  }
   416  
   417  // Note: Golang supports the RE2 regular exp. engine which does not support many
   418  // features such as lookahead, lookbehind, etc.
   419  // See: https://en.wikipedia.org/wiki/Comparison_of_regular_expression_engines
   420  func whereFilterMatch(mapObject map[string]interface{}, whereFilters []common.WhereFilter) (match bool, err error) {
   421  	var buf bytes.Buffer
   422  	var key string
   423  
   424  	// create a byte encoder
   425  	enc := gob.NewEncoder(&buf)
   426  
   427  	for _, filter := range whereFilters {
   428  
   429  		key = filter.Key
   430  		value, present := mapObject[key]
   431  		if !present {
   432  			match = false
   433  			err = getLogger().Errorf("key `%s` not found in object map", key)
   434  			break
   435  		}
   436  
   437  		// Reset the encoder'a byte buffer on each iteration and
   438  		// convert the value (an interface{}) to []byte we can use on regex. eval.
   439  		buf.Reset()
   440  
   441  		// Do not encode nil pointer values; replace with empty string
   442  		if value == nil {
   443  			value = ""
   444  		}
   445  
   446  		// Handle non-string data types in the map by converting them to string
   447  		switch data := value.(type) {
   448  		case bool:
   449  			value = strconv.FormatBool(data)
   450  		case int:
   451  			value = strconv.Itoa(data)
   452  		}
   453  
   454  		err = enc.Encode(value)
   455  
   456  		if err != nil {
   457  			err = getLogger().Errorf("Unable to convert value: `%v`, to []byte", value)
   458  			return
   459  		}
   460  
   461  		// Test that the field value matches the regex supplied in the current filter
   462  		// Note: the regex compilation is performed during command param. processing
   463  		if match = filter.ValueRegEx.Match(buf.Bytes()); !match {
   464  			break
   465  		}
   466  	}
   467  
   468  	return
   469  }