github.com/qri-io/qri@v0.10.1-0.20220104210721-c771715036cb/cmd/validate.go (about)

     1  package cmd
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"encoding/csv"
     7  	"encoding/json"
     8  	"errors"
     9  	"fmt"
    10  	"strconv"
    11  	"strings"
    12  
    13  	"github.com/qri-io/dataset"
    14  	"github.com/qri-io/ioes"
    15  	"github.com/qri-io/jsonschema"
    16  	"github.com/qri-io/qri/lib"
    17  	"github.com/qri-io/qri/repo"
    18  	"github.com/spf13/cobra"
    19  )
    20  
    21  // NewValidateCommand creates a new `qri validate` cobra command for showing schema errors
    22  // in a dataset body
    23  func NewValidateCommand(f Factory, ioStreams ioes.IOStreams) *cobra.Command {
    24  	o := &ValidateOptions{IOStreams: ioStreams}
    25  	cmd := &cobra.Command{
    26  		Use:   "validate [DATASET]",
    27  		Short: "show schema validation errors",
    28  		Annotations: map[string]string{
    29  			"group": "dataset",
    30  		},
    31  		Long: `Validate checks data for errors using a schema and then printing a list of
    32  issues. By default validate checks a dataset's body against it’s own schema.
    33  Validate is a flexible command that works with data and schemas either
    34  inside or outside of qri by providing the --body and --schema or --structure
    35  flags.
    36  
    37  Providing either --schema or --structure and --body is an “external
    38  validation" that uses nothing stored in qri. When only one of these flags,
    39  are provided, the other comes from a dataset reference. For example, to
    40  check how a file “data.csv” validates against a dataset "foo”, we would run:
    41  
    42    $ qri validate --body data.csv me/foo
    43  
    44  In this case, qri will will print any validation as if data.csv was foo’s data.
    45  
    46  To see how changes to a schema will validate against a dataset in qri, we
    47  would run:
    48  
    49    $ qri validate --schema schema.json me/foo
    50  
    51  In this case, qri will print validation errors as if schema.json was the
    52  schema for dataset "me/foo"
    53  
    54  Using validate this way is a great way to see how changes to data or schema
    55  will affect a dataset before saving changes to a dataset.
    56  
    57  You can get the current schema of a dataset by running the ` + "`qri get structure.schema`" + `
    58  command.
    59  
    60  Note: --body and --schema or --structure flags will override the dataset
    61  if these flags are provided.`,
    62  		Example: `  # Show errors in an existing dataset:
    63    $ qri validate b5/comics
    64  
    65    # Validate a new body against an existing schema:
    66    $ qri validate --body new_data.csv me/annual_pop
    67  
    68    # Validate data against a new schema:
    69    $ qri validate --body data.csv --schema schema.json`,
    70  		Args: cobra.MaximumNArgs(1),
    71  		RunE: func(cmd *cobra.Command, args []string) error {
    72  			if err := o.Complete(f, args); err != nil {
    73  				return err
    74  			}
    75  			return o.Run()
    76  		},
    77  	}
    78  
    79  	// TODO: restore
    80  	// cmd.Flags().StringVarP(&o.URL, "url", "u", "", "url to file to initialize from")
    81  	cmd.Flags().StringVarP(&o.BodyFilepath, "body", "b", "", "body file to validate")
    82  	cmd.MarkFlagFilename("body")
    83  	cmd.Flags().StringVar(&o.SchemaFilepath, "schema", "", "json schema file to use for validation")
    84  	cmd.MarkFlagFilename("schema", "json")
    85  	cmd.Flags().StringVarP(&o.StructureFilepath, "structure", "", "", "json structure file to use for validation")
    86  	cmd.MarkFlagFilename("structure", "json")
    87  	cmd.Flags().StringVar(&o.Format, "format", "table", "output format. One of: [table|json|csv]")
    88  
    89  	return cmd
    90  }
    91  
// ValidateOptions encapsulates state for the validate command
type ValidateOptions struct {
	// IOStreams supplies the output stream and spinner used while running
	ioes.IOStreams

	// Refs holds the dataset reference selection; Complete sets it to an empty
	// selection when no reference is given
	Refs *RefSelect
	// BodyFilepath is the value of the --body flag
	BodyFilepath string
	// SchemaFilepath is the value of the --schema flag
	SchemaFilepath string
	// StructureFilepath is the value of the --structure flag
	StructureFilepath string
	// Format is the value of the --format flag: "table", "json" or "csv"
	Format string

	// inst is the qri instance obtained from the Factory in Complete
	inst *lib.Instance
}
   104  
   105  // Complete adds any configuration that can only be added just before calling Run
   106  func (o *ValidateOptions) Complete(f Factory, args []string) (err error) {
   107  	if o.inst, err = f.Instance(); err != nil {
   108  		return
   109  	}
   110  
   111  	if o.Format != "table" && o.Format != "json" && o.Format != "csv" {
   112  		return fmt.Errorf(`%q is not a valid output format. Please use one of: "table", "json", "csv"`, o.Format)
   113  	}
   114  
   115  	o.Refs, err = GetCurrentRefSelect(f, args, 1)
   116  	if errors.Is(err, repo.ErrEmptyRef) {
   117  		// It is not an error to call validate without a dataset reference. Might be
   118  		// validating a body file against a schema file directly.
   119  		o.Refs = NewEmptyRefSelect()
   120  		err = nil
   121  	}
   122  	return
   123  }
   124  
   125  // Run executes the run command
   126  func (o *ValidateOptions) Run() (err error) {
   127  	o.StartSpinner()
   128  	defer o.StopSpinner()
   129  
   130  	ref := o.Refs.Ref()
   131  	p := &lib.ValidateParams{
   132  		Ref:               ref,
   133  		BodyFilename:      o.BodyFilepath,
   134  		SchemaFilename:    o.SchemaFilepath,
   135  		StructureFilename: o.StructureFilepath,
   136  	}
   137  
   138  	ctx := context.TODO()
   139  	res, err := o.inst.Dataset().Validate(ctx, p)
   140  	if err != nil {
   141  		return err
   142  	}
   143  
   144  	o.StopSpinner()
   145  
   146  	switch o.Format {
   147  	case "table":
   148  		if len(res.Errors) == 0 {
   149  			printSuccess(o.Out, "✔ All good!")
   150  			return nil
   151  		}
   152  		header, data := tabularValidationData(res.Structure, res.Errors)
   153  		buf := &bytes.Buffer{}
   154  		renderTable(buf, header, data)
   155  		printToPager(o.Out, buf)
   156  	case "csv":
   157  		header, data := tabularValidationData(res.Structure, res.Errors)
   158  		csv.NewWriter(o.Out).WriteAll(append([][]string{header}, data...))
   159  	case "json":
   160  		if err := json.NewEncoder(o.Out).Encode(res.Errors); err != nil {
   161  			return err
   162  		}
   163  	}
   164  	return nil
   165  }
   166  
   167  func tabularValidationData(st *dataset.Structure, errs []jsonschema.KeyError) ([]string, [][]string) {
   168  	var (
   169  		header []string
   170  		data   = make([][]string, len(errs))
   171  	)
   172  
   173  	if st.Depth == 2 {
   174  		header = []string{"#", "row", "col", "value", "error"}
   175  		for i, e := range errs {
   176  			paths := strings.Split(e.PropertyPath, "/")
   177  			if len(paths) < 3 {
   178  				paths = []string{"", "", ""}
   179  			}
   180  			data[i] = []string{strconv.FormatInt(int64(i), 10), paths[1], paths[2], valStr(e.InvalidValue), e.Message}
   181  		}
   182  	} else {
   183  		header = []string{"#", "path", "value", "error"}
   184  		for i, e := range errs {
   185  			data[i] = []string{strconv.FormatInt(int64(i), 10), e.PropertyPath, valStr(e.InvalidValue), e.Message}
   186  		}
   187  	}
   188  
   189  	return header, data
   190  }
   191  
   192  func valStr(v interface{}) string {
   193  	switch x := v.(type) {
   194  	case string:
   195  		if len(x) > 20 {
   196  			x = x[:17] + "..."
   197  		}
   198  		return x
   199  	case int:
   200  		return strconv.FormatInt(int64(x), 10)
   201  	case int64:
   202  		return strconv.FormatInt(x, 10)
   203  	case float64:
   204  		return strconv.FormatFloat(x, 'E', -1, 64)
   205  	case bool:
   206  		return strconv.FormatBool(x)
   207  	case nil:
   208  		return "NULL"
   209  	default:
   210  		return "<unknown>"
   211  	}
   212  }