go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/vpython/python/command_line.go (about)

     1  // Copyright 2017 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package python
    16  
    17  import (
    18  	"fmt"
    19  	"strings"
    20  	"unicode/utf8"
    21  
    22  	"go.chromium.org/luci/common/errors"
    23  )
    24  
    25  // CommandLineFlag is a command-line flag and its associated argument, if one
    26  // is provided.
    27  type CommandLineFlag struct {
    28  	Flag string
    29  	Arg  string
    30  }
    31  
    32  // String returns a string representation of this flag, which is a command-line
    33  // suitable representation of its value.
    34  func (f *CommandLineFlag) String() string {
    35  	return fmt.Sprintf("-%s%s", f.Flag, f.Arg)
    36  }
    37  
// Target describes a Python invocation target.
//
// Targets are identified by parsing a Python command-line using
// ParseCommandLine.
//
// A Target is identified through type assertion, and will be one of:
//
//   - NoTarget
//   - ScriptTarget
//   - CommandTarget
//   - ModuleTarget
//
// Both methods are unexported, so only types declared in this package can
// implement Target.
type Target interface {
	// buildArgsForTarget returns the arguments to pass to the interpreter to
	// invoke this target.
	buildArgsForTarget() []string
	// followsFlagSeparator returns true if this target follows the command-line
	// flag separator (if present). This will be false for all flag arguments,
	// since flags must precede the separator.
	//
	//	- true: python <flags> -- <script> ...
	//	- false: python <flags> <script> -- ...
	followsFlagSeparator() bool
}
    61  
    62  // NoTarget is a Target implementation indicating no Python target (i.e.,
    63  // interactive).
    64  type NoTarget struct{}
    65  
    66  func (NoTarget) buildArgsForTarget() []string { return nil }
    67  func (NoTarget) followsFlagSeparator() bool   { return false }
    68  
    69  // ScriptTarget is a Python executable script target.
    70  type ScriptTarget struct {
    71  	// Path is the path to the script that is being invoked.
    72  	//
    73  	// This may be "-", indicating that the script is being read from STDIN.
    74  	Path string
    75  	// FollowsSeparator is true if the script argument follows the flag separator.
    76  	FollowsSeparator bool
    77  }
    78  
    79  func (t ScriptTarget) buildArgsForTarget() []string { return []string{t.Path} }
    80  func (t ScriptTarget) followsFlagSeparator() bool   { return t.FollowsSeparator }
    81  
    82  // CommandTarget is a Target implementation for a command-line string
    83  // (-c ...).
    84  type CommandTarget struct {
    85  	// Command is the command contents.
    86  	Command string
    87  }
    88  
    89  func (t CommandTarget) buildArgsForTarget() []string { return []string{"-c", t.Command} }
    90  func (CommandTarget) followsFlagSeparator() bool     { return false }
    91  
    92  // ModuleTarget is a Target implementing indicating a Python module (-m ...).
    93  type ModuleTarget struct {
    94  	// Module is the name of the target module.
    95  	Module string
    96  }
    97  
    98  func (t ModuleTarget) buildArgsForTarget() []string { return []string{"-m", t.Module} }
    99  func (ModuleTarget) followsFlagSeparator() bool     { return false }
   100  
// parsedFlagState is the current state of a parsed flag block. It is advanced
// in CommandLine's parseSingleFlag as flags are parsed: each call consumes
// from the front of flag and, when a flag needs a separate value, from the
// front of args.
type parsedFlagState struct {
	// flag is the current flag block. It does not include the preceding "-".
	//
	// If a block is single, e.g., "-w", it will contain "w".
	// If a block contains multiple flags, e.g, "-vvv", it will contain "vvv".
	flag string
	// args is the remainder of args following the flag block. It is used when
	// a flag that takes a value does not include a conjoined data block.
	//
	// For example, "-Wall" has flag "W", value "all". ["-W", "all"], parses
	// identically, but requires the argument after the "-W" to resolve.
	args []string
}
   116  
// CommandLine is a parsed Python command-line.
//
// CommandLine can be parsed from arguments via ParseCommandLine.
type CommandLine struct {
	// Target is the Python target type.
	//
	// ParseCommandLine always sets this; it is NoTarget when the command line
	// names no target.
	Target Target

	// Flags are flags to the Python interpreter.
	Flags []CommandLineFlag
	// FlagSeparator, if true, means that a "--" flag separator, which separates
	// the interpreter's flags from its positional arguments, was found.
	FlagSeparator bool
	// Args are arguments passed to the Python script. ParseCommandLine fills
	// this with everything that remains once the target has been identified.
	Args []string
}
   132  
   133  // BuildArgs returns an array of Python interpreter arguments for cl.
   134  func (cl *CommandLine) BuildArgs() []string {
   135  	targetArgs := cl.Target.buildArgsForTarget()
   136  	args := make([]string, 0, len(cl.Flags)+1+len(targetArgs)+len(cl.Args))
   137  	for _, flag := range cl.Flags {
   138  		args = append(args, flag.String())
   139  	}
   140  
   141  	var flagSeparator []string
   142  	if cl.FlagSeparator {
   143  		flagSeparator = []string{"--"}
   144  	}
   145  
   146  	// If our target is specified as a flag, we need to emit it before the flag
   147  	// separator. If our target is specified as a positional argument (e.g.,
   148  	// CommandTarget), we can emit it on either side.
   149  	if !cl.Target.followsFlagSeparator() {
   150  		args = append(args, targetArgs...)
   151  		args = append(args, flagSeparator...)
   152  	} else {
   153  		args = append(args, flagSeparator...)
   154  		args = append(args, targetArgs...)
   155  	}
   156  
   157  	args = append(args, cl.Args...)
   158  	return args
   159  }
   160  
   161  // AddFlag adds an interpreter flag to cl if it's not already present.
   162  func (cl *CommandLine) AddFlag(flag CommandLineFlag) {
   163  	if strings.HasPrefix(flag.Flag, "-") {
   164  		panic("flag must not begin with '-'")
   165  	}
   166  	for _, f := range cl.Flags {
   167  		if f == flag {
   168  			return
   169  		}
   170  	}
   171  	cl.Flags = append(cl.Flags, flag)
   172  }
   173  
   174  // AddSingleFlag adds a single no-argument interpreter flag to cl
   175  // if it's not already specified.
   176  func (cl *CommandLine) AddSingleFlag(flag string) {
   177  	cl.AddFlag(CommandLineFlag{Flag: flag})
   178  }
   179  
   180  // RemoveFlagMatch removes all instances of flags that match the selection
   181  // function.
   182  //
   183  // matchFn is a function that accepts a candidate flag and returns true if it
   184  // should be removed, false if it should not.
   185  func (cl *CommandLine) RemoveFlagMatch(matchFn func(CommandLineFlag) bool) (found bool) {
   186  	newFlags := cl.Flags[:0]
   187  	for _, f := range cl.Flags {
   188  		if !matchFn(f) {
   189  			newFlags = append(newFlags, f)
   190  		} else {
   191  			found = true
   192  		}
   193  	}
   194  	cl.Flags = newFlags
   195  	return
   196  }
   197  
   198  // RemoveFlag removes all instances of the specified flag from the interpreter
   199  // command line.
   200  func (cl *CommandLine) RemoveFlag(flag CommandLineFlag) (found bool) {
   201  	return cl.RemoveFlagMatch(func(f CommandLineFlag) bool { return f == flag })
   202  }
   203  
   204  // RemoveAllFlag removes all instances of the specified flag from the interpreter
   205  // command line.
   206  func (cl *CommandLine) RemoveAllFlag(flag string) (found bool) {
   207  	return cl.RemoveFlagMatch(func(f CommandLineFlag) bool { return f.Flag == flag })
   208  }
   209  
   210  // Clone returns an independent deep copy of cl.
   211  func (cl *CommandLine) Clone() *CommandLine {
   212  	return &CommandLine{
   213  		Target: cl.Target,
   214  		Flags:  append([]CommandLineFlag(nil), cl.Flags...),
   215  		Args:   append([]string(nil), cl.Args...),
   216  	}
   217  }
   218  
   219  // parseSingleFlag parses a single flag from a state.
   220  func (cl *CommandLine) parseSingleFlag(fs *parsedFlagState) error {
   221  	// Consume the first character from flag into "r". "flag" is the remainder.
   222  	r, l := utf8.DecodeRuneInString(fs.flag)
   223  	if r == utf8.RuneError {
   224  		return errors.Reason("invalid rune in flag").Err()
   225  	}
   226  	fs.flag = fs.flag[l:]
   227  
   228  	// Retrieve the value for a non-binary flag. This mutates the flag state to
   229  	// consume that value.
   230  	getFlagValue := func() (val string, err error) {
   231  		switch {
   232  		case len(fs.flag) > 0:
   233  			// Combined flag/value (e.g., -c'paoskdpo')
   234  			val, fs.flag = fs.flag, ""
   235  		case len(fs.args) == 0:
   236  			err = errors.New("two-value flag missing second value")
   237  		default:
   238  			// Flag value is in subsequent argument (e.g., "-c 'paoskdpo'").
   239  			// Consume the argument.
   240  			val, fs.args = fs.args[0], fs.args[1:]
   241  		}
   242  		return
   243  	}
   244  
   245  	// Some cases will set this to true if `r` is determined to just be a no-value
   246  	// single-character flag
   247  	isSingleCharFlag := false
   248  
   249  	switch r {
   250  	case 'c':
   251  		// Inline command target.
   252  		val, err := getFlagValue()
   253  		if err != nil {
   254  			return err
   255  		}
   256  		cl.Target = CommandTarget{val}
   257  
   258  	case 'm':
   259  		// Python module target.
   260  		val, err := getFlagValue()
   261  		if err != nil {
   262  			return err
   263  		}
   264  		cl.Target = ModuleTarget{val}
   265  
   266  	case 'Q', 'W', 'X':
   267  		// Two-argument Python flags.
   268  		val, err := getFlagValue()
   269  		if err != nil {
   270  			return err
   271  		}
   272  		cl.Flags = append(cl.Flags, CommandLineFlag{string(r), val})
   273  
   274  	case 'O':
   275  		// Handle the case of the odd flag "-OO", which parses as a single flag.
   276  		var has bool
   277  		if fs.flag, has = trimPrefix(fs.flag, "O"); has {
   278  			cl.Flags = append(cl.Flags, CommandLineFlag{"OO", ""})
   279  			break
   280  		}
   281  
   282  		// Single "O", do normal single-flag parsing.
   283  		isSingleCharFlag = true
   284  
   285  	case '-':
   286  		// handle the case of "--version", which is an atypical many-character flag.
   287  		if fs.flag == "version" {
   288  			fs.flag = ""
   289  			cl.Flags = append(cl.Flags, CommandLineFlag{"-version", ""})
   290  			break
   291  		}
   292  
   293  		// Not sure what this could be, but fall through none the less.
   294  		isSingleCharFlag = true
   295  
   296  	default:
   297  		isSingleCharFlag = true
   298  	}
   299  
   300  	if isSingleCharFlag {
   301  		// One-argument Python flags. If there are more characters in "flag",
   302  		// don't consume the entire flag; instead, replace it with the remainder
   303  		// for subsequent parses. This handles cases like "-vvc <script>".
   304  		cl.Flags = append(cl.Flags, CommandLineFlag{string(r), ""})
   305  	}
   306  
   307  	return nil
   308  }
   309  
   310  // ParseCommandLine parses Python command-line arguments and returns a
   311  // structured representation.
   312  func ParseCommandLine(args []string) (*CommandLine, error) {
   313  	noTarget := NoTarget{}
   314  
   315  	cl := CommandLine{
   316  		Target: noTarget,
   317  	}
   318  	i := 0
   319  	for len(args) > 0 {
   320  		// Stop parsing after we have a target, as Python does.
   321  		if cl.Target != noTarget {
   322  			break
   323  		}
   324  
   325  		// Consume the next argument.
   326  		arg := args[0]
   327  		args = args[1:]
   328  		i++
   329  
   330  		if arg == "-" {
   331  			// "-" instructs Python to load the script from STDIN.
   332  			cl.Target = ScriptTarget{
   333  				Path: "-",
   334  			}
   335  			continue
   336  		}
   337  
   338  		isFlag := false
   339  		if !cl.FlagSeparator {
   340  			arg, isFlag = trimPrefix(arg, "-")
   341  		}
   342  
   343  		if !isFlag {
   344  			// The first positional argument is the path to the script, and all
   345  			// subsequent arguments are script arguments.
   346  			cl.Target = ScriptTarget{
   347  				Path:             arg,
   348  				FollowsSeparator: cl.FlagSeparator,
   349  			}
   350  			continue
   351  		}
   352  
   353  		// Note that at this point we've trimmed the preceding "-" from arg, so
   354  		// this is really "--". If we encounter "--" that marks the end of
   355  		// interpreter flag parsing; everything hereafter is considered positional
   356  		// to the interpreter.
   357  		if arg == "-" {
   358  			cl.FlagSeparator = true
   359  			continue
   360  		}
   361  
   362  		// Parse this flag and any remainder.
   363  		fs := parsedFlagState{
   364  			flag: arg,
   365  			args: args,
   366  		}
   367  		for len(fs.flag) > 0 {
   368  			if err := cl.parseSingleFlag(&fs); err != nil {
   369  				return nil, errors.Annotate(err, "failed to parse Python flag #%d: %q", i, arg).Err()
   370  			}
   371  		}
   372  		args = fs.args
   373  	}
   374  
   375  	// The remainder of arguments are for the script.
   376  	cl.Args = append([]string(nil), args...)
   377  	return &cl, nil
   378  }
   379  
   380  func trimPrefix(v, pfx string) (string, bool) {
   381  	if strings.HasPrefix(v, pfx) {
   382  		return v[len(pfx):], true
   383  	}
   384  	return v, false
   385  }