github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/ruby/gemspec/gemspec_resolve.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package gemspec
    16  
    17  import (
    18  	"bufio"
    19  	"errors"
    20  	"fmt"
    21  	"io/fs"
    22  	"path"
    23  	"slices"
    24  
    25  	// Use filepath to parse all paths extracted from disk, and convert with filepath.ToSlash() before
    26  	// interacting with fs.FS to ensure consistent OS-agnostic handling.
    27  	"path/filepath"
    28  	"strings"
    29  )
    30  
    31  // resolveVersionFromRequires attempts to locate and read files referenced by
    32  // require_relative statements in order to find the value of the specified version
    33  // constant. It returns an error if the constant cannot be resolved.
    34  func resolveVersionFromRequires(fsys fs.FS, gemspecPath string, requirePaths []string, constName string) (string, error) {
    35  	if fsys == nil {
    36  		return "", errors.New("filesystem unavailable for resolving version constant")
    37  	}
    38  
    39  	gemspecDir := path.Dir(gemspecPath)
    40  	visited := make(map[string]struct{})
    41  
    42  	for _, req := range requirePaths {
    43  		if req == "" {
    44  			continue
    45  		}
    46  
    47  		candidates := versionFileCandidates(req)
    48  		for _, candidate := range candidates {
    49  			fullPath := candidate
    50  			if gemspecDir != "." && gemspecDir != "" {
    51  				fullPath = path.Join(gemspecDir, candidate)
    52  			}
    53  			fullPath = path.Clean(fullPath)
    54  			if _, ok := visited[fullPath]; ok {
    55  				continue
    56  			}
    57  			visited[fullPath] = struct{}{}
    58  
    59  			version, err := findConstantValueInFile(fsys, fullPath, constName)
    60  			if err == nil {
    61  				return version, nil
    62  			}
    63  		}
    64  	}
    65  
    66  	return "", fmt.Errorf("unable to resolve constant %s from require_relative targets", constName)
    67  }
    68  
    69  // versionConstantName checks if the provided expression matches common Ruby
    70  // version constant naming patterns and returns the constant name and true if so.
    71  // Otherwise, it returns an empty string and false.
    72  func versionConstantName(expr string) (string, bool) {
    73  	expr = strings.TrimSpace(expr)
    74  	if expr == "" {
    75  		return "", false
    76  	}
    77  	parts := strings.Split(expr, "::")
    78  	name := parts[len(parts)-1]
    79  	// Support common version constant naming patterns
    80  	upperName := strings.ToUpper(name)
    81  	if upperName == "VERSION" {
    82  		return name, true
    83  	}
    84  	return "", false
    85  }
    86  
    87  // versionFileCandidates returns possible file paths to check for a version
    88  // constant based on a require or require_relative argument.
    89  func versionFileCandidates(req string) []string {
    90  	req = strings.TrimSpace(req)
    91  	req = strings.TrimPrefix(req, "./")
    92  	req = filepath.Clean(req)
    93  	// Convert to slash-separated path to handle Windows paths (unlikely for ruby files).
    94  	req = filepath.ToSlash(req)
    95  	if filepath.Ext(req) == ".rb" {
    96  		return []string{req}
    97  	}
    98  	return []string{req, req + ".rb"}
    99  }
   100  
   101  // constantValueFromMatch extracts the assigned value from regex match groups,
   102  // handling both single- and double-quoted literals.
   103  //
   104  // It returns an empty string if no value is found.
   105  func constantValueFromMatch(matches []string) string {
   106  	if len(matches) > 2 && matches[2] != "" {
   107  		return matches[2]
   108  	}
   109  	if len(matches) > 3 && matches[3] != "" {
   110  		return matches[3]
   111  	}
   112  	return ""
   113  }
   114  
   115  // findConstantValueInFile scans the specified file for an assignment to the
   116  // given constant name and returns its value if found. It returns an error if
   117  // the file cannot be read or the constant is not found.
   118  func findConstantValueInFile(fsys fs.FS, path, constName string) (string, error) {
   119  	f, err := fsys.Open(path)
   120  	if err != nil {
   121  		return "", err
   122  	}
   123  	defer f.Close()
   124  
   125  	scanner := bufio.NewScanner(f)
   126  	for scanner.Scan() {
   127  		line := scanner.Text()
   128  		if matches := reConstAssignment.FindStringSubmatch(line); len(matches) > 1 && matches[1] == constName {
   129  			if val := constantValueFromMatch(matches); val != "" {
   130  				return val, nil
   131  			}
   132  		}
   133  	}
   134  	if err := scanner.Err(); err != nil {
   135  		return "", err
   136  	}
   137  	return "", fmt.Errorf("constant %s not found in %s", constName, path)
   138  }
   139  
   140  // extractRequireTargets returns project-relative paths referenced by require or
   141  // require_relative statements on the provided line of Ruby code.
   142  func extractRequireTargets(line string) []string {
   143  	stripped := stripInlineComment(line)
   144  	if stripped == "" {
   145  		return nil
   146  	}
   147  
   148  	trimmed := strings.TrimSpace(stripped)
   149  	keyword := requireKeyword(trimmed)
   150  	if keyword == "" {
   151  		return nil
   152  	}
   153  
   154  	var results []string
   155  
   156  	if keyword == "require_relative" {
   157  		// Fast path: direct require_relative "literal"
   158  		if matches := reRequireRel.FindStringSubmatch(trimmed); len(matches) > 1 {
   159  			results = appendUnique(results, matches[1])
   160  		}
   161  	} else {
   162  		// For plain require, only include likely project-local files.
   163  		if matches := reRequireLiteral.FindStringSubmatch(trimmed); len(matches) > 1 && looksLikeProjectPath(matches[1]) {
   164  			results = appendUnique(results, matches[1])
   165  		}
   166  	}
   167  
   168  	// Parse expressions following the keyword, e.g. File.join(...)
   169  	expr := strings.TrimSpace(trimmed[len(keyword):])
   170  	if expr == "" {
   171  		return results
   172  	}
   173  
   174  	if strings.HasPrefix(expr, "(") && strings.HasSuffix(expr, ")") {
   175  		expr = strings.TrimSpace(expr[1 : len(expr)-1])
   176  	}
   177  	// Require statements often have trailing conditionals; drop them.
   178  	expr = strings.TrimSpace(trimRubyTrailingCondition(expr))
   179  	if expr == "" {
   180  		return results
   181  	}
   182  
   183  	if val, ok := parseQuotedLiteral(expr); ok {
   184  		if keyword == "require_relative" || looksLikeProjectPath(val) {
   185  			results = appendUnique(results, val)
   186  		}
   187  	}
   188  
   189  	if strings.HasPrefix(expr, "File.join") {
   190  		// Handle require_relative File.join('lib', 'foo') patterns.
   191  		if path := parseFileJoin(expr); path != "" {
   192  			results = appendUnique(results, path)
   193  		}
   194  	}
   195  	if strings.HasPrefix(expr, "File.expand_path") {
   196  		// Support File.expand_path('lib/foo', __dir__)
   197  		if path := parseFileExpand(expr); path != "" {
   198  			results = appendUnique(results, path)
   199  		}
   200  	}
   201  	if strings.Contains(expr, "File.dirname(__FILE__)") || strings.Contains(expr, "__dir__") {
   202  		// Handle legacy File.dirname(__FILE__) + '/lib/foo'
   203  		if path := parseDirnameConcat(expr); path != "" {
   204  			results = appendUnique(results, path)
   205  		}
   206  	}
   207  	// TODO: add support for additional static helpers (e.g. File.dirname(__FILE__) << '/lib', %w literals)
   208  
   209  	return results
   210  }
   211  
   212  // requireAccumulator builds a complete require/require_relative statement that
   213  // may span multiple lines and yields extracted targets once the statement is
   214  // syntactically complete.
   215  type requireAccumulator struct {
   216  	pending string
   217  }
   218  
   219  // Add processes a line of Ruby code, accumulating partial require statements
   220  // and returning any extracted require targets once a complete statement is formed.
   221  func (a *requireAccumulator) Add(line string) []string {
   222  	stripped := stripInlineComment(line)
   223  	if stripped == "" {
   224  		return nil
   225  	}
   226  
   227  	if a.pending != "" {
   228  		// Concatenate continued lines; space ensures tokens stay separated.
   229  		a.pending = strings.TrimSpace(a.pending + " " + stripped)
   230  		if requireStatementComplete(a.pending) {
   231  			statement := a.pending
   232  			a.pending = ""
   233  			return extractRequireTargets(statement)
   234  		}
   235  		return nil
   236  	}
   237  
   238  	if keyword := requireKeyword(stripped); keyword != "" {
   239  		// Start buffering a new require statement.
   240  		a.pending = stripped
   241  		if requireStatementComplete(a.pending) {
   242  			statement := a.pending
   243  			a.pending = ""
   244  			return extractRequireTargets(statement)
   245  		}
   246  		return nil
   247  	}
   248  
   249  	// If no require keyword and no pending context, attempt standalone extraction.
   250  	return extractRequireTargets(stripped)
   251  }
   252  
   253  // Flush returns any pending require statement if it is complete, clearing the
   254  // accumulator. If the pending statement is incomplete, it is discarded and an
   255  // empty slice is returned.
   256  func (a *requireAccumulator) Flush() []string {
   257  	if a.pending == "" {
   258  		return nil
   259  	}
   260  	if !requireStatementComplete(a.pending) {
   261  		return nil
   262  	}
   263  	statement := a.pending
   264  	a.pending = ""
   265  	return extractRequireTargets(statement)
   266  }
   267  
   268  // appendUnique appends candidates that are non-empty and not already present in existing.
   269  func appendUnique(existing []string, candidates ...string) []string {
   270  	for _, candidate := range candidates {
   271  		if candidate == "" {
   272  			continue
   273  		}
   274  		if !slices.Contains(existing, candidate) {
   275  			existing = append(existing, candidate)
   276  		}
   277  	}
   278  	return existing
   279  }
   280  
   281  // parseFileJoin attempts to extract a static path from File.join calls with
   282  // literal string arguments; it returns an empty string if parsing fails.
   283  func parseFileJoin(expr string) string {
   284  	args, ok := extractCallArguments(expr, "File.join")
   285  	if !ok {
   286  		return ""
   287  	}
   288  
   289  	segments := splitArgs(args)
   290  	var parts []string
   291  	for _, segment := range segments {
   292  		segment = strings.TrimSpace(segment)
   293  		if val, ok := parseQuotedLiteral(segment); ok {
   294  			// Only literal segments contribute to a static path.
   295  			parts = append(parts, val)
   296  			continue
   297  		}
   298  		// Any non-literal segment prevents static resolution.
   299  		return ""
   300  	}
   301  	if len(parts) == 0 {
   302  		return ""
   303  	}
   304  
   305  	joined := filepath.Join(parts...)
   306  	return filepath.Clean(joined)
   307  }
   308  
   309  // parseFileExpand extracts the first argument to File.expand_path when it is a
   310  // literal or another supported static helper, returning an empty string otherwise.
   311  func parseFileExpand(expr string) string {
   312  	args, ok := extractCallArguments(expr, "File.expand_path")
   313  	if !ok {
   314  		return ""
   315  	}
   316  	segments := splitArgs(args)
   317  	if len(segments) == 0 {
   318  		return ""
   319  	}
   320  	first := strings.TrimSpace(segments[0])
   321  	// File.expand_path may wrap File.join or a literal path.
   322  	if strings.HasPrefix(first, "File.join") {
   323  		return parseFileJoin(first)
   324  	}
   325  	if val, ok := parseQuotedLiteral(first); ok {
   326  		return filepath.Clean(val)
   327  	}
   328  	return ""
   329  }
   330  
   331  // parseDirnameConcat resolves concatenations that include File.dirname(__FILE__)
   332  // or __dir__ with static path literals to produce a relative path.
   333  func parseDirnameConcat(expr string) string {
   334  	parts := splitOnPlus(expr)
   335  	if len(parts) == 0 {
   336  		return ""
   337  	}
   338  	hasDirname := false
   339  	var literals []string
   340  	for _, part := range parts {
   341  		part = strings.TrimSpace(part)
   342  		if part == "" {
   343  			continue
   344  		}
   345  		if strings.Contains(part, "File.dirname(__FILE__)") || part == "__dir__" {
   346  			// Track presence of dirname anchor; no literal to append yet.
   347  			hasDirname = true
   348  			continue
   349  		}
   350  		if strings.HasSuffix(part, "__dir__") && strings.Contains(part, "File.expand_path") {
   351  			// handled elsewhere; skip to avoid double detection.
   352  			continue
   353  		}
   354  		if strings.HasPrefix(part, "File.join") {
   355  			// Support nested File.join helpers within concatenations.
   356  			if joined := parseFileJoin(part); joined != "" {
   357  				literals = append(literals, joined)
   358  			}
   359  			continue
   360  		}
   361  		if strings.HasPrefix(part, "File.expand_path") {
   362  			// Allow nested expand_path inside concatenations.
   363  			if val := parseFileExpand(part); val != "" {
   364  				literals = append(literals, val)
   365  			}
   366  			continue
   367  		}
   368  		if val, ok := parseQuotedLiteral(part); ok {
   369  			literals = append(literals, val)
   370  		}
   371  	}
   372  	if !hasDirname || len(literals) == 0 {
   373  		return ""
   374  	}
   375  	for i, lit := range literals {
   376  		literals[i] = strings.TrimPrefix(lit, string(filepath.Separator))
   377  	}
   378  	joined := filepath.Clean(filepath.Join(literals...))
   379  	return strings.TrimPrefix(joined, string(filepath.Separator))
   380  }
   381  
   382  // extractCallArguments returns the argument string inside the parentheses for
   383  // the specified call prefix, handling nested parentheses and quoted strings.
   384  func extractCallArguments(expr, prefix string) (string, bool) {
   385  	rem := strings.TrimSpace(expr)
   386  	if !strings.HasPrefix(rem, prefix) {
   387  		return "", false
   388  	}
   389  	rem = strings.TrimSpace(rem[len(prefix):])
   390  	if !strings.HasPrefix(rem, "(") {
   391  		return "", false
   392  	}
   393  	rem = rem[1:]
   394  	depth := 1
   395  	var b strings.Builder
   396  	inSingle, inDouble := false, false
   397  	for i := range len(rem) {
   398  		ch := rem[i]
   399  		switch ch {
   400  		case '\\':
   401  			if inSingle || inDouble {
   402  				if i+1 < len(rem) {
   403  					b.WriteByte(ch)
   404  					i++
   405  					b.WriteByte(rem[i])
   406  					continue
   407  				}
   408  			}
   409  		case '\'':
   410  			if !inDouble {
   411  				inSingle = !inSingle
   412  			}
   413  		case '"':
   414  			if !inSingle {
   415  				inDouble = !inDouble
   416  			}
   417  		case '(':
   418  			if !inSingle && !inDouble {
   419  				depth++
   420  			}
   421  		case ')':
   422  			if !inSingle && !inDouble {
   423  				depth--
   424  				if depth == 0 {
   425  					// Return captured arguments once parentheses balance.
   426  					return strings.TrimSpace(b.String()), true
   427  				}
   428  			}
   429  		}
   430  		if depth > 0 {
   431  			b.WriteByte(ch)
   432  		}
   433  	}
   434  	return "", false
   435  }
   436  
   437  // splitArgs splits a comma-separated argument list into individual arguments,
   438  // respecting quoted strings and nested parentheses.
   439  func splitArgs(expr string) []string {
   440  	var (
   441  		args       []string
   442  		current    strings.Builder
   443  		inSingle   bool
   444  		inDouble   bool
   445  		parenDepth int
   446  	)
   447  
   448  	for i := range len(expr) {
   449  		ch := expr[i]
   450  		switch ch {
   451  		case '\\':
   452  			if inSingle || inDouble {
   453  				current.WriteByte(ch)
   454  				if i+1 < len(expr) {
   455  					i++
   456  					current.WriteByte(expr[i])
   457  				}
   458  				continue
   459  			}
   460  		case '\'':
   461  			if !inDouble {
   462  				inSingle = !inSingle
   463  			}
   464  		case '"':
   465  			if !inSingle {
   466  				inDouble = !inDouble
   467  			}
   468  		case '(':
   469  			if !inSingle && !inDouble {
   470  				parenDepth++
   471  			}
   472  		case ')':
   473  			if !inSingle && !inDouble && parenDepth > 0 {
   474  				parenDepth--
   475  			}
   476  		case ',':
   477  			if !inSingle && !inDouble && parenDepth == 0 {
   478  				// Emit current argument when outside nested constructs.
   479  				args = append(args, strings.TrimSpace(current.String()))
   480  				current.Reset()
   481  				continue
   482  			}
   483  		}
   484  		current.WriteByte(ch)
   485  	}
   486  
   487  	if tail := strings.TrimSpace(current.String()); tail != "" {
   488  		args = append(args, tail)
   489  	}
   490  	return args
   491  }
   492  
   493  // parseQuotedLiteral returns the unescaped contents of a quoted Ruby literal and
   494  // a boolean indicating success.
   495  func parseQuotedLiteral(expr string) (string, bool) {
   496  	trimmed := strings.TrimSpace(expr)
   497  	if trimmed == "" {
   498  		return "", false
   499  	}
   500  	quote := trimmed[0]
   501  	if quote != '\'' && quote != '"' {
   502  		return "", false
   503  	}
   504  	var (
   505  		value   strings.Builder
   506  		escaped bool
   507  	)
   508  	for i := 1; i < len(trimmed); i++ {
   509  		ch := trimmed[i]
   510  		if escaped {
   511  			value.WriteByte(ch)
   512  			escaped = false
   513  			continue
   514  		}
   515  		if ch == '\\' {
   516  			escaped = true
   517  			continue
   518  		}
   519  		if ch == quote {
   520  			// Found closing quote; return accumulated value.
   521  			return value.String(), true
   522  		}
   523  		value.WriteByte(ch)
   524  	}
   525  	return "", false
   526  }
   527  
   528  // stripInlineComment removes Ruby inline comments while preserving quoted hash
   529  // characters inside string literals.
   530  func stripInlineComment(line string) string {
   531  	var (
   532  		inSingle bool
   533  		inDouble bool
   534  		escaped  bool
   535  	)
   536  	for i := range len(line) {
   537  		ch := line[i]
   538  		if escaped {
   539  			escaped = false
   540  			continue
   541  		}
   542  		switch ch {
   543  		case '\\':
   544  			if inSingle || inDouble {
   545  				escaped = true
   546  			}
   547  		case '\'':
   548  			if !inDouble {
   549  				inSingle = !inSingle
   550  			}
   551  		case '"':
   552  			if !inSingle {
   553  				inDouble = !inDouble
   554  			}
   555  		case '#':
   556  			if !inSingle && !inDouble {
   557  				// Trim comment marker and trailing spaces.
   558  				return strings.TrimSpace(line[:i])
   559  			}
   560  		}
   561  	}
   562  	return strings.TrimSpace(line)
   563  }
   564  
   565  // trimRubyTrailingCondition removes trailing single-line conditionals (if,
   566  // unless, while, until) to simplify require target parsing.
   567  func trimRubyTrailingCondition(expr string) string {
   568  	for _, kw := range []string{" if ", " unless ", " while ", " until "} {
   569  		if idx := strings.Index(expr, kw); idx >= 0 {
   570  			return strings.TrimSpace(expr[:idx])
   571  		}
   572  	}
   573  	return expr
   574  }
   575  
   576  // requireKeyword returns the require variant found at the beginning of expr, or
   577  // an empty string if none is present.
   578  func requireKeyword(expr string) string {
   579  	trimmed := strings.TrimSpace(expr)
   580  	if trimmed == "" {
   581  		return ""
   582  	}
   583  	if strings.HasPrefix(trimmed, "require_relative") {
   584  		return "require_relative"
   585  	}
   586  	if strings.HasPrefix(trimmed, "require(") || strings.HasPrefix(trimmed, "require ") {
   587  		return "require"
   588  	}
   589  	return ""
   590  }
   591  
   592  // looksLikeProjectPath heuristically determines whether the provided path refers
   593  // to a project-local file rather than a standard library or gem.
   594  func looksLikeProjectPath(path string) bool {
   595  	if path == "" {
   596  		return false
   597  	}
   598  	if strings.HasPrefix(path, ".") {
   599  		return true
   600  	}
   601  	if strings.Contains(path, "/") {
   602  		return true
   603  	}
   604  	if strings.HasSuffix(path, ".rb") {
   605  		return true
   606  	}
   607  	return false
   608  }
   609  
   610  // requireStatementComplete reports whether the given require statement has
   611  // balanced delimiters and closed quotes, indicating it is ready to be parsed.
   612  func requireStatementComplete(expr string) bool {
   613  	trimmed := strings.TrimSpace(expr)
   614  	if trimmed == "" {
   615  		return false
   616  	}
   617  	trimmed = strings.TrimSpace(trimRubyTrailingCondition(trimmed))
   618  	inSingle := false
   619  	inDouble := false
   620  	escaped := false
   621  	depth := 0
   622  	for i := range len(trimmed) {
   623  		ch := trimmed[i]
   624  		if escaped {
   625  			escaped = false
   626  			continue
   627  		}
   628  		switch ch {
   629  		case '\\':
   630  			if inSingle || inDouble {
   631  				escaped = true
   632  			}
   633  		case '\'':
   634  			if !inDouble {
   635  				inSingle = !inSingle
   636  			}
   637  		case '"':
   638  			if !inSingle {
   639  				inDouble = !inDouble
   640  			}
   641  		case '(':
   642  			if !inSingle && !inDouble {
   643  				depth++
   644  			}
   645  		case ')':
   646  			if !inSingle && !inDouble {
   647  				depth--
   648  			}
   649  		}
   650  	}
   651  	return !inSingle && !inDouble && depth <= 0
   652  }
   653  
   654  // splitOnPlus splits an expression on plus operators while respecting quoted
   655  // strings and balanced parentheses, returning trimmed segments.
   656  func splitOnPlus(expr string) []string {
   657  	var (
   658  		parts      []string
   659  		current    strings.Builder
   660  		inSingle   bool
   661  		inDouble   bool
   662  		parenDepth int
   663  	)
   664  	for i := 0; i < len(expr); i++ {
   665  		ch := expr[i]
   666  		switch ch {
   667  		case '\\':
   668  			if inSingle || inDouble {
   669  				current.WriteByte(ch)
   670  				if i+1 < len(expr) {
   671  					i++
   672  					current.WriteByte(expr[i])
   673  				}
   674  				continue
   675  			}
   676  		case '\'':
   677  			if !inDouble {
   678  				inSingle = !inSingle
   679  			}
   680  		case '"':
   681  			if !inSingle {
   682  				inDouble = !inDouble
   683  			}
   684  		case '(':
   685  			if !inSingle && !inDouble {
   686  				parenDepth++
   687  			}
   688  		case ')':
   689  			if !inSingle && !inDouble && parenDepth > 0 {
   690  				parenDepth--
   691  			}
   692  		case '+':
   693  			if !inSingle && !inDouble && parenDepth == 0 {
   694  				parts = append(parts, strings.TrimSpace(current.String()))
   695  				current.Reset()
   696  				continue
   697  			}
   698  		}
   699  		current.WriteByte(ch)
   700  	}
   701  	if tail := strings.TrimSpace(current.String()); tail != "" {
   702  		parts = append(parts, tail)
   703  	}
   704  	return parts
   705  }