github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/ruby/gemspec/gemspec_resolve.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package gemspec 16 17 import ( 18 "bufio" 19 "errors" 20 "fmt" 21 "io/fs" 22 "path" 23 "slices" 24 25 // Use filepath to parse all paths extracted from disk, and convert with filepath.ToSlash() before 26 // interacting with fs.FS to ensure consistent OS-agnostic handling. 27 "path/filepath" 28 "strings" 29 ) 30 31 // resolveVersionFromRequires attempts to locate and read files referenced by 32 // require_relative statements in order to find the value of the specified version 33 // constant. It returns an error if the constant cannot be resolved. 34 func resolveVersionFromRequires(fsys fs.FS, gemspecPath string, requirePaths []string, constName string) (string, error) { 35 if fsys == nil { 36 return "", errors.New("filesystem unavailable for resolving version constant") 37 } 38 39 gemspecDir := path.Dir(gemspecPath) 40 visited := make(map[string]struct{}) 41 42 for _, req := range requirePaths { 43 if req == "" { 44 continue 45 } 46 47 candidates := versionFileCandidates(req) 48 for _, candidate := range candidates { 49 fullPath := candidate 50 if gemspecDir != "." 
&& gemspecDir != "" { 51 fullPath = path.Join(gemspecDir, candidate) 52 } 53 fullPath = path.Clean(fullPath) 54 if _, ok := visited[fullPath]; ok { 55 continue 56 } 57 visited[fullPath] = struct{}{} 58 59 version, err := findConstantValueInFile(fsys, fullPath, constName) 60 if err == nil { 61 return version, nil 62 } 63 } 64 } 65 66 return "", fmt.Errorf("unable to resolve constant %s from require_relative targets", constName) 67 } 68 69 // versionConstantName checks if the provided expression matches common Ruby 70 // version constant naming patterns and returns the constant name and true if so. 71 // Otherwise, it returns an empty string and false. 72 func versionConstantName(expr string) (string, bool) { 73 expr = strings.TrimSpace(expr) 74 if expr == "" { 75 return "", false 76 } 77 parts := strings.Split(expr, "::") 78 name := parts[len(parts)-1] 79 // Support common version constant naming patterns 80 upperName := strings.ToUpper(name) 81 if upperName == "VERSION" { 82 return name, true 83 } 84 return "", false 85 } 86 87 // versionFileCandidates returns possible file paths to check for a version 88 // constant based on a require or require_relative argument. 89 func versionFileCandidates(req string) []string { 90 req = strings.TrimSpace(req) 91 req = strings.TrimPrefix(req, "./") 92 req = filepath.Clean(req) 93 // Convert to slash-separated path to handle Windows paths (unlikely for ruby files). 94 req = filepath.ToSlash(req) 95 if filepath.Ext(req) == ".rb" { 96 return []string{req} 97 } 98 return []string{req, req + ".rb"} 99 } 100 101 // constantValueFromMatch extracts the assigned value from regex match groups, 102 // handling both single- and double-quoted literals. 103 // 104 // It returns an empty string if no value is found. 
105 func constantValueFromMatch(matches []string) string { 106 if len(matches) > 2 && matches[2] != "" { 107 return matches[2] 108 } 109 if len(matches) > 3 && matches[3] != "" { 110 return matches[3] 111 } 112 return "" 113 } 114 115 // findConstantValueInFile scans the specified file for an assignment to the 116 // given constant name and returns its value if found. It returns an error if 117 // the file cannot be read or the constant is not found. 118 func findConstantValueInFile(fsys fs.FS, path, constName string) (string, error) { 119 f, err := fsys.Open(path) 120 if err != nil { 121 return "", err 122 } 123 defer f.Close() 124 125 scanner := bufio.NewScanner(f) 126 for scanner.Scan() { 127 line := scanner.Text() 128 if matches := reConstAssignment.FindStringSubmatch(line); len(matches) > 1 && matches[1] == constName { 129 if val := constantValueFromMatch(matches); val != "" { 130 return val, nil 131 } 132 } 133 } 134 if err := scanner.Err(); err != nil { 135 return "", err 136 } 137 return "", fmt.Errorf("constant %s not found in %s", constName, path) 138 } 139 140 // extractRequireTargets returns project-relative paths referenced by require or 141 // require_relative statements on the provided line of Ruby code. 142 func extractRequireTargets(line string) []string { 143 stripped := stripInlineComment(line) 144 if stripped == "" { 145 return nil 146 } 147 148 trimmed := strings.TrimSpace(stripped) 149 keyword := requireKeyword(trimmed) 150 if keyword == "" { 151 return nil 152 } 153 154 var results []string 155 156 if keyword == "require_relative" { 157 // Fast path: direct require_relative "literal" 158 if matches := reRequireRel.FindStringSubmatch(trimmed); len(matches) > 1 { 159 results = appendUnique(results, matches[1]) 160 } 161 } else { 162 // For plain require, only include likely project-local files. 
163 if matches := reRequireLiteral.FindStringSubmatch(trimmed); len(matches) > 1 && looksLikeProjectPath(matches[1]) { 164 results = appendUnique(results, matches[1]) 165 } 166 } 167 168 // Parse expressions following the keyword, e.g. File.join(...) 169 expr := strings.TrimSpace(trimmed[len(keyword):]) 170 if expr == "" { 171 return results 172 } 173 174 if strings.HasPrefix(expr, "(") && strings.HasSuffix(expr, ")") { 175 expr = strings.TrimSpace(expr[1 : len(expr)-1]) 176 } 177 // Require statements often have trailing conditionals; drop them. 178 expr = strings.TrimSpace(trimRubyTrailingCondition(expr)) 179 if expr == "" { 180 return results 181 } 182 183 if val, ok := parseQuotedLiteral(expr); ok { 184 if keyword == "require_relative" || looksLikeProjectPath(val) { 185 results = appendUnique(results, val) 186 } 187 } 188 189 if strings.HasPrefix(expr, "File.join") { 190 // Handle require_relative File.join('lib', 'foo') patterns. 191 if path := parseFileJoin(expr); path != "" { 192 results = appendUnique(results, path) 193 } 194 } 195 if strings.HasPrefix(expr, "File.expand_path") { 196 // Support File.expand_path('lib/foo', __dir__) 197 if path := parseFileExpand(expr); path != "" { 198 results = appendUnique(results, path) 199 } 200 } 201 if strings.Contains(expr, "File.dirname(__FILE__)") || strings.Contains(expr, "__dir__") { 202 // Handle legacy File.dirname(__FILE__) + '/lib/foo' 203 if path := parseDirnameConcat(expr); path != "" { 204 results = appendUnique(results, path) 205 } 206 } 207 // TODO: add support for additional static helpers (e.g. File.dirname(__FILE__) << '/lib', %w literals) 208 209 return results 210 } 211 212 // requireAccumulator builds a complete require/require_relative statement that 213 // may span multiple lines and yields extracted targets once the statement is 214 // syntactically complete. 
215 type requireAccumulator struct { 216 pending string 217 } 218 219 // Add processes a line of Ruby code, accumulating partial require statements 220 // and returning any extracted require targets once a complete statement is formed. 221 func (a *requireAccumulator) Add(line string) []string { 222 stripped := stripInlineComment(line) 223 if stripped == "" { 224 return nil 225 } 226 227 if a.pending != "" { 228 // Concatenate continued lines; space ensures tokens stay separated. 229 a.pending = strings.TrimSpace(a.pending + " " + stripped) 230 if requireStatementComplete(a.pending) { 231 statement := a.pending 232 a.pending = "" 233 return extractRequireTargets(statement) 234 } 235 return nil 236 } 237 238 if keyword := requireKeyword(stripped); keyword != "" { 239 // Start buffering a new require statement. 240 a.pending = stripped 241 if requireStatementComplete(a.pending) { 242 statement := a.pending 243 a.pending = "" 244 return extractRequireTargets(statement) 245 } 246 return nil 247 } 248 249 // If no require keyword and no pending context, attempt standalone extraction. 250 return extractRequireTargets(stripped) 251 } 252 253 // Flush returns any pending require statement if it is complete, clearing the 254 // accumulator. If the pending statement is incomplete, it is discarded and an 255 // empty slice is returned. 256 func (a *requireAccumulator) Flush() []string { 257 if a.pending == "" { 258 return nil 259 } 260 if !requireStatementComplete(a.pending) { 261 return nil 262 } 263 statement := a.pending 264 a.pending = "" 265 return extractRequireTargets(statement) 266 } 267 268 // appendUnique appends candidates that are non-empty and not already present in existing. 
269 func appendUnique(existing []string, candidates ...string) []string { 270 for _, candidate := range candidates { 271 if candidate == "" { 272 continue 273 } 274 if !slices.Contains(existing, candidate) { 275 existing = append(existing, candidate) 276 } 277 } 278 return existing 279 } 280 281 // parseFileJoin attempts to extract a static path from File.join calls with 282 // literal string arguments; it returns an empty string if parsing fails. 283 func parseFileJoin(expr string) string { 284 args, ok := extractCallArguments(expr, "File.join") 285 if !ok { 286 return "" 287 } 288 289 segments := splitArgs(args) 290 var parts []string 291 for _, segment := range segments { 292 segment = strings.TrimSpace(segment) 293 if val, ok := parseQuotedLiteral(segment); ok { 294 // Only literal segments contribute to a static path. 295 parts = append(parts, val) 296 continue 297 } 298 // Any non-literal segment prevents static resolution. 299 return "" 300 } 301 if len(parts) == 0 { 302 return "" 303 } 304 305 joined := filepath.Join(parts...) 306 return filepath.Clean(joined) 307 } 308 309 // parseFileExpand extracts the first argument to File.expand_path when it is a 310 // literal or another supported static helper, returning an empty string otherwise. 311 func parseFileExpand(expr string) string { 312 args, ok := extractCallArguments(expr, "File.expand_path") 313 if !ok { 314 return "" 315 } 316 segments := splitArgs(args) 317 if len(segments) == 0 { 318 return "" 319 } 320 first := strings.TrimSpace(segments[0]) 321 // File.expand_path may wrap File.join or a literal path. 322 if strings.HasPrefix(first, "File.join") { 323 return parseFileJoin(first) 324 } 325 if val, ok := parseQuotedLiteral(first); ok { 326 return filepath.Clean(val) 327 } 328 return "" 329 } 330 331 // parseDirnameConcat resolves concatenations that include File.dirname(__FILE__) 332 // or __dir__ with static path literals to produce a relative path. 
333 func parseDirnameConcat(expr string) string { 334 parts := splitOnPlus(expr) 335 if len(parts) == 0 { 336 return "" 337 } 338 hasDirname := false 339 var literals []string 340 for _, part := range parts { 341 part = strings.TrimSpace(part) 342 if part == "" { 343 continue 344 } 345 if strings.Contains(part, "File.dirname(__FILE__)") || part == "__dir__" { 346 // Track presence of dirname anchor; no literal to append yet. 347 hasDirname = true 348 continue 349 } 350 if strings.HasSuffix(part, "__dir__") && strings.Contains(part, "File.expand_path") { 351 // handled elsewhere; skip to avoid double detection. 352 continue 353 } 354 if strings.HasPrefix(part, "File.join") { 355 // Support nested File.join helpers within concatenations. 356 if joined := parseFileJoin(part); joined != "" { 357 literals = append(literals, joined) 358 } 359 continue 360 } 361 if strings.HasPrefix(part, "File.expand_path") { 362 // Allow nested expand_path inside concatenations. 363 if val := parseFileExpand(part); val != "" { 364 literals = append(literals, val) 365 } 366 continue 367 } 368 if val, ok := parseQuotedLiteral(part); ok { 369 literals = append(literals, val) 370 } 371 } 372 if !hasDirname || len(literals) == 0 { 373 return "" 374 } 375 for i, lit := range literals { 376 literals[i] = strings.TrimPrefix(lit, string(filepath.Separator)) 377 } 378 joined := filepath.Clean(filepath.Join(literals...)) 379 return strings.TrimPrefix(joined, string(filepath.Separator)) 380 } 381 382 // extractCallArguments returns the argument string inside the parentheses for 383 // the specified call prefix, handling nested parentheses and quoted strings. 
//
// expr must start (after trimming) with prefix followed by an opening
// parenthesis. The returned string is the trimmed text between that parenthesis
// and its matching closing parenthesis; anything after the call is ignored. The
// boolean is false when expr does not have this shape or the parentheses never
// balance.
//
// Inside a quoted string a backslash escapes the following byte, so an escaped
// quote neither ends the string nor is treated as a delimiter.
func extractCallArguments(expr, prefix string) (string, bool) {
	rem := strings.TrimSpace(expr)
	if !strings.HasPrefix(rem, prefix) {
		return "", false
	}
	rem = strings.TrimSpace(rem[len(prefix):])
	if !strings.HasPrefix(rem, "(") {
		return "", false
	}
	rem = rem[1:]

	depth := 1
	inSingle, inDouble := false, false
	// A classic index loop is required here: the escape branch advances i, and
	// in a `for i := range n` loop such a change would not carry over to the
	// next iteration.
	for i := 0; i < len(rem); i++ {
		ch := rem[i]
		inQuote := inSingle || inDouble
		switch {
		case ch == '\\' && inQuote && i+1 < len(rem):
			// Skip the escaped byte so it cannot toggle quote state.
			i++
		case ch == '\'' && !inDouble:
			inSingle = !inSingle
		case ch == '"' && !inSingle:
			inDouble = !inDouble
		case ch == '(' && !inQuote:
			depth++
		case ch == ')' && !inQuote:
			depth--
			if depth == 0 {
				// Return captured arguments once parentheses balance.
				return strings.TrimSpace(rem[:i]), true
			}
		}
	}
	return "", false
}

// splitArgs splits a comma-separated argument list into individual arguments,
// respecting quoted strings and nested parentheses.
//
// Commas inside single- or double-quoted strings or inside parentheses do not
// split. Each argument is returned with surrounding whitespace trimmed. Empty
// arguments before a comma are kept; an empty trailing argument is dropped.
// Inside a quoted string a backslash escapes the following byte.
func splitArgs(expr string) []string {
	var (
		args       []string
		inSingle   bool
		inDouble   bool
		parenDepth int
		start      int // index of the first byte of the current argument
	)

	// A classic index loop is required here: the escape branch advances i, and
	// in a `for i := range n` loop such a change would not carry over to the
	// next iteration.
	for i := 0; i < len(expr); i++ {
		ch := expr[i]
		inQuote := inSingle || inDouble
		switch {
		case ch == '\\' && inQuote:
			// Skip the escaped byte (if any) so it cannot toggle quote state.
			if i+1 < len(expr) {
				i++
			}
		case ch == '\'' && !inDouble:
			inSingle = !inSingle
		case ch == '"' && !inSingle:
			inDouble = !inDouble
		case ch == '(' && !inQuote:
			parenDepth++
		case ch == ')' && !inQuote && parenDepth > 0:
			parenDepth--
		case ch == ',' && !inQuote && parenDepth == 0:
			// Emit current argument when outside nested constructs.
			args = append(args, strings.TrimSpace(expr[start:i]))
			start = i + 1
		}
	}

	if tail := strings.TrimSpace(expr[start:]); tail != "" {
		args = append(args, tail)
	}
	return args
}

// parseQuotedLiteral returns the unescaped contents of a quoted Ruby literal and
// a boolean indicating success.
//
// After trimming, expr must begin with a single or double quote. The literal
// ends at the first unescaped matching quote; any text after it is ignored. A
// backslash is dropped and the byte that follows it is taken verbatim. The
// boolean is false when expr does not start with a quote or the closing quote
// is missing.
func parseQuotedLiteral(expr string) (string, bool) {
	trimmed := strings.TrimSpace(expr)
	if trimmed == "" {
		return "", false
	}
	quote := trimmed[0]
	if quote != '\'' && quote != '"' {
		return "", false
	}

	var (
		value   strings.Builder
		escaped bool
	)
	for i := 1; i < len(trimmed); i++ {
		ch := trimmed[i]
		switch {
		case escaped:
			value.WriteByte(ch)
			escaped = false
		case ch == '\\':
			escaped = true
		case ch == quote:
			// Found closing quote; return accumulated value.
			return value.String(), true
		default:
			value.WriteByte(ch)
		}
	}
	return "", false
}

// stripInlineComment removes Ruby inline comments while preserving quoted hash
// characters inside string literals.
//
// The line is cut at the first '#' that is outside single- and double-quoted
// strings (a backslash inside a string escapes the next byte). The result is
// returned with surrounding whitespace trimmed.
func stripInlineComment(line string) string {
	var (
		inSingle bool
		inDouble bool
		escaped  bool
	)
	for i := 0; i < len(line); i++ {
		ch := line[i]
		if escaped {
			escaped = false
			continue
		}
		switch ch {
		case '\\':
			if inSingle || inDouble {
				escaped = true
			}
		case '\'':
			if !inDouble {
				inSingle = !inSingle
			}
		case '"':
			if !inSingle {
				inDouble = !inDouble
			}
		case '#':
			if !inSingle && !inDouble {
				// Trim comment marker and trailing spaces.
				return strings.TrimSpace(line[:i])
			}
		}
	}
	return strings.TrimSpace(line)
}

// trimRubyTrailingCondition removes trailing single-line conditionals (if,
// unless, while, until) to simplify require target parsing.
//
// The expression is cut at the earliest space-delimited modifier keyword that
// appears outside single- and double-quoted strings, and the remaining text is
// returned trimmed. When no such keyword is present expr is returned unchanged.
func trimRubyTrailingCondition(expr string) string {
	keywords := [...]string{" if ", " unless ", " while ", " until "}

	var inSingle, inDouble, escaped bool
	for i := 0; i < len(expr); i++ {
		ch := expr[i]
		if escaped {
			escaped = false
			continue
		}
		switch ch {
		case '\\':
			if inSingle || inDouble {
				escaped = true
			}
		case '\'':
			if !inDouble {
				inSingle = !inSingle
			}
		case '"':
			if !inSingle {
				inDouble = !inDouble
			}
		case ' ':
			if inSingle || inDouble {
				// Keywords inside string literals are part of the path text.
				continue
			}
			// Scanning left to right guarantees the earliest modifier wins,
			// regardless of the order of the keyword list.
			for _, kw := range keywords {
				if strings.HasPrefix(expr[i:], kw) {
					return strings.TrimSpace(expr[:i])
				}
			}
		}
	}
	return expr
}

// requireKeyword returns the require variant found at the beginning of expr
// (after trimming): "require_relative" when expr starts with that word, or
// "require" when expr starts with "require(" or "require ". It returns an
// empty string if none is present.
func requireKeyword(expr string) string {
	trimmed := strings.TrimSpace(expr)
	switch {
	case trimmed == "":
		return ""
	case strings.HasPrefix(trimmed, "require_relative"):
		return "require_relative"
	case strings.HasPrefix(trimmed, "require("), strings.HasPrefix(trimmed, "require "):
		return "require"
	}
	return ""
}

// looksLikeProjectPath heuristically determines whether the provided path refers
// to a project-local file rather than a standard library or gem.
594 func looksLikeProjectPath(path string) bool { 595 if path == "" { 596 return false 597 } 598 if strings.HasPrefix(path, ".") { 599 return true 600 } 601 if strings.Contains(path, "/") { 602 return true 603 } 604 if strings.HasSuffix(path, ".rb") { 605 return true 606 } 607 return false 608 } 609 610 // requireStatementComplete reports whether the given require statement has 611 // balanced delimiters and closed quotes, indicating it is ready to be parsed. 612 func requireStatementComplete(expr string) bool { 613 trimmed := strings.TrimSpace(expr) 614 if trimmed == "" { 615 return false 616 } 617 trimmed = strings.TrimSpace(trimRubyTrailingCondition(trimmed)) 618 inSingle := false 619 inDouble := false 620 escaped := false 621 depth := 0 622 for i := range len(trimmed) { 623 ch := trimmed[i] 624 if escaped { 625 escaped = false 626 continue 627 } 628 switch ch { 629 case '\\': 630 if inSingle || inDouble { 631 escaped = true 632 } 633 case '\'': 634 if !inDouble { 635 inSingle = !inSingle 636 } 637 case '"': 638 if !inSingle { 639 inDouble = !inDouble 640 } 641 case '(': 642 if !inSingle && !inDouble { 643 depth++ 644 } 645 case ')': 646 if !inSingle && !inDouble { 647 depth-- 648 } 649 } 650 } 651 return !inSingle && !inDouble && depth <= 0 652 } 653 654 // splitOnPlus splits an expression on plus operators while respecting quoted 655 // strings and balanced parentheses, returning trimmed segments. 
//
// A '+' inside a quoted string or inside parentheses does not split. Empty
// segments before an operator are kept, while an empty trailing segment is
// dropped. Inside a quoted string a backslash escapes the following byte.
func splitOnPlus(expr string) []string {
	var (
		segments []string
		quote    byte // active quote character, 0 when outside a string
		nesting  int
		begin    int // index of the first byte of the current segment
	)
	for pos := 0; pos < len(expr); pos++ {
		c := expr[pos]
		if quote != 0 {
			switch c {
			case '\\':
				// Step over the escaped byte, if there is one.
				if pos+1 < len(expr) {
					pos++
				}
			case quote:
				quote = 0
			}
			continue
		}
		switch c {
		case '\'', '"':
			quote = c
		case '(':
			nesting++
		case ')':
			if nesting > 0 {
				nesting--
			}
		case '+':
			if nesting == 0 {
				segments = append(segments, strings.TrimSpace(expr[begin:pos]))
				begin = pos + 1
			}
		}
	}
	if rest := strings.TrimSpace(expr[begin:]); rest != "" {
		segments = append(segments, rest)
	}
	return segments
}