github.com/google/osv-scalibr@v0.4.1/enricher/reachability/java/javaclass.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package java 16 17 import ( 18 "encoding/binary" 19 "errors" 20 "fmt" 21 "io" 22 "strings" 23 "unicode/utf8" 24 ) 25 26 var ( 27 // BinaryBaseTypes comes from https://docs.oracle.com/javase/specs/jvms/se7/html/jvms-4.html#jvms-4.3 28 BinaryBaseTypes = []string{ 29 "B", 30 "C", 31 "D", 32 "F", 33 "I", 34 "J", 35 "L", 36 "S", 37 "Z", 38 } 39 40 // StandardLibraryPrefixes defines the prefixes of standard library classes. 41 StandardLibraryPrefixes = []string{ 42 "java/", 43 "javax/", 44 "jdk/", 45 "sun/", 46 "org/ietf/", 47 "org/omg/", 48 "org/w3c/", 49 "org/xml/", 50 } 51 ) 52 53 // ClassFile struct represents the overall structure of a Java class file. 54 // This only contains the fields we care about for reachability analysis. 55 type ClassFile struct { 56 Magic uint32 57 MinorVersion uint16 58 MajorVersion uint16 59 ConstantPoolCount uint16 60 ConstantPool []ConstantPoolInfo 61 AccessFlags uint16 62 ThisClass uint16 63 } 64 65 // ConstantPoolInfo interface represents the base type for all constant pool entries. 66 type ConstantPoolInfo interface { 67 Type() ConstantKind 68 } 69 70 // ConstantKind is the type of a constant pool entry. 71 type ConstantKind uint8 72 73 // ConstantKind values are defined in JAR constant pool entries. 74 const ( 75 ConstantKindUtf8 ConstantKind = 1 76 ConstantKindInteger ConstantKind = 3 77 ConstantKindFloat ConstantKind = 4 78 ConstantKindLong ConstantKind = 5 79 ConstantKindDouble ConstantKind = 6 80 ConstantKindClass ConstantKind = 7 81 ConstantKindString ConstantKind = 8 82 ConstantKindFieldref ConstantKind = 9 83 ConstantKindMethodref ConstantKind = 10 84 ConstantKindInterfaceMethodref ConstantKind = 11 85 ConstantKindNameAndType ConstantKind = 12 86 ConstantKindMethodHandle ConstantKind = 15 87 ConstantKindMethodType ConstantKind = 16 88 ConstantKindDynamic ConstantKind = 17 89 ConstantKindInvokeDynamic ConstantKind = 18 90 ConstantKindModule ConstantKind = 19 91 ConstantKindPackage ConstantKind = 20 92 93 // ConstantKindPlaceholder is not a real Java class constant kind. 94 // We use this to implement long and double constants taking up two entries 95 // in the constant pool, as well as the constant pool being 1-indexed. 96 // 97 // From https://docs.oracle.com/javase/specs/jvms/se22/html/jvms-4.html#jvms-4.4.5 98 // All 8-byte constants take up two entries in the constant_pool table of 99 // the class file. If a CONSTANT_Long_info or CONSTANT_Double_info structure 100 // is the entry at index n in the constant_pool table, then the next usable 101 // entry in the table is located at index n+2. The constant_pool index n+1 102 // must be valid but is considered unusable. 103 ConstantKindPlaceholder ConstantKind = 255 104 ) 105 106 type ( 107 // ConstantClass represents a class constant pool entry. 108 ConstantClass struct { 109 NameIndex uint16 110 } 111 // ConstantFieldref represents a field reference constant pool entry. 112 ConstantFieldref struct { 113 ClassIndex uint16 114 NameAndTypeIndex uint16 115 } 116 // ConstantMethodref represents a method reference constant pool entry. 117 ConstantMethodref struct { 118 ClassIndex uint16 119 NameAndTypeIndex uint16 120 } 121 // ConstantInterfaceMethodref represents an interface method reference constant pool entry. 122 ConstantInterfaceMethodref struct { 123 ClassIndex uint16 124 NameAndTypeIndex uint16 125 } 126 // ConstantString represents a string constant pool entry. 127 ConstantString struct { 128 StringIndex uint16 129 } 130 // ConstantInteger represents an integer constant pool entry. 131 ConstantInteger struct { 132 Bytes int32 133 } 134 // ConstantFloat represents a float constant pool entry. 135 ConstantFloat struct { 136 Bytes float32 137 } 138 // ConstantLong represents a long constant pool entry. 139 ConstantLong struct { 140 Bytes int64 141 } 142 // ConstantDouble represents a double constant pool entry. 143 ConstantDouble struct { 144 Bytes float64 145 } 146 // ConstantNameAndType represents a name and type constant pool entry. 147 ConstantNameAndType struct { 148 NameIndex uint16 149 DescriptorIndex uint16 150 } 151 // ConstantUtf8 represents a UTF-8 string constant pool entry. 152 ConstantUtf8 struct { 153 Length uint16 154 Bytes []byte 155 } 156 // ConstantMethodHandle represents a method handle constant pool entry. 157 ConstantMethodHandle struct { 158 ReferenceKind uint8 159 ReferenceIndex uint16 160 } 161 // ConstantMethodType represents a method type constant pool entry. 162 ConstantMethodType struct { 163 DescriptorIndex uint16 164 } 165 // ConstantInvokeDynamic represents an invoke dynamic constant pool entry. 166 ConstantInvokeDynamic struct { 167 BootstrapMethodAttrIndex uint16 168 NameAndTypeIndex uint16 169 } 170 // ConstantModule represents a module constant pool entry. 171 ConstantModule struct { 172 NameIndex uint16 173 } 174 // ConstantPackage represents a package constant pool entry. 175 ConstantPackage struct { 176 NameIndex uint16 177 } 178 // ConstantDynamic represents a dynamic constant pool entry. 179 ConstantDynamic struct { 180 BootstrapMethodAttrIndex uint16 181 NameAndTypeIndex uint16 182 } 183 // ConstantPlaceholder is a placeholder constant pool entry. 184 ConstantPlaceholder struct{} 185 ) 186 187 // Type returns the ConstantKind for ConstantClass. 188 func (c ConstantClass) Type() ConstantKind { return ConstantKindClass } 189 190 // Type returns the ConstantKind for ConstantFieldref. 191 func (c ConstantFieldref) Type() ConstantKind { return ConstantKindFieldref } 192 193 // Type returns the ConstantKind for ConstantMethodref. 194 func (c ConstantMethodref) Type() ConstantKind { return ConstantKindMethodref } 195 196 // Type returns the ConstantKind for ConstantInterfaceMethodref. 197 func (c ConstantInterfaceMethodref) Type() ConstantKind { return ConstantKindInterfaceMethodref } 198 199 // Type returns the ConstantKind for ConstantString. 200 func (c ConstantString) Type() ConstantKind { return ConstantKindString } 201 202 // Type returns the ConstantKind for ConstantInteger. 203 func (c ConstantInteger) Type() ConstantKind { return ConstantKindInteger } 204 205 // Type returns the ConstantKind for ConstantFloat. 206 func (c ConstantFloat) Type() ConstantKind { return ConstantKindFloat } 207 208 // Type returns the ConstantKind for ConstantLong. 209 func (c ConstantLong) Type() ConstantKind { return ConstantKindLong } 210 211 // Type returns the ConstantKind for ConstantDouble. 212 func (c ConstantDouble) Type() ConstantKind { return ConstantKindDouble } 213 214 // Type returns the ConstantKind for ConstantNameAndType. 215 func (c ConstantNameAndType) Type() ConstantKind { return ConstantKindNameAndType } 216 217 // Type returns the ConstantKind for ConstantUtf8. 218 func (c ConstantUtf8) Type() ConstantKind { return ConstantKindUtf8 } 219 220 // Type returns the ConstantKind for ConstantMethodHandle. 221 func (c ConstantMethodHandle) Type() ConstantKind { return ConstantKindMethodHandle } 222 223 // Type returns the ConstantKind for ConstantMethodType. 224 func (c ConstantMethodType) Type() ConstantKind { return ConstantKindMethodType } 225 226 // Type returns the ConstantKind for ConstantInvokeDynamic. 227 func (c ConstantInvokeDynamic) Type() ConstantKind { return ConstantKindInvokeDynamic } 228 229 // Type returns the ConstantKind for ConstantModule. 230 func (c ConstantModule) Type() ConstantKind { return ConstantKindModule } 231 232 // Type returns the ConstantKind for ConstantPackage. 233 func (c ConstantPackage) Type() ConstantKind { return ConstantKindPackage } 234 235 // Type returns the ConstantKind for ConstantDynamic. 236 func (c ConstantDynamic) Type() ConstantKind { return ConstantKindDynamic } 237 238 // Type returns the ConstantKind for ConstantPlaceholder. 239 func (c ConstantPlaceholder) Type() ConstantKind { return ConstantKindPlaceholder } 240 241 // ParseClass parses a Java class file from a reader. 242 func ParseClass(r io.Reader) (*ClassFile, error) { 243 var cf ClassFile 244 err := binary.Read(r, binary.BigEndian, &cf.Magic) 245 if err != nil { 246 return nil, err 247 } 248 err = binary.Read(r, binary.BigEndian, &cf.MinorVersion) 249 if err != nil { 250 return nil, err 251 } 252 err = binary.Read(r, binary.BigEndian, &cf.MajorVersion) 253 if err != nil { 254 return nil, err 255 } 256 err = binary.Read(r, binary.BigEndian, &cf.ConstantPoolCount) 257 if err != nil { 258 return nil, err 259 } 260 261 // Add a dummy constant so that entries are 1-indexed per the Java spec. 262 cf.ConstantPool = append(cf.ConstantPool, &ConstantPlaceholder{}) 263 264 // The value of the constant_pool_count item is equal to the number of 265 // entries in the constant_pool table plus one. 266 for i := 0; i < int(cf.ConstantPoolCount-1); i++ { 267 var kind ConstantKind 268 err := binary.Read(r, binary.BigEndian, &kind) 269 if err != nil { 270 return nil, err 271 } 272 273 var cp ConstantPoolInfo 274 275 switch kind { 276 case ConstantKindUtf8: 277 constant := &ConstantUtf8{} 278 err := binary.Read(r, binary.BigEndian, &constant.Length) 279 if err != nil { 280 return nil, err 281 } 282 283 const maxConstantLength = 32 * 1024 284 if constant.Length > maxConstantLength { 285 return nil, fmt.Errorf("constant size too large (%d)", constant.Length) 286 } 287 288 constant.Bytes = make([]byte, constant.Length) 289 _, err = r.Read(constant.Bytes) 290 if err != nil { 291 return nil, err 292 } 293 cp = constant 294 case ConstantKindInteger: 295 constant := &ConstantInteger{} 296 err := binary.Read(r, binary.BigEndian, &constant.Bytes) 297 if err != nil { 298 return nil, err 299 } 300 cp = constant 301 case ConstantKindFloat: 302 constant := &ConstantFloat{} 303 err := binary.Read(r, binary.BigEndian, &constant.Bytes) 304 if err != nil { 305 return nil, err 306 } 307 cp = constant 308 case ConstantKindLong: 309 constant := &ConstantLong{} 310 err := binary.Read(r, binary.BigEndian, &constant.Bytes) 311 if err != nil { 312 return nil, err 313 } 314 cp = constant 315 case ConstantKindDouble: 316 constant := &ConstantDouble{} 317 err := binary.Read(r, binary.BigEndian, &constant.Bytes) 318 if err != nil { 319 return nil, err 320 } 321 cp = constant 322 case ConstantKindClass: 323 constant := &ConstantClass{} 324 err := binary.Read(r, binary.BigEndian, &constant.NameIndex) 325 if err != nil { 326 return nil, err 327 } 328 cp = constant 329 case ConstantKindString: 330 constant := &ConstantString{} 331 err := binary.Read(r, binary.BigEndian, &constant.StringIndex) 332 if err != nil { 333 return nil, err 334 } 335 cp = constant 336 case ConstantKindFieldref: 337 constant := &ConstantFieldref{} 338 err := binary.Read(r, binary.BigEndian, &constant.ClassIndex) 339 if err != nil { 340 return nil, err 341 } 342 err = binary.Read(r, binary.BigEndian, &constant.NameAndTypeIndex) 343 if err != nil { 344 return nil, err 345 } 346 cp = constant 347 case ConstantKindMethodref: 348 constant := &ConstantMethodref{} 349 err := binary.Read(r, binary.BigEndian, &constant.ClassIndex) 350 if err != nil { 351 return nil, err 352 } 353 err = binary.Read(r, binary.BigEndian, &constant.NameAndTypeIndex) 354 if err != nil { 355 return nil, err 356 } 357 cp = constant 358 case ConstantKindInterfaceMethodref: 359 constant := &ConstantInterfaceMethodref{} 360 err := binary.Read(r, binary.BigEndian, &constant.ClassIndex) 361 if err != nil { 362 return nil, err 363 } 364 err = binary.Read(r, binary.BigEndian, &constant.NameAndTypeIndex) 365 if err != nil { 366 return nil, err 367 } 368 cp = constant 369 case ConstantKindNameAndType: 370 constant := &ConstantNameAndType{} 371 err := binary.Read(r, binary.BigEndian, &constant.NameIndex) 372 if err != nil { 373 return nil, err 374 } 375 err = binary.Read(r, binary.BigEndian, &constant.DescriptorIndex) 376 if err != nil { 377 return nil, err 378 } 379 cp = constant 380 case ConstantKindMethodHandle: 381 constant := &ConstantMethodHandle{} 382 err := binary.Read(r, binary.BigEndian, &constant.ReferenceKind) 383 if err != nil { 384 return nil, err 385 } 386 err = binary.Read(r, binary.BigEndian, &constant.ReferenceIndex) 387 if err != nil { 388 return nil, err 389 } 390 cp = constant 391 case ConstantKindMethodType: 392 constant := &ConstantMethodType{} 393 err := binary.Read(r, binary.BigEndian, &constant.DescriptorIndex) 394 if err != nil { 395 return nil, err 396 } 397 cp = constant 398 case ConstantKindDynamic: 399 constant := &ConstantDynamic{} 400 err := binary.Read(r, binary.BigEndian, &constant.BootstrapMethodAttrIndex) 401 if err != nil { 402 return nil, err 403 } 404 err = binary.Read(r, binary.BigEndian, &constant.NameAndTypeIndex) 405 if err != nil { 406 return nil, err 407 } 408 cp = constant 409 case ConstantKindInvokeDynamic: 410 constant := &ConstantInvokeDynamic{} 411 err := binary.Read(r, binary.BigEndian, &constant.BootstrapMethodAttrIndex) 412 if err != nil { 413 return nil, err 414 } 415 err = binary.Read(r, binary.BigEndian, &constant.NameAndTypeIndex) 416 if err != nil { 417 return nil, err 418 } 419 cp = constant 420 case ConstantKindModule: 421 constant := &ConstantModule{} 422 err := binary.Read(r, binary.BigEndian, &constant.NameIndex) 423 if err != nil { 424 return nil, err 425 } 426 cp = constant 427 case ConstantKindPackage: 428 constant := &ConstantPackage{} 429 err := binary.Read(r, binary.BigEndian, &constant.NameIndex) 430 if err != nil { 431 return nil, err 432 } 433 cp = constant 434 case ConstantKindPlaceholder: 435 fallthrough 436 default: 437 return nil, fmt.Errorf("invalid cp_info type %d at index %d", kind, i+1) 438 } 439 440 cf.ConstantPool = append(cf.ConstantPool, cp) 441 442 if cp.Type() == ConstantKindDouble || cp.Type() == ConstantKindLong { 443 // 8-byte values take up 2 constant pool entries. 444 cf.ConstantPool = append(cf.ConstantPool, &ConstantPlaceholder{}) 445 i++ 446 } 447 } 448 449 err = binary.Read(r, binary.BigEndian, &cf.AccessFlags) 450 if err != nil { 451 return nil, err 452 } 453 454 err = binary.Read(r, binary.BigEndian, &cf.ThisClass) 455 if err != nil { 456 return nil, err 457 } 458 459 return &cf, nil 460 } 461 462 func (cf *ClassFile) checkIndex(idx int) error { 463 // From https://docs.oracle.com/javase/specs/jvms/se22/html/jvms-4.html#jvms-4.4.1 464 // 465 // A constant_pool index is considered valid if it is greater than 466 // zero and less than constant_pool_count, with the exception for 467 // constants of type long and double noted in ยง4.4.5. 468 if idx == 0 || idx >= len(cf.ConstantPool) { 469 return fmt.Errorf("invalid index %d", idx) 470 } 471 472 return nil 473 } 474 475 // ConstantPoolMethodref returns the class, method, and descriptor for a method reference at the 476 // given index. 477 func (cf *ClassFile) ConstantPoolMethodref(idx int) (class string, method string, descriptor string, err error) { 478 err = cf.checkIndex(idx) 479 if err != nil { 480 return class, method, descriptor, err 481 } 482 483 if cf.ConstantPool[idx].Type() != ConstantKindMethodref { 484 err = errors.New("constant pool idx does not point to a method ref") 485 return class, method, descriptor, err 486 } 487 488 methodRef := cf.ConstantPool[idx].(*ConstantMethodref) 489 class, err = cf.ConstantPoolClass(int(methodRef.ClassIndex)) 490 if err != nil { 491 return class, method, descriptor, err 492 } 493 494 err = cf.checkIndex(int(methodRef.NameAndTypeIndex)) 495 if err != nil { 496 return class, method, descriptor, err 497 } 498 499 nameAndType, ok := cf.ConstantPool[methodRef.NameAndTypeIndex].(*ConstantNameAndType) 500 if !ok { 501 err = errors.New("invalid constant name and type") 502 return class, method, descriptor, err 503 } 504 method, err = cf.ConstantPoolUtf8(int(nameAndType.NameIndex)) 505 if err != nil { 506 return class, method, descriptor, err 507 } 508 descriptor, err = cf.ConstantPoolUtf8(int(nameAndType.DescriptorIndex)) 509 510 return class, method, descriptor, err 511 } 512 513 // ConstantPoolClass returns the class name at the given index. 514 func (cf *ClassFile) ConstantPoolClass(idx int) (string, error) { 515 if err := cf.checkIndex(idx); err != nil { 516 return "", err 517 } 518 if cf.ConstantPool[idx].Type() != ConstantKindClass { 519 return "", errors.New("constant pool idx does not point to a class") 520 } 521 522 classInfo := cf.ConstantPool[idx].(*ConstantClass) 523 524 return cf.ConstantPoolUtf8(int(classInfo.NameIndex)) 525 } 526 527 // ConstantPoolUtf8 returns the UTF-8 string at the given index. 528 func (cf *ClassFile) ConstantPoolUtf8(idx int) (string, error) { 529 if err := cf.checkIndex(idx); err != nil { 530 return "", err 531 } 532 if cf.ConstantPool[idx].Type() != ConstantKindUtf8 { 533 return "", errors.New("constant pool idx does not point to a utf8 string") 534 } 535 536 data := cf.ConstantPool[idx].(*ConstantUtf8) 537 if !utf8.Valid(data.Bytes) { 538 return "", errors.New("invalid utf8 bytes") 539 } 540 541 return string(data.Bytes), nil 542 } 543 544 // IsStdLib returns true if the class is a standard library class. 545 func IsStdLib(class string) bool { 546 for _, prefix := range StandardLibraryPrefixes { 547 if strings.HasPrefix(class, prefix) { 548 return true 549 } 550 } 551 552 return false 553 }