github.com/google/osv-scalibr@v0.4.1/enricher/reachability/java/javaclass.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package java
    16  
    17  import (
    18  	"encoding/binary"
    19  	"errors"
    20  	"fmt"
    21  	"io"
    22  	"strings"
    23  	"unicode/utf8"
    24  )
    25  
var (
	// BinaryBaseTypes lists single-character type codes as they appear in JVM
	// field and method descriptors. It comes from
	// https://docs.oracle.com/javase/specs/jvms/se7/html/jvms-4.html#jvms-4.3
	//
	// NOTE(review): "L" looks like the prefix of an object type
	// ("L<ClassName>;") rather than a primitive base type, and the array
	// prefix "[" is not listed — confirm this matches what callers expect.
	BinaryBaseTypes = []string{
		"B",
		"C",
		"D",
		"F",
		"I",
		"J",
		"L",
		"S",
		"Z",
	}

	// StandardLibraryPrefixes defines the prefixes of standard library classes.
	// Each prefix is written in the slash-separated form and ends with "/", so
	// a plain string-prefix comparison (see IsStdLib) only matches whole
	// package path components.
	StandardLibraryPrefixes = []string{
		"java/",
		"javax/",
		"jdk/",
		"sun/",
		"org/ietf/",
		"org/omg/",
		"org/w3c/",
		"org/xml/",
	}
)
    52  
// ClassFile struct represents the overall structure of a Java class file.
// This only contains the fields we care about for reachability analysis.
// Fields are listed in the order ParseClass reads them from the input.
type ClassFile struct {
	// Magic is the first four bytes of the file, read as a big-endian value.
	// ParseClass stores it without validating it.
	Magic uint32
	// MinorVersion and MajorVersion are the class file format version numbers.
	MinorVersion uint16
	MajorVersion uint16
	// ConstantPoolCount is the constant_pool_count value as stored in the
	// file, i.e. the number of constant pool entries plus one.
	ConstantPoolCount uint16
	// ConstantPool holds the parsed entries. ParseClass puts a placeholder at
	// index 0 and after every long/double entry so that slice indices match
	// the 1-based constant pool indices used inside the class file.
	ConstantPool []ConstantPoolInfo
	// AccessFlags is the 16-bit value that follows the constant pool.
	AccessFlags uint16
	// ThisClass is the 16-bit value read after AccessFlags. ParseClass does
	// not check that it is a valid constant pool index.
	ThisClass uint16
}
    64  
// ConstantPoolInfo interface represents the base type for all constant pool entries.
// Every Constant* struct in this file implements it.
type ConstantPoolInfo interface {
	// Type returns the ConstantKind that identifies the concrete entry type.
	Type() ConstantKind
}
    69  
// ConstantKind is the type of a constant pool entry.
// ParseClass reads it as the single byte that precedes each entry.
type ConstantKind uint8

// ConstantKind values are the tag bytes that identify each kind of constant
// pool entry in a class file. Tag values without a constant below (for
// example 2, 13 and 14) are rejected by ParseClass as invalid.
const (
	ConstantKindUtf8               ConstantKind = 1
	ConstantKindInteger            ConstantKind = 3
	ConstantKindFloat              ConstantKind = 4
	ConstantKindLong               ConstantKind = 5
	ConstantKindDouble             ConstantKind = 6
	ConstantKindClass              ConstantKind = 7
	ConstantKindString             ConstantKind = 8
	ConstantKindFieldref           ConstantKind = 9
	ConstantKindMethodref          ConstantKind = 10
	ConstantKindInterfaceMethodref ConstantKind = 11
	ConstantKindNameAndType        ConstantKind = 12
	ConstantKindMethodHandle       ConstantKind = 15
	ConstantKindMethodType         ConstantKind = 16
	ConstantKindDynamic            ConstantKind = 17
	ConstantKindInvokeDynamic      ConstantKind = 18
	ConstantKindModule             ConstantKind = 19
	ConstantKindPackage            ConstantKind = 20

	// ConstantKindPlaceholder is not a real Java class constant kind.
	// We use this to implement long and double constants taking up two entries
	// in the constant pool, as well as the constant pool being 1-indexed.
	// ParseClass rejects it if it shows up as a tag in the input.
	//
	// From https://docs.oracle.com/javase/specs/jvms/se22/html/jvms-4.html#jvms-4.4.5
	// All 8-byte constants take up two entries in the constant_pool table of
	// the class file. If a CONSTANT_Long_info or CONSTANT_Double_info structure
	// is the entry at index n in the constant_pool table, then the next usable
	// entry in the table is located at index n+2. The constant_pool index n+1
	// must be valid but is considered unusable.
	ConstantKindPlaceholder ConstantKind = 255
)
   105  
// The structs below mirror the payload of each constant pool entry kind.
// Fields named *Index hold 1-based indices into ClassFile.ConstantPool.
type (
	// ConstantClass represents a class constant pool entry.
	// NameIndex refers to the ConstantUtf8 entry holding the class name.
	ConstantClass struct {
		NameIndex uint16
	}
	// ConstantFieldref represents a field reference constant pool entry.
	ConstantFieldref struct {
		ClassIndex       uint16
		NameAndTypeIndex uint16
	}
	// ConstantMethodref represents a method reference constant pool entry.
	// ClassIndex refers to a ConstantClass entry and NameAndTypeIndex to a
	// ConstantNameAndType entry (see ConstantPoolMethodref).
	ConstantMethodref struct {
		ClassIndex       uint16
		NameAndTypeIndex uint16
	}
	// ConstantInterfaceMethodref represents an interface method reference constant pool entry.
	ConstantInterfaceMethodref struct {
		ClassIndex       uint16
		NameAndTypeIndex uint16
	}
	// ConstantString represents a string constant pool entry.
	ConstantString struct {
		StringIndex uint16
	}
	// ConstantInteger represents an integer constant pool entry.
	ConstantInteger struct {
		Bytes int32
	}
	// ConstantFloat represents a float constant pool entry.
	ConstantFloat struct {
		Bytes float32
	}
	// ConstantLong represents a long constant pool entry.
	// It occupies two constant pool slots; ParseClass fills the second one
	// with a ConstantPlaceholder.
	ConstantLong struct {
		Bytes int64
	}
	// ConstantDouble represents a double constant pool entry.
	// It occupies two constant pool slots; ParseClass fills the second one
	// with a ConstantPlaceholder.
	ConstantDouble struct {
		Bytes float64
	}
	// ConstantNameAndType represents a name and type constant pool entry.
	// NameIndex and DescriptorIndex both refer to ConstantUtf8 entries
	// (see ConstantPoolMethodref).
	ConstantNameAndType struct {
		NameIndex       uint16
		DescriptorIndex uint16
	}
	// ConstantUtf8 represents a UTF-8 string constant pool entry.
	// Length is the byte length announced in the file and Bytes holds the raw,
	// not yet validated, string bytes.
	ConstantUtf8 struct {
		Length uint16
		Bytes  []byte
	}
	// ConstantMethodHandle represents a method handle constant pool entry.
	ConstantMethodHandle struct {
		ReferenceKind  uint8
		ReferenceIndex uint16
	}
	// ConstantMethodType represents a method type constant pool entry.
	ConstantMethodType struct {
		DescriptorIndex uint16
	}
	// ConstantInvokeDynamic represents an invoke dynamic constant pool entry.
	ConstantInvokeDynamic struct {
		BootstrapMethodAttrIndex uint16
		NameAndTypeIndex         uint16
	}
	// ConstantModule represents a module constant pool entry.
	ConstantModule struct {
		NameIndex uint16
	}
	// ConstantPackage represents a package constant pool entry.
	ConstantPackage struct {
		NameIndex uint16
	}
	// ConstantDynamic represents a dynamic constant pool entry.
	ConstantDynamic struct {
		BootstrapMethodAttrIndex uint16
		NameAndTypeIndex         uint16
	}
	// ConstantPlaceholder is a placeholder constant pool entry.
	// It carries no data and only keeps slice indices aligned with constant
	// pool indices (slot 0 and the slot after each long/double).
	ConstantPlaceholder struct{}
)
   186  
   187  // Type returns the ConstantKind for ConstantClass.
   188  func (c ConstantClass) Type() ConstantKind { return ConstantKindClass }
   189  
   190  // Type returns the ConstantKind for ConstantFieldref.
   191  func (c ConstantFieldref) Type() ConstantKind { return ConstantKindFieldref }
   192  
   193  // Type returns the ConstantKind for ConstantMethodref.
   194  func (c ConstantMethodref) Type() ConstantKind { return ConstantKindMethodref }
   195  
   196  // Type returns the ConstantKind for ConstantInterfaceMethodref.
   197  func (c ConstantInterfaceMethodref) Type() ConstantKind { return ConstantKindInterfaceMethodref }
   198  
   199  // Type returns the ConstantKind for ConstantString.
   200  func (c ConstantString) Type() ConstantKind { return ConstantKindString }
   201  
   202  // Type returns the ConstantKind for ConstantInteger.
   203  func (c ConstantInteger) Type() ConstantKind { return ConstantKindInteger }
   204  
   205  // Type returns the ConstantKind for ConstantFloat.
   206  func (c ConstantFloat) Type() ConstantKind { return ConstantKindFloat }
   207  
   208  // Type returns the ConstantKind for ConstantLong.
   209  func (c ConstantLong) Type() ConstantKind { return ConstantKindLong }
   210  
   211  // Type returns the ConstantKind for ConstantDouble.
   212  func (c ConstantDouble) Type() ConstantKind { return ConstantKindDouble }
   213  
   214  // Type returns the ConstantKind for ConstantNameAndType.
   215  func (c ConstantNameAndType) Type() ConstantKind { return ConstantKindNameAndType }
   216  
   217  // Type returns the ConstantKind for ConstantUtf8.
   218  func (c ConstantUtf8) Type() ConstantKind { return ConstantKindUtf8 }
   219  
   220  // Type returns the ConstantKind for ConstantMethodHandle.
   221  func (c ConstantMethodHandle) Type() ConstantKind { return ConstantKindMethodHandle }
   222  
   223  // Type returns the ConstantKind for ConstantMethodType.
   224  func (c ConstantMethodType) Type() ConstantKind { return ConstantKindMethodType }
   225  
   226  // Type returns the ConstantKind for ConstantInvokeDynamic.
   227  func (c ConstantInvokeDynamic) Type() ConstantKind { return ConstantKindInvokeDynamic }
   228  
   229  // Type returns the ConstantKind for ConstantModule.
   230  func (c ConstantModule) Type() ConstantKind { return ConstantKindModule }
   231  
   232  // Type returns the ConstantKind for ConstantPackage.
   233  func (c ConstantPackage) Type() ConstantKind { return ConstantKindPackage }
   234  
   235  // Type returns the ConstantKind for ConstantDynamic.
   236  func (c ConstantDynamic) Type() ConstantKind { return ConstantKindDynamic }
   237  
   238  // Type returns the ConstantKind for ConstantPlaceholder.
   239  func (c ConstantPlaceholder) Type() ConstantKind { return ConstantKindPlaceholder }
   240  
   241  // ParseClass parses a Java class file from a reader.
   242  func ParseClass(r io.Reader) (*ClassFile, error) {
   243  	var cf ClassFile
   244  	err := binary.Read(r, binary.BigEndian, &cf.Magic)
   245  	if err != nil {
   246  		return nil, err
   247  	}
   248  	err = binary.Read(r, binary.BigEndian, &cf.MinorVersion)
   249  	if err != nil {
   250  		return nil, err
   251  	}
   252  	err = binary.Read(r, binary.BigEndian, &cf.MajorVersion)
   253  	if err != nil {
   254  		return nil, err
   255  	}
   256  	err = binary.Read(r, binary.BigEndian, &cf.ConstantPoolCount)
   257  	if err != nil {
   258  		return nil, err
   259  	}
   260  
   261  	// Add a dummy constant so that entries are 1-indexed per the Java spec.
   262  	cf.ConstantPool = append(cf.ConstantPool, &ConstantPlaceholder{})
   263  
   264  	// The value of the constant_pool_count item is equal to the number of
   265  	// entries in the constant_pool table plus one.
   266  	for i := 0; i < int(cf.ConstantPoolCount-1); i++ {
   267  		var kind ConstantKind
   268  		err := binary.Read(r, binary.BigEndian, &kind)
   269  		if err != nil {
   270  			return nil, err
   271  		}
   272  
   273  		var cp ConstantPoolInfo
   274  
   275  		switch kind {
   276  		case ConstantKindUtf8:
   277  			constant := &ConstantUtf8{}
   278  			err := binary.Read(r, binary.BigEndian, &constant.Length)
   279  			if err != nil {
   280  				return nil, err
   281  			}
   282  
   283  			const maxConstantLength = 32 * 1024
   284  			if constant.Length > maxConstantLength {
   285  				return nil, fmt.Errorf("constant size too large (%d)", constant.Length)
   286  			}
   287  
   288  			constant.Bytes = make([]byte, constant.Length)
   289  			_, err = r.Read(constant.Bytes)
   290  			if err != nil {
   291  				return nil, err
   292  			}
   293  			cp = constant
   294  		case ConstantKindInteger:
   295  			constant := &ConstantInteger{}
   296  			err := binary.Read(r, binary.BigEndian, &constant.Bytes)
   297  			if err != nil {
   298  				return nil, err
   299  			}
   300  			cp = constant
   301  		case ConstantKindFloat:
   302  			constant := &ConstantFloat{}
   303  			err := binary.Read(r, binary.BigEndian, &constant.Bytes)
   304  			if err != nil {
   305  				return nil, err
   306  			}
   307  			cp = constant
   308  		case ConstantKindLong:
   309  			constant := &ConstantLong{}
   310  			err := binary.Read(r, binary.BigEndian, &constant.Bytes)
   311  			if err != nil {
   312  				return nil, err
   313  			}
   314  			cp = constant
   315  		case ConstantKindDouble:
   316  			constant := &ConstantDouble{}
   317  			err := binary.Read(r, binary.BigEndian, &constant.Bytes)
   318  			if err != nil {
   319  				return nil, err
   320  			}
   321  			cp = constant
   322  		case ConstantKindClass:
   323  			constant := &ConstantClass{}
   324  			err := binary.Read(r, binary.BigEndian, &constant.NameIndex)
   325  			if err != nil {
   326  				return nil, err
   327  			}
   328  			cp = constant
   329  		case ConstantKindString:
   330  			constant := &ConstantString{}
   331  			err := binary.Read(r, binary.BigEndian, &constant.StringIndex)
   332  			if err != nil {
   333  				return nil, err
   334  			}
   335  			cp = constant
   336  		case ConstantKindFieldref:
   337  			constant := &ConstantFieldref{}
   338  			err := binary.Read(r, binary.BigEndian, &constant.ClassIndex)
   339  			if err != nil {
   340  				return nil, err
   341  			}
   342  			err = binary.Read(r, binary.BigEndian, &constant.NameAndTypeIndex)
   343  			if err != nil {
   344  				return nil, err
   345  			}
   346  			cp = constant
   347  		case ConstantKindMethodref:
   348  			constant := &ConstantMethodref{}
   349  			err := binary.Read(r, binary.BigEndian, &constant.ClassIndex)
   350  			if err != nil {
   351  				return nil, err
   352  			}
   353  			err = binary.Read(r, binary.BigEndian, &constant.NameAndTypeIndex)
   354  			if err != nil {
   355  				return nil, err
   356  			}
   357  			cp = constant
   358  		case ConstantKindInterfaceMethodref:
   359  			constant := &ConstantInterfaceMethodref{}
   360  			err := binary.Read(r, binary.BigEndian, &constant.ClassIndex)
   361  			if err != nil {
   362  				return nil, err
   363  			}
   364  			err = binary.Read(r, binary.BigEndian, &constant.NameAndTypeIndex)
   365  			if err != nil {
   366  				return nil, err
   367  			}
   368  			cp = constant
   369  		case ConstantKindNameAndType:
   370  			constant := &ConstantNameAndType{}
   371  			err := binary.Read(r, binary.BigEndian, &constant.NameIndex)
   372  			if err != nil {
   373  				return nil, err
   374  			}
   375  			err = binary.Read(r, binary.BigEndian, &constant.DescriptorIndex)
   376  			if err != nil {
   377  				return nil, err
   378  			}
   379  			cp = constant
   380  		case ConstantKindMethodHandle:
   381  			constant := &ConstantMethodHandle{}
   382  			err := binary.Read(r, binary.BigEndian, &constant.ReferenceKind)
   383  			if err != nil {
   384  				return nil, err
   385  			}
   386  			err = binary.Read(r, binary.BigEndian, &constant.ReferenceIndex)
   387  			if err != nil {
   388  				return nil, err
   389  			}
   390  			cp = constant
   391  		case ConstantKindMethodType:
   392  			constant := &ConstantMethodType{}
   393  			err := binary.Read(r, binary.BigEndian, &constant.DescriptorIndex)
   394  			if err != nil {
   395  				return nil, err
   396  			}
   397  			cp = constant
   398  		case ConstantKindDynamic:
   399  			constant := &ConstantDynamic{}
   400  			err := binary.Read(r, binary.BigEndian, &constant.BootstrapMethodAttrIndex)
   401  			if err != nil {
   402  				return nil, err
   403  			}
   404  			err = binary.Read(r, binary.BigEndian, &constant.NameAndTypeIndex)
   405  			if err != nil {
   406  				return nil, err
   407  			}
   408  			cp = constant
   409  		case ConstantKindInvokeDynamic:
   410  			constant := &ConstantInvokeDynamic{}
   411  			err := binary.Read(r, binary.BigEndian, &constant.BootstrapMethodAttrIndex)
   412  			if err != nil {
   413  				return nil, err
   414  			}
   415  			err = binary.Read(r, binary.BigEndian, &constant.NameAndTypeIndex)
   416  			if err != nil {
   417  				return nil, err
   418  			}
   419  			cp = constant
   420  		case ConstantKindModule:
   421  			constant := &ConstantModule{}
   422  			err := binary.Read(r, binary.BigEndian, &constant.NameIndex)
   423  			if err != nil {
   424  				return nil, err
   425  			}
   426  			cp = constant
   427  		case ConstantKindPackage:
   428  			constant := &ConstantPackage{}
   429  			err := binary.Read(r, binary.BigEndian, &constant.NameIndex)
   430  			if err != nil {
   431  				return nil, err
   432  			}
   433  			cp = constant
   434  		case ConstantKindPlaceholder:
   435  			fallthrough
   436  		default:
   437  			return nil, fmt.Errorf("invalid cp_info type %d at index %d", kind, i+1)
   438  		}
   439  
   440  		cf.ConstantPool = append(cf.ConstantPool, cp)
   441  
   442  		if cp.Type() == ConstantKindDouble || cp.Type() == ConstantKindLong {
   443  			// 8-byte values take up 2 constant pool entries.
   444  			cf.ConstantPool = append(cf.ConstantPool, &ConstantPlaceholder{})
   445  			i++
   446  		}
   447  	}
   448  
   449  	err = binary.Read(r, binary.BigEndian, &cf.AccessFlags)
   450  	if err != nil {
   451  		return nil, err
   452  	}
   453  
   454  	err = binary.Read(r, binary.BigEndian, &cf.ThisClass)
   455  	if err != nil {
   456  		return nil, err
   457  	}
   458  
   459  	return &cf, nil
   460  }
   461  
   462  func (cf *ClassFile) checkIndex(idx int) error {
   463  	// From https://docs.oracle.com/javase/specs/jvms/se22/html/jvms-4.html#jvms-4.4.1
   464  	//
   465  	// A constant_pool index is considered valid if it is greater than
   466  	// zero and less than constant_pool_count, with the exception for
   467  	// constants of type long and double noted in ยง4.4.5.
   468  	if idx == 0 || idx >= len(cf.ConstantPool) {
   469  		return fmt.Errorf("invalid index %d", idx)
   470  	}
   471  
   472  	return nil
   473  }
   474  
   475  // ConstantPoolMethodref returns the class, method, and descriptor for a method reference at the
   476  // given index.
   477  func (cf *ClassFile) ConstantPoolMethodref(idx int) (class string, method string, descriptor string, err error) {
   478  	err = cf.checkIndex(idx)
   479  	if err != nil {
   480  		return class, method, descriptor, err
   481  	}
   482  
   483  	if cf.ConstantPool[idx].Type() != ConstantKindMethodref {
   484  		err = errors.New("constant pool idx does not point to a method ref")
   485  		return class, method, descriptor, err
   486  	}
   487  
   488  	methodRef := cf.ConstantPool[idx].(*ConstantMethodref)
   489  	class, err = cf.ConstantPoolClass(int(methodRef.ClassIndex))
   490  	if err != nil {
   491  		return class, method, descriptor, err
   492  	}
   493  
   494  	err = cf.checkIndex(int(methodRef.NameAndTypeIndex))
   495  	if err != nil {
   496  		return class, method, descriptor, err
   497  	}
   498  
   499  	nameAndType, ok := cf.ConstantPool[methodRef.NameAndTypeIndex].(*ConstantNameAndType)
   500  	if !ok {
   501  		err = errors.New("invalid constant name and type")
   502  		return class, method, descriptor, err
   503  	}
   504  	method, err = cf.ConstantPoolUtf8(int(nameAndType.NameIndex))
   505  	if err != nil {
   506  		return class, method, descriptor, err
   507  	}
   508  	descriptor, err = cf.ConstantPoolUtf8(int(nameAndType.DescriptorIndex))
   509  
   510  	return class, method, descriptor, err
   511  }
   512  
   513  // ConstantPoolClass returns the class name at the given index.
   514  func (cf *ClassFile) ConstantPoolClass(idx int) (string, error) {
   515  	if err := cf.checkIndex(idx); err != nil {
   516  		return "", err
   517  	}
   518  	if cf.ConstantPool[idx].Type() != ConstantKindClass {
   519  		return "", errors.New("constant pool idx does not point to a class")
   520  	}
   521  
   522  	classInfo := cf.ConstantPool[idx].(*ConstantClass)
   523  
   524  	return cf.ConstantPoolUtf8(int(classInfo.NameIndex))
   525  }
   526  
   527  // ConstantPoolUtf8 returns the UTF-8 string at the given index.
   528  func (cf *ClassFile) ConstantPoolUtf8(idx int) (string, error) {
   529  	if err := cf.checkIndex(idx); err != nil {
   530  		return "", err
   531  	}
   532  	if cf.ConstantPool[idx].Type() != ConstantKindUtf8 {
   533  		return "", errors.New("constant pool idx does not point to a utf8 string")
   534  	}
   535  
   536  	data := cf.ConstantPool[idx].(*ConstantUtf8)
   537  	if !utf8.Valid(data.Bytes) {
   538  		return "", errors.New("invalid utf8 bytes")
   539  	}
   540  
   541  	return string(data.Bytes), nil
   542  }
   543  
   544  // IsStdLib returns true if the class is a standard library class.
   545  func IsStdLib(class string) bool {
   546  	for _, prefix := range StandardLibraryPrefixes {
   547  		if strings.HasPrefix(class, prefix) {
   548  			return true
   549  		}
   550  	}
   551  
   552  	return false
   553  }