github.com/apache/beam/sdks/v2@v2.48.2/java/container/boot.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one or more
     2  // contributor license agreements.  See the NOTICE file distributed with
     3  // this work for additional information regarding copyright ownership.
     4  // The ASF licenses this file to You under the Apache License, Version 2.0
     5  // (the "License"); you may not use this file except in compliance with
     6  // the License.  You may obtain a copy of the License at
     7  //
     8  //    http://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  
    16  // boot is the boot code for the Java SDK harness container. It is responsible
    17  // for retrieving staged files and invoking the JVM correctly.
    18  package main
    19  
    20  import (
    21  	"context"
    22  	"encoding/json"
    23  	"flag"
    24  	"fmt"
    25  	"io/ioutil"
    26  	"log"
    27  	"os"
    28  	"path/filepath"
    29  	"sort"
    30  	"strconv"
    31  	"strings"
    32  
    33  	"github.com/apache/beam/sdks/v2/go/container/tools"
    34  	"github.com/apache/beam/sdks/v2/go/pkg/beam/artifact"
    35  	fnpb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/fnexecution_v1"
    36  	pipepb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/pipeline_v1"
    37  	"github.com/apache/beam/sdks/v2/go/pkg/beam/util/execx"
    38  	"github.com/apache/beam/sdks/v2/go/pkg/beam/util/grpcx"
    39  	"github.com/apache/beam/sdks/v2/go/pkg/beam/util/syscallx"
    40  	"github.com/golang/protobuf/proto"
    41  )
    42  
var (
	// Contract: https://s.apache.org/beam-fn-api-container-contract.
	//
	// Command-line flags of the boot process. main requires id and
	// provision_endpoint to be passed as flags. The logging, artifact and
	// control endpoints are overwritten by the URLs from the provisioning
	// information whenever those are non-empty; main aborts if any of the
	// three is still empty afterwards. semi_persist_dir is the root under
	// which the staged artifacts are materialized.

	id                = flag.String("id", "", "Local identifier (required).")
	loggingEndpoint   = flag.String("logging_endpoint", "", "Logging endpoint (required).")
	artifactEndpoint  = flag.String("artifact_endpoint", "", "Artifact endpoint (required).")
	provisionEndpoint = flag.String("provision_endpoint", "", "Provision endpoint (required).")
	controlEndpoint   = flag.String("control_endpoint", "", "Control endpoint (required).")
	semiPersistDir    = flag.String("semi_persist_dir", "/tmp", "Local semi-persistent directory (optional).")
)
    53  
// The *Option constants are names that main looks for with a plain substring
// search over the JSON-encoded pipeline options. The *Args constants are JVM
// command-line fragments:
//   - googleCloudProfilerAgentBaseArgs is a format string with two %s verbs,
//     filled with the job name and the job id from the provisioning metadata.
//   - googleCloudProfilerAgentHeapArgs is the same format string extended
//     with the heap sampling flags.
//   - jammAgentArgs is added to the JVM arguments unless the
//     disable_jamm_agent option is present.
const (
	disableJammAgentOption              = "disable_jamm_agent"
	enableGoogleCloudProfilerOption     = "enable_google_cloud_profiler"
	enableGoogleCloudHeapSamplingOption = "enable_google_cloud_heap_sampling"
	googleCloudProfilerAgentBaseArgs    = "-agentpath:/opt/google_cloud_profiler/profiler_java_agent.so=-logtostderr,-cprof_service=%s,-cprof_service_version=%s"
	googleCloudProfilerAgentHeapArgs    = googleCloudProfilerAgentBaseArgs + ",-cprof_enable_heap_sampling,-cprof_heap_sampling_interval=2097152"
	jammAgentArgs                       = "-javaagent:/opt/apache/beam/jars/jamm.jar"
)
    62  
    63  func main() {
    64  	flag.Parse()
    65  	if *id == "" {
    66  		log.Fatal("No id provided.")
    67  	}
    68  	if *provisionEndpoint == "" {
    69  		log.Fatal("No provision endpoint provided.")
    70  	}
    71  
    72  	ctx := grpcx.WriteWorkerID(context.Background(), *id)
    73  
    74  	info, err := tools.ProvisionInfo(ctx, *provisionEndpoint)
    75  	if err != nil {
    76  		log.Fatalf("Failed to obtain provisioning information: %v", err)
    77  	}
    78  	log.Printf("Provision info:\n%v", info)
    79  
    80  	// TODO(BEAM-8201): Simplify once flags are no longer used.
    81  	if info.GetLoggingEndpoint().GetUrl() != "" {
    82  		*loggingEndpoint = info.GetLoggingEndpoint().GetUrl()
    83  	}
    84  	if info.GetArtifactEndpoint().GetUrl() != "" {
    85  		*artifactEndpoint = info.GetArtifactEndpoint().GetUrl()
    86  	}
    87  	if info.GetControlEndpoint().GetUrl() != "" {
    88  		*controlEndpoint = info.GetControlEndpoint().GetUrl()
    89  	}
    90  
    91  	if *loggingEndpoint == "" {
    92  		log.Fatal("No logging endpoint provided.")
    93  	}
    94  	if *artifactEndpoint == "" {
    95  		log.Fatal("No artifact endpoint provided.")
    96  	}
    97  	if *controlEndpoint == "" {
    98  		log.Fatal("No control endpoint provided.")
    99  	}
   100  	logger := &tools.Logger{Endpoint: *loggingEndpoint}
   101  
   102  	logger.Printf(ctx, "Initializing java harness: %v", strings.Join(os.Args, " "))
   103  
   104  	// (1) Obtain the pipeline options
   105  	options, err := tools.ProtoToJSON(info.GetPipelineOptions())
   106  	if err != nil {
   107  		logger.Fatalf(ctx, "Failed to convert pipeline options: %v", err)
   108  	}
   109  
   110  	// (2) Retrieve the staged user jars. We ignore any disk limit,
   111  	// because the staged jars are mandatory.
   112  
   113  	// Using the SDK Harness ID in the artifact destination path to make sure that dependencies used by multiple
   114  	// SDK Harnesses in the same VM do not conflict. This is needed since some runners (for example, Dataflow)
   115  	// may share the artifact staging directory across multiple SDK Harnesses
   116  	// TODO(https://github.com/apache/beam/issues/20009): consider removing the SDK Harness ID from the staging path after Dataflow can properly
   117  	// seperate out dependencies per environment.
   118  	dir := filepath.Join(*semiPersistDir, *id, "staged")
   119  
   120  	artifacts, err := artifact.Materialize(ctx, *artifactEndpoint, info.GetDependencies(), info.GetRetrievalToken(), dir)
   121  	if err != nil {
   122  		logger.Fatalf(ctx, "Failed to retrieve staged files: %v", err)
   123  	}
   124  
   125  	// (3) Invoke the Java harness, preserving artifact ordering in classpath.
   126  
   127  	os.Setenv("HARNESS_ID", *id)
   128  	os.Setenv("PIPELINE_OPTIONS", options)
   129  	os.Setenv("LOGGING_API_SERVICE_DESCRIPTOR", proto.MarshalTextString(&pipepb.ApiServiceDescriptor{Url: *loggingEndpoint}))
   130  	os.Setenv("CONTROL_API_SERVICE_DESCRIPTOR", proto.MarshalTextString(&pipepb.ApiServiceDescriptor{Url: *controlEndpoint}))
   131  	os.Setenv("RUNNER_CAPABILITIES", strings.Join(info.GetRunnerCapabilities(), " "))
   132  
   133  	if info.GetStatusEndpoint() != nil {
   134  		os.Setenv("STATUS_API_SERVICE_DESCRIPTOR", proto.MarshalTextString(info.GetStatusEndpoint()))
   135  	}
   136  
   137  	const jarsDir = "/opt/apache/beam/jars"
   138  	cp := []string{
   139  		filepath.Join(jarsDir, "slf4j-api.jar"),
   140  		filepath.Join(jarsDir, "slf4j-jdk14.jar"),
   141  		filepath.Join(jarsDir, "jcl-over-slf4j.jar"),
   142  		filepath.Join(jarsDir, "log4j-over-slf4j.jar"),
   143  		filepath.Join(jarsDir, "log4j-to-slf4j.jar"),
   144  		filepath.Join(jarsDir, "beam-sdks-java-harness.jar"),
   145  	}
   146  
   147  	var hasWorkerExperiment = strings.Contains(options, "use_staged_dataflow_worker_jar")
   148  	for _, a := range artifacts {
   149  		name, _ := artifact.MustExtractFilePayload(a)
   150  		if hasWorkerExperiment {
   151  			if strings.HasPrefix(name, "beam-runners-google-cloud-dataflow-java-fn-api-worker") {
   152  				continue
   153  			}
   154  			if name == "dataflow-worker.jar" {
   155  				continue
   156  			}
   157  		}
   158  		cp = append(cp, filepath.Join(dir, filepath.FromSlash(name)))
   159  	}
   160  
   161  	args := []string{
   162  		"-Xmx" + strconv.FormatUint(heapSizeLimit(info), 10),
   163  		// ParallelGC the most adequate for high throughput and lower CPU utilization
   164  		// It is the default GC in Java 8, but not on newer versions
   165  		"-XX:+UseParallelGC",
   166  		"-XX:+AlwaysActAsServerClassMachine",
   167  		"-XX:-OmitStackTraceInFastThrow",
   168  	}
   169  
   170  	enableGoogleCloudProfiler := strings.Contains(options, enableGoogleCloudProfilerOption)
   171  	enableGoogleCloudHeapSampling := strings.Contains(options, enableGoogleCloudHeapSamplingOption)
   172  	if enableGoogleCloudProfiler {
   173  		if metadata := info.GetMetadata(); metadata != nil {
   174  			if jobName, nameExists := metadata["job_name"]; nameExists {
   175  				if jobId, idExists := metadata["job_id"]; idExists {
   176  					if enableGoogleCloudHeapSampling {
   177  						args = append(args, fmt.Sprintf(googleCloudProfilerAgentHeapArgs, jobName, jobId))
   178  					} else {
   179  						args = append(args, fmt.Sprintf(googleCloudProfilerAgentBaseArgs, jobName, jobId))
   180  					}
   181  					logger.Printf(ctx, "Turning on Cloud Profiling. Profile heap: %t", enableGoogleCloudHeapSampling)
   182  				} else {
   183  					logger.Printf(ctx, "Required job_id missing from metadata, profiling will not be enabled without it.")
   184  				}
   185  			} else {
   186  				logger.Printf(ctx, "Required job_name missing from metadata, profiling will not be enabled without it.")
   187  			}
   188  		} else {
   189  			logger.Printf(ctx, "enable_google_cloud_profiler is set to true, but no metadata is received from provision server, profiling will not be enabled.")
   190  		}
   191  	}
   192  
   193  	disableJammAgent := strings.Contains(options, disableJammAgentOption)
   194  	if disableJammAgent {
   195  		logger.Printf(ctx, "Disabling Jamm agent. Measuring object size will be inaccurate.")
   196  	} else {
   197  		args = append(args, jammAgentArgs)
   198  	}
   199  	// Apply meta options
   200  	const metaDir = "/opt/apache/beam/options"
   201  
   202  	// Note: Error is unchecked, so parsing errors won't abort container.
   203  	// TODO: verify if it's intentional or not.
   204  	metaOptions, _ := LoadMetaOptions(ctx, logger, metaDir)
   205  
   206  	javaOptions := BuildOptions(ctx, logger, metaOptions)
   207  	// (1) Add custom jvm arguments: "-server -Xmx1324 -XXfoo .."
   208  	args = append(args, javaOptions.JavaArguments...)
   209  
   210  	// (2) Add classpath: "-cp foo.jar:bar.jar:.."
   211  	if len(javaOptions.Classpath) > 0 {
   212  		cp = append(cp, javaOptions.Classpath...)
   213  	}
   214  	pathingjar, err := makePathingJar(cp)
   215  	if err != nil {
   216  		logger.Fatalf(ctx, "makePathingJar failed: %v", err)
   217  	}
   218  	args = append(args, "-cp")
   219  	args = append(args, pathingjar)
   220  
   221  	// (3) Add (sorted) properties: "-Dbar=baz -Dfoo=bar .."
   222  	var properties []string
   223  	for key, value := range javaOptions.Properties {
   224  		properties = append(properties, fmt.Sprintf("-D%s=%s", key, value))
   225  	}
   226  	sort.Strings(properties)
   227  	args = append(args, properties...)
   228  
   229  	// Open modules specified in pipeline options
   230  	if pipelineOptions, ok := info.GetPipelineOptions().GetFields()["options"]; ok {
   231  		if modules, ok := pipelineOptions.GetStructValue().GetFields()["jdkAddOpenModules"]; ok {
   232  			for _, module := range modules.GetListValue().GetValues() {
   233  				args = append(args, "--add-opens="+module.GetStringValue())
   234  			}
   235  		}
   236  	}
   237  	// Automatically open modules for Java 11+
   238  	openModuleAgentJar := "/opt/apache/beam/jars/open-module-agent.jar"
   239  	if _, err := os.Stat(openModuleAgentJar); err == nil {
   240  		args = append(args, "-javaagent:"+openModuleAgentJar)
   241  	}
   242  	args = append(args, "org.apache.beam.fn.harness.FnHarness")
   243  	logger.Printf(ctx, "Executing: java %v", strings.Join(args, " "))
   244  
   245  	logger.Fatalf(ctx, "Java exited: %v", execx.Execute("java", args...))
   246  }
   247  
   248  // heapSizeLimit returns 80% of the runner limit, if provided. If not provided,
   249  // it returns 70% of the physical memory on the machine. If it cannot determine
   250  // that value, it returns 1GB. This is an imperfect heuristic. It aims to
   251  // ensure there is memory for non-heap use and other overhead, while also not
   252  // underutilizing the machine.
   253  func heapSizeLimit(info *fnpb.ProvisionInfo) uint64 {
   254  	if size, err := syscallx.PhysicalMemorySize(); err == nil {
   255  		return (size * 70) / 100
   256  	}
   257  	return 1 << 30
   258  }
   259  
// Options represents java VM invocation options in a simple,
// semi-structured way.
type Options struct {
	// JavaArguments are added verbatim to the JVM command line.
	JavaArguments []string          `json:"java_arguments,omitempty"`
	// Properties are system properties; each entry is passed to the JVM as
	// "-Dkey=value".
	Properties    map[string]string `json:"properties,omitempty"`
	// Classpath lists additional classpath entries, placed after the
	// built-in jars and the staged artifacts.
	Classpath     []string          `json:"classpath,omitempty"`
}
   267  
// MetaOption represents a jvm environment transformation or setup
// that the launcher employs. The aim is to keep the service-side and
// user-side required configuration simple and minimal, yet allow
// numerous execution tweaks. Most tweaks are enabled by default and
// require no input. Some setups, such as Cloud Debugging, are opt-in.
//
// Meta-options are usually included with the image and use supporting
// files, usually jars. A few are intrinsic because they require
// additional input or complex computations, such as Cloud Debugging
// and Cloud Profiling. Meta-options can be enabled or disabled by
// name. For the most part, the meta-option names are not guaranteed
// to be backwards compatible or stable. They are rather knobs that
// can be tuned if some well-intended transformation causes trouble for
// a customer. For tweaks, the expectation is that the default is
// almost always correct.
//
// Meta-options are simple additive manipulations applied in priority
// order (applied low to high) to allow jvm customization by adding
// files, notably enabling customization by later docker layers. The
// override semantics is prepend for lists and simple overwrite
// otherwise. A common use case is adding a jar to the beginning of
// the classpath, such as the shuffle or windmill jni jar, or adding
// an agent.
//
// NOTE(review): BuildOptions prepends JavaArguments but appends
// Classpath entries, and keeps the first definition of a property;
// confirm that the wording above still matches the intended semantics.
type MetaOption struct {
	// Name identifies the meta-option in log messages.
	Name        string  `json:"name,omitempty"`
	// Description is free-form text; it is not interpreted by this file.
	Description string  `json:"description,omitempty"`
	// Enabled must be true for the option to be applied. It is false when
	// the field is absent from the JSON file.
	Enabled     bool    `json:"enabled,omitempty"`
	// Priority orders the meta-options relative to each other.
	Priority    int     `json:"priority,omitempty"`
	// Options holds the JVM options contributed by this meta-option.
	Options     Options `json:"options"`
}
   298  
   299  // byPriority sorts MetaOptions by priority, highest first.
   300  type byPriority []*MetaOption
   301  
   302  func (f byPriority) Len() int           { return len(f) }
   303  func (f byPriority) Swap(i, j int)      { f[i], f[j] = f[j], f[i] }
   304  func (f byPriority) Less(i, j int) bool { return f[i].Priority > f[j].Priority }
   305  
   306  // LoadMetaOptions scans the directory tree for meta-option metadata
   307  // files and loads them. Any regular file named "option-XX.json" is
   308  // strictly assumed to be a meta-option file. This strictness allows
   309  // us to fail hard if such a file cannot be parsed.
   310  //
   311  // Loading meta-options from disk allows extra files and their
   312  // configuration be kept together and defined externally.
   313  func LoadMetaOptions(ctx context.Context, logger *tools.Logger, dir string) ([]*MetaOption, error) {
   314  	var meta []*MetaOption
   315  
   316  	worker := func(path string, info os.FileInfo, err error) error {
   317  		if err != nil {
   318  			return err
   319  		}
   320  		if !info.Mode().IsRegular() {
   321  			return nil
   322  		}
   323  		if !strings.HasPrefix(info.Name(), "option-") {
   324  			return nil
   325  		}
   326  		if !strings.HasSuffix(info.Name(), ".json") {
   327  			return nil
   328  		}
   329  
   330  		content, err := ioutil.ReadFile(path)
   331  		if err != nil {
   332  			return err
   333  		}
   334  
   335  		var option MetaOption
   336  		if err := json.Unmarshal(content, &option); err != nil {
   337  			return fmt.Errorf("failed to parse %s: %v", path, err)
   338  		}
   339  
   340  		logger.Printf(ctx, "Loaded meta-option '%s'", option.Name)
   341  
   342  		meta = append(meta, &option)
   343  		return nil
   344  	}
   345  
   346  	if err := filepath.Walk(dir, worker); err != nil {
   347  		return nil, err
   348  	}
   349  	return meta, nil
   350  }
   351  
   352  func BuildOptions(ctx context.Context, logger *tools.Logger, metaOptions []*MetaOption) *Options {
   353  	options := &Options{Properties: make(map[string]string)}
   354  
   355  	sort.Sort(byPriority(metaOptions))
   356  	for _, meta := range metaOptions {
   357  		if !meta.Enabled {
   358  			continue
   359  		}
   360  
   361  		// Rightmost takes precedence
   362  		options.JavaArguments = append(meta.Options.JavaArguments, options.JavaArguments...)
   363  
   364  		for key, value := range meta.Options.Properties {
   365  			_, exists := options.Properties[key]
   366  			if !exists {
   367  				options.Properties[key] = value
   368  			} else {
   369  				logger.Warnf(ctx, "Warning: %s property -D%s=%s was redefined", meta.Name, key, value)
   370  			}
   371  		}
   372  
   373  		options.Classpath = append(options.Classpath, meta.Options.Classpath...)
   374  	}
   375  	return options
   376  }