github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/query/server/multi_process.go (about)

     1  // Copyright (c) 2020 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package server
    22  
    23  import (
    24  	"fmt"
    25  	"math"
    26  	"net"
    27  	"os"
    28  	"runtime"
    29  	"strconv"
    30  	"sync"
    31  
    32  	"github.com/m3db/m3/src/cmd/services/m3query/config"
    33  	xnet "github.com/m3db/m3/src/x/net"
    34  	"github.com/m3db/m3/src/x/panicmon"
    35  
    36  	"go.uber.org/zap"
    37  )
    38  
    // Names and identifiers shared by the multi-process parent and its children.
    const (
    	// Environment variables handed to spawned child processes.

    	// multiProcessInstanceEnvVar carries the child's instance number; it is
    	// read back by multiProcessProcessID to tell children from the parent.
    	multiProcessInstanceEnvVar = "MULTIPROCESS_INSTANCE"
    	// goMaxProcsEnvVar is set for children when a GoMaxProcs value is
    	// configured.
    	goMaxProcsEnvVar = "GOMAXPROCS"

    	// Instrumentation identifiers.

    	// multiProcessParentInstance is the "processID" log field value used by
    	// the parent process.
    	multiProcessParentInstance = "0"
    	// multiProcessMetricTagID is the common metric label key that holds the
    	// child's instance number.
    	multiProcessMetricTagID = "multiprocess_id"
    )
    45  
    46  type multiProcessResult struct {
    47  	isParentCleanExit bool
    48  
    49  	cfg          config.Configuration
    50  	logger       *zap.Logger
    51  	listenerOpts xnet.ListenerOptions
    52  	commonLabels map[string]string
    53  }
    54  
    55  func multiProcessProcessID() string {
    56  	return os.Getenv(multiProcessInstanceEnvVar)
    57  }
    58  
    59  func multiProcessRun(
    60  	cfg config.Configuration,
    61  	logger *zap.Logger,
    62  	listenerOpts xnet.ListenerOptions,
    63  ) (multiProcessResult, error) {
    64  	multiProcessInstance := multiProcessProcessID()
    65  	if multiProcessInstance != "" {
    66  		// Otherwise is already a sub-process, make sure listener options
    67  		// will reuse ports so multiple processes can listen on the same
    68  		// listen port.
    69  		listenerOpts = xnet.NewListenerOptions(xnet.ListenerReusePort(true))
    70  
    71  		// Configure instrumentation to be correctly partitioned.
    72  		logger = logger.With(zap.String("processID", multiProcessInstance))
    73  
    74  		instance, err := strconv.Atoi(multiProcessInstance)
    75  		if err != nil {
    76  			return multiProcessResult{},
    77  				fmt.Errorf("multi-process process ID is non-integer: %v", err)
    78  		}
    79  
    80  		metrics := cfg.MetricsOrDefault()
    81  		// Listen on a different Prometheus metrics handler listen port.
    82  		if metrics.PrometheusReporter != nil && metrics.PrometheusReporter.ListenAddress != "" {
    83  			// Simply increment the listen address port by instance number
    84  			host, port, err := net.SplitHostPort(metrics.PrometheusReporter.ListenAddress)
    85  			if err != nil {
    86  				return multiProcessResult{},
    87  					fmt.Errorf("could not split host:port for metrics reporter: %v", err)
    88  			}
    89  
    90  			portValue, err := strconv.Atoi(port)
    91  			if err != nil {
    92  				return multiProcessResult{},
    93  					fmt.Errorf("prometheus metric reporter port is non-integer: %v", err)
    94  			}
    95  			if portValue > 0 {
    96  				// Increment port value by process ID if valid port.
    97  				address := net.JoinHostPort(host, strconv.Itoa(portValue+instance-1))
    98  				metrics.PrometheusReporter.ListenAddress = address
    99  				logger.Info("multi-process prometheus metrics reporter listen address configured",
   100  					zap.String("address", address))
   101  			}
   102  		}
   103  		return multiProcessResult{
   104  			cfg:          cfg,
   105  			logger:       logger,
   106  			listenerOpts: listenerOpts,
   107  			// Ensure multi-process process ID is set on all metrics.
   108  			commonLabels: map[string]string{multiProcessMetricTagID: multiProcessInstance},
   109  		}, nil
   110  	}
   111  
   112  	logger = logger.With(zap.String("processID", multiProcessParentInstance))
   113  
   114  	perCPU := defaultPerCPUMultiProcess
   115  	if v := cfg.MultiProcess.PerCPU; v > 0 {
   116  		// Allow config to override per CPU factor for determining count.
   117  		perCPU = v
   118  	}
   119  
   120  	count := int(math.Max(1, float64(runtime.NumCPU())*perCPU))
   121  	if v := cfg.MultiProcess.Count; v > 0 {
   122  		// Allow config to override per CPU auto derived count.
   123  		count = v
   124  	}
   125  
   126  	logger.Info("starting multi-process subprocesses",
   127  		zap.Int("count", count))
   128  	var (
   129  		wg       sync.WaitGroup
   130  		statuses = make([]panicmon.StatusCode, count)
   131  	)
   132  	for i := 0; i < count; i++ {
   133  		i := i
   134  		wg.Add(1)
   135  		go func() {
   136  			defer wg.Done()
   137  
   138  			newEnv := []string{
   139  				fmt.Sprintf("%s=%d", multiProcessInstanceEnvVar, i+1),
   140  			}
   141  
   142  			// Set GOMAXPROCS correctly if configured.
   143  			if v := cfg.MultiProcess.GoMaxProcs; v > 0 {
   144  				newEnv = append(newEnv,
   145  					fmt.Sprintf("%s=%d", goMaxProcsEnvVar, v))
   146  			}
   147  
   148  			newEnv = append(newEnv, os.Environ()...)
   149  
   150  			exec := panicmon.NewExecutor(panicmon.ExecutorOptions{
   151  				Env: newEnv,
   152  			})
   153  			status, err := exec.Run(os.Args)
   154  			if err != nil {
   155  				logger.Error("process failed", zap.Error(err))
   156  			}
   157  
   158  			statuses[i] = status
   159  		}()
   160  	}
   161  
   162  	wg.Wait()
   163  
   164  	exitNotOk := 0
   165  	for _, v := range statuses {
   166  		if v != 0 {
   167  			exitNotOk++
   168  		}
   169  	}
   170  
   171  	if exitNotOk > 0 {
   172  		return multiProcessResult{},
   173  			fmt.Errorf("child exit codes not ok: %v", statuses)
   174  	}
   175  
   176  	return multiProcessResult{
   177  		isParentCleanExit: true,
   178  	}, nil
   179  }