github.com/LazyboyChen7/engine@v17.12.1-ce-rc2+incompatible/daemon/logger/splunk/splunk.go

// Package splunk provides the log driver for forwarding server logs to
// a Splunk HTTP Event Collector endpoint.
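//
// The driver is selected per container with --log-driver=splunk; an
// illustrative invocation (the URL, token, and image below are placeholders,
// not values taken from this package) could look like:
//
//	docker run --log-driver=splunk \
//		--log-opt splunk-url=https://splunkhost:8088 \
//		--log-opt splunk-token=<HEC-token> \
//		--log-opt splunk-format=inline \
//		myimage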
package splunk

import (
	"bytes"
	"compress/gzip"
	"context"
	"crypto/tls"
	"crypto/x509"
	"encoding/json"
	"fmt"
	"io"
	"io/ioutil"
	"net/http"
	"net/url"
	"os"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/docker/docker/daemon/logger"
	"github.com/docker/docker/daemon/logger/loggerutils"
	"github.com/docker/docker/pkg/urlutil"
	"github.com/sirupsen/logrus"
)

const (
	driverName                    = "splunk"
	splunkURLKey                  = "splunk-url"
	splunkTokenKey                = "splunk-token"
	splunkSourceKey               = "splunk-source"
	splunkSourceTypeKey           = "splunk-sourcetype"
	splunkIndexKey                = "splunk-index"
	splunkCAPathKey               = "splunk-capath"
	splunkCANameKey               = "splunk-caname"
	splunkInsecureSkipVerifyKey   = "splunk-insecureskipverify"
	splunkFormatKey               = "splunk-format"
	splunkVerifyConnectionKey     = "splunk-verify-connection"
	splunkGzipCompressionKey      = "splunk-gzip"
	splunkGzipCompressionLevelKey = "splunk-gzip-level"
	envKey                        = "env"
	envRegexKey                   = "env-regex"
	labelsKey                     = "labels"
	tagKey                        = "tag"
)

const (
	// How often we send messages (if we are not reaching the batch size)
	defaultPostMessagesFrequency = 5 * time.Second
	// How big a batch of messages can be
	defaultPostMessagesBatchSize = 1000
	// Maximum number of messages we can store in the buffer
	defaultBufferMaximum = 10 * defaultPostMessagesBatchSize
	// Number of messages allowed to be queued in the channel
	defaultStreamChannelSize = 4 * defaultPostMessagesBatchSize
)

const (
	envVarPostMessagesFrequency = "SPLUNK_LOGGING_DRIVER_POST_MESSAGES_FREQUENCY"
	envVarPostMessagesBatchSize = "SPLUNK_LOGGING_DRIVER_POST_MESSAGES_BATCH_SIZE"
	envVarBufferMaximum         = "SPLUNK_LOGGING_DRIVER_BUFFER_MAX"
	envVarStreamChannelSize     = "SPLUNK_LOGGING_DRIVER_CHANNEL_SIZE"
)
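
// The advanced options above are read from the daemon process environment by
// getAdvancedOptionDuration and getAdvancedOptionInt below; the values here
// are illustrative placeholders, not defaults:
//
//	SPLUNK_LOGGING_DRIVER_POST_MESSAGES_FREQUENCY=10s
//	SPLUNK_LOGGING_DRIVER_POST_MESSAGES_BATCH_SIZE=500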

var batchSendTimeout = 30 * time.Second

type splunkLoggerInterface interface {
	logger.Logger
	worker()
}

type splunkLogger struct {
	client    *http.Client
	transport *http.Transport

	url         string
	auth        string
	nullMessage *splunkMessage

	// HTTP compression
	gzipCompression      bool
	gzipCompressionLevel int

	// Advanced options
	postMessagesFrequency time.Duration
	postMessagesBatchSize int
	bufferMaximum         int

	// For synchronization between the background worker and the logger.
	// We use a channel to send messages to the worker goroutine.
	// All other variables are for blocking the Close call until we flush all messages to HEC.
	stream     chan *splunkMessage
	lock       sync.RWMutex
	closed     bool
	closedCond *sync.Cond
}

type splunkLoggerInline struct {
	*splunkLogger

	nullEvent *splunkMessageEvent
}

type splunkLoggerJSON struct {
	*splunkLoggerInline
}

type splunkLoggerRaw struct {
	*splunkLogger

	prefix []byte
}

type splunkMessage struct {
	Event      interface{} `json:"event"`
	Time       string      `json:"time"`
	Host       string      `json:"host"`
	Source     string      `json:"source,omitempty"`
	SourceType string      `json:"sourcetype,omitempty"`
	Index      string      `json:"index,omitempty"`
}

type splunkMessageEvent struct {
	Line   interface{}       `json:"line"`
	Source string            `json:"source"`
	Tag    string            `json:"tag,omitempty"`
	Attrs  map[string]string `json:"attrs,omitempty"`
}
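
// For illustration only (all values are placeholders), an inline-format
// message built from these structs serializes roughly as:
//
//	{
//		"event": {"line": "hello world", "source": "stdout", "tag": "deadbeefdead"},
//		"time": "1514764800.000000",
//		"host": "myhost",
//		"source": "mysource",
//		"sourcetype": "mysourcetype",
//		"index": "main"
//	}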

const (
	splunkFormatRaw    = "raw"
	splunkFormatJSON   = "json"
	splunkFormatInline = "inline"
)
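
// How the formats differ (see the Log implementations below):
//   - inline: the log line is embedded as a string inside a structured
//     splunkMessageEvent together with the tag and attrs.
//   - json: like inline, but if the line itself is valid JSON it is embedded
//     as a raw JSON value instead of a string.
//   - raw: the event is a plain string made of an optional "tag key=value ..."
//     prefix followed by the log line.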

func init() {
	if err := logger.RegisterLogDriver(driverName, New); err != nil {
		logrus.Fatal(err)
	}
	if err := logger.RegisterLogOptValidator(driverName, ValidateLogOpt); err != nil {
		logrus.Fatal(err)
	}
}

// New creates a splunk logger driver using the configuration passed in the context
func New(info logger.Info) (logger.Logger, error) {
	hostname, err := info.Hostname()
	if err != nil {
		return nil, fmt.Errorf("%s: cannot access hostname to set source field", driverName)
	}

	// Parse and validate Splunk URL
	splunkURL, err := parseURL(info)
	if err != nil {
		return nil, err
	}

	// Splunk Token is a required parameter
	splunkToken, ok := info.Config[splunkTokenKey]
	if !ok {
		return nil, fmt.Errorf("%s: %s is expected", driverName, splunkTokenKey)
	}

	tlsConfig := &tls.Config{}

	// Splunk uses autogenerated certificates by default;
	// allow users to trust them by skipping verification
	if insecureSkipVerifyStr, ok := info.Config[splunkInsecureSkipVerifyKey]; ok {
		insecureSkipVerify, err := strconv.ParseBool(insecureSkipVerifyStr)
		if err != nil {
			return nil, err
		}
		tlsConfig.InsecureSkipVerify = insecureSkipVerify
	}

	// If a path to the root certificate is provided, load it
	if caPath, ok := info.Config[splunkCAPathKey]; ok {
		caCert, err := ioutil.ReadFile(caPath)
		if err != nil {
			return nil, err
		}
		caPool := x509.NewCertPool()
		caPool.AppendCertsFromPEM(caCert)
		tlsConfig.RootCAs = caPool
	}

	if caName, ok := info.Config[splunkCANameKey]; ok {
		tlsConfig.ServerName = caName
	}

	gzipCompression := false
	if gzipCompressionStr, ok := info.Config[splunkGzipCompressionKey]; ok {
		gzipCompression, err = strconv.ParseBool(gzipCompressionStr)
		if err != nil {
			return nil, err
		}
	}

	gzipCompressionLevel := gzip.DefaultCompression
	if gzipCompressionLevelStr, ok := info.Config[splunkGzipCompressionLevelKey]; ok {
		var err error
		gzipCompressionLevel64, err := strconv.ParseInt(gzipCompressionLevelStr, 10, 32)
		if err != nil {
			return nil, err
		}
		gzipCompressionLevel = int(gzipCompressionLevel64)
		if gzipCompressionLevel < gzip.DefaultCompression || gzipCompressionLevel > gzip.BestCompression {
			err := fmt.Errorf("not supported level '%s' for %s (supported values between %d and %d)",
				gzipCompressionLevelStr, splunkGzipCompressionLevelKey, gzip.DefaultCompression, gzip.BestCompression)
			return nil, err
		}
	}

	transport := &http.Transport{
		TLSClientConfig: tlsConfig,
		Proxy:           http.ProxyFromEnvironment,
	}
	client := &http.Client{
		Transport: transport,
	}

	source := info.Config[splunkSourceKey]
	sourceType := info.Config[splunkSourceTypeKey]
	index := info.Config[splunkIndexKey]

	var nullMessage = &splunkMessage{
		Host:       hostname,
		Source:     source,
		SourceType: sourceType,
		Index:      index,
	}

	// Allow the user to remove the tag from messages by setting the tag to an empty string
	tag := ""
	if tagTemplate, ok := info.Config[tagKey]; !ok || tagTemplate != "" {
		tag, err = loggerutils.ParseLogTag(info, loggerutils.DefaultTemplate)
		if err != nil {
			return nil, err
		}
	}

	attrs, err := info.ExtraAttributes(nil)
	if err != nil {
		return nil, err
	}

	var (
		postMessagesFrequency = getAdvancedOptionDuration(envVarPostMessagesFrequency, defaultPostMessagesFrequency)
		postMessagesBatchSize = getAdvancedOptionInt(envVarPostMessagesBatchSize, defaultPostMessagesBatchSize)
		bufferMaximum         = getAdvancedOptionInt(envVarBufferMaximum, defaultBufferMaximum)
		streamChannelSize     = getAdvancedOptionInt(envVarStreamChannelSize, defaultStreamChannelSize)
	)

	logger := &splunkLogger{
		client:                client,
		transport:             transport,
		url:                   splunkURL.String(),
		auth:                  "Splunk " + splunkToken,
		nullMessage:           nullMessage,
		gzipCompression:       gzipCompression,
		gzipCompressionLevel:  gzipCompressionLevel,
		stream:                make(chan *splunkMessage, streamChannelSize),
		postMessagesFrequency: postMessagesFrequency,
		postMessagesBatchSize: postMessagesBatchSize,
		bufferMaximum:         bufferMaximum,
	}

	// By default we verify the connection, but we allow the user to skip that
	verifyConnection := true
	if verifyConnectionStr, ok := info.Config[splunkVerifyConnectionKey]; ok {
		var err error
		verifyConnection, err = strconv.ParseBool(verifyConnectionStr)
		if err != nil {
			return nil, err
		}
	}
	if verifyConnection {
		err = verifySplunkConnection(logger)
		if err != nil {
			return nil, err
		}
	}

	var splunkFormat string
	if splunkFormatParsed, ok := info.Config[splunkFormatKey]; ok {
		switch splunkFormatParsed {
		case splunkFormatInline:
		case splunkFormatJSON:
		case splunkFormatRaw:
		default:
			return nil, fmt.Errorf("Unknown format specified %s, supported formats are inline, json and raw", splunkFormatParsed)
		}
		splunkFormat = splunkFormatParsed
	} else {
		splunkFormat = splunkFormatInline
	}

	var loggerWrapper splunkLoggerInterface

	switch splunkFormat {
	case splunkFormatInline:
		nullEvent := &splunkMessageEvent{
			Tag:   tag,
			Attrs: attrs,
		}

		loggerWrapper = &splunkLoggerInline{logger, nullEvent}
	case splunkFormatJSON:
		nullEvent := &splunkMessageEvent{
			Tag:   tag,
			Attrs: attrs,
		}

		loggerWrapper = &splunkLoggerJSON{&splunkLoggerInline{logger, nullEvent}}
	case splunkFormatRaw:
		var prefix bytes.Buffer
		if tag != "" {
			prefix.WriteString(tag)
			prefix.WriteString(" ")
		}
		for key, value := range attrs {
			prefix.WriteString(key)
			prefix.WriteString("=")
			prefix.WriteString(value)
			prefix.WriteString(" ")
		}

		loggerWrapper = &splunkLoggerRaw{logger, prefix.Bytes()}
	default:
		return nil, fmt.Errorf("Unexpected format %s", splunkFormat)
	}

	go loggerWrapper.worker()

	return loggerWrapper, nil
}
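
// A sketch of constructing the driver directly (for instance in a test); the
// logger.Info fields shown are an assumed minimal configuration, and the URL
// and token are placeholders:
//
//	l, err := New(logger.Info{
//		ContainerID: "deadbeefdead",
//		Config: map[string]string{
//			splunkURLKey:              "https://splunkhost:8088",
//			splunkTokenKey:            "<HEC-token>",
//			splunkVerifyConnectionKey: "false",
//		},
//	})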

func (l *splunkLoggerInline) Log(msg *logger.Message) error {
	message := l.createSplunkMessage(msg)

	event := *l.nullEvent
	event.Line = string(msg.Line)
	event.Source = msg.Source

	message.Event = &event
	logger.PutMessage(msg)
	return l.queueMessageAsync(message)
}

func (l *splunkLoggerJSON) Log(msg *logger.Message) error {
	message := l.createSplunkMessage(msg)
	event := *l.nullEvent

	var rawJSONMessage json.RawMessage
	if err := json.Unmarshal(msg.Line, &rawJSONMessage); err == nil {
		event.Line = &rawJSONMessage
	} else {
		event.Line = string(msg.Line)
	}

	event.Source = msg.Source

	message.Event = &event
	logger.PutMessage(msg)
	return l.queueMessageAsync(message)
}

func (l *splunkLoggerRaw) Log(msg *logger.Message) error {
	// empty or whitespace-only messages are not accepted by HEC
	if strings.TrimSpace(string(msg.Line)) == "" {
		return nil
	}

	message := l.createSplunkMessage(msg)

	message.Event = string(append(l.prefix, msg.Line...))
	logger.PutMessage(msg)
	return l.queueMessageAsync(message)
}

func (l *splunkLogger) queueMessageAsync(message *splunkMessage) error {
	l.lock.RLock()
	defer l.lock.RUnlock()
	if l.closedCond != nil {
		return fmt.Errorf("%s: driver is closed", driverName)
	}
	l.stream <- message
	return nil
}

func (l *splunkLogger) worker() {
	timer := time.NewTicker(l.postMessagesFrequency)
	var messages []*splunkMessage
	for {
		select {
		case message, open := <-l.stream:
			if !open {
				l.postMessages(messages, true)
				l.lock.Lock()
				defer l.lock.Unlock()
				l.transport.CloseIdleConnections()
				l.closed = true
				l.closedCond.Signal()
				return
			}
			messages = append(messages, message)
			// Only send when we reach exactly the batch size.
			// This also helps to avoid firing postMessages on every new message
			// when a previous try failed.
			if len(messages)%l.postMessagesBatchSize == 0 {
				messages = l.postMessages(messages, false)
			}
		case <-timer.C:
			messages = l.postMessages(messages, false)
		}
	}
}

// postMessages attempts to send the buffered messages to HEC in batches and
// returns the part of the buffer that should be kept for a later retry.
func (l *splunkLogger) postMessages(messages []*splunkMessage, lastChance bool) []*splunkMessage {
	messagesLen := len(messages)

	ctx, cancel := context.WithTimeout(context.Background(), batchSendTimeout)
	defer cancel()

	for i := 0; i < messagesLen; i += l.postMessagesBatchSize {
		upperBound := i + l.postMessagesBatchSize
		if upperBound > messagesLen {
			upperBound = messagesLen
		}

		if err := l.tryPostMessages(ctx, messages[i:upperBound]); err != nil {
			logrus.WithError(err).WithField("module", "logger/splunk").Warn("Error while sending logs")
			if messagesLen-i >= l.bufferMaximum || lastChance {
				// If this is the last chance, print them all to the daemon log
				if lastChance {
					upperBound = messagesLen
				}
				// Not all were sent, but the buffer has reached its maximum; log all messages
				// we could not send and return the buffer minus one batch size
				for j := i; j < upperBound; j++ {
					if jsonEvent, err := json.Marshal(messages[j]); err != nil {
						logrus.Error(err)
					} else {
						logrus.Error(fmt.Errorf("Failed to send a message '%s'", string(jsonEvent)))
					}
				}
				return messages[upperBound:messagesLen]
			}
			// Not all were sent; return the buffer starting from the messages we have not sent
			return messages[i:messagesLen]
		}
	}
	// All sent, return an empty buffer
	return messages[:0]
}

func (l *splunkLogger) tryPostMessages(ctx context.Context, messages []*splunkMessage) error {
	if len(messages) == 0 {
		return nil
	}
	var buffer bytes.Buffer
	var writer io.Writer
	var gzipWriter *gzip.Writer
	var err error
	// If gzip compression is enabled, create a gzip writer with the specified
	// compression level. If gzip compression is disabled, use the plain buffer as the writer.
	if l.gzipCompression {
		gzipWriter, err = gzip.NewWriterLevel(&buffer, l.gzipCompressionLevel)
		if err != nil {
			return err
		}
		writer = gzipWriter
	} else {
		writer = &buffer
	}
	for _, message := range messages {
		jsonEvent, err := json.Marshal(message)
		if err != nil {
			return err
		}
		if _, err := writer.Write(jsonEvent); err != nil {
			return err
		}
	}
	// If gzip compression is enabled, tell it that we are done
	if l.gzipCompression {
		err = gzipWriter.Close()
		if err != nil {
			return err
		}
	}
	req, err := http.NewRequest("POST", l.url, bytes.NewBuffer(buffer.Bytes()))
	if err != nil {
		return err
	}
	req = req.WithContext(ctx)
	req.Header.Set("Authorization", l.auth)
	// Tell the server that we are sending a gzip-compressed body
	if l.gzipCompression {
		req.Header.Set("Content-Encoding", "gzip")
	}
	res, err := l.client.Do(req)
	if err != nil {
		return err
	}
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		var body []byte
		body, err = ioutil.ReadAll(res.Body)
		if err != nil {
			return err
		}
		return fmt.Errorf("%s: failed to send event - %s - %s", driverName, res.Status, body)
	}
	io.Copy(ioutil.Discard, res.Body)
	return nil
}

func (l *splunkLogger) Close() error {
	l.lock.Lock()
	defer l.lock.Unlock()
	if l.closedCond == nil {
		l.closedCond = sync.NewCond(&l.lock)
		close(l.stream)
		for !l.closed {
			l.closedCond.Wait()
		}
	}
	return nil
}

func (l *splunkLogger) Name() string {
	return driverName
}

func (l *splunkLogger) createSplunkMessage(msg *logger.Message) *splunkMessage {
	message := *l.nullMessage
	// HEC expects the event time as epoch seconds with an optional fractional part
	message.Time = fmt.Sprintf("%f", float64(msg.Timestamp.UnixNano())/float64(time.Second))
	return &message
}

// ValidateLogOpt looks for all options supported by the splunk driver
func ValidateLogOpt(cfg map[string]string) error {
	for key := range cfg {
		switch key {
		case splunkURLKey:
		case splunkTokenKey:
		case splunkSourceKey:
		case splunkSourceTypeKey:
		case splunkIndexKey:
		case splunkCAPathKey:
		case splunkCANameKey:
		case splunkInsecureSkipVerifyKey:
		case splunkFormatKey:
		case splunkVerifyConnectionKey:
		case splunkGzipCompressionKey:
		case splunkGzipCompressionLevelKey:
		case envKey:
		case envRegexKey:
		case labelsKey:
		case tagKey:
		default:
			return fmt.Errorf("unknown log opt '%s' for %s log driver", key, driverName)
		}
	}
	return nil
}

// parseURL parses and validates the splunk-url option. The URL must be
// absolute, of the form scheme://dns_name_or_ip:port, with no path (other
// than "/"), query, or fragment; the HEC event endpoint path is appended here.
func parseURL(info logger.Info) (*url.URL, error) {
	splunkURLStr, ok := info.Config[splunkURLKey]
	if !ok {
		return nil, fmt.Errorf("%s: %s is expected", driverName, splunkURLKey)
	}

	splunkURL, err := url.Parse(splunkURLStr)
	if err != nil {
		return nil, fmt.Errorf("%s: failed to parse %s as url value in %s", driverName, splunkURLStr, splunkURLKey)
	}

	if !urlutil.IsURL(splunkURLStr) ||
		!splunkURL.IsAbs() ||
		(splunkURL.Path != "" && splunkURL.Path != "/") ||
		splunkURL.RawQuery != "" ||
		splunkURL.Fragment != "" {
		return nil, fmt.Errorf("%s: expected format scheme://dns_name_or_ip:port for %s", driverName, splunkURLKey)
	}

	splunkURL.Path = "/services/collector/event/1.0"

	return splunkURL, nil
}

// verifySplunkConnection sends an OPTIONS request to the HEC endpoint to check
// that it is reachable with the configured client settings.
func verifySplunkConnection(l *splunkLogger) error {
	req, err := http.NewRequest(http.MethodOptions, l.url, nil)
	if err != nil {
		return err
	}
	res, err := l.client.Do(req)
	if err != nil {
		return err
	}
	if res.Body != nil {
		defer res.Body.Close()
	}
	if res.StatusCode != http.StatusOK {
		var body []byte
		body, err = ioutil.ReadAll(res.Body)
		if err != nil {
			return err
		}
		return fmt.Errorf("%s: failed to verify connection - %s - %s", driverName, res.Status, body)
	}
	return nil
}

func getAdvancedOptionDuration(envName string, defaultValue time.Duration) time.Duration {
	valueStr := os.Getenv(envName)
	if valueStr == "" {
		return defaultValue
	}
	parsedValue, err := time.ParseDuration(valueStr)
	if err != nil {
		logrus.Error(fmt.Sprintf("Failed to parse value of %s as duration. Using default %v. %v", envName, defaultValue, err))
		return defaultValue
	}
	return parsedValue
}

func getAdvancedOptionInt(envName string, defaultValue int) int {
	valueStr := os.Getenv(envName)
	if valueStr == "" {
		return defaultValue
	}
	parsedValue, err := strconv.ParseInt(valueStr, 10, 32)
	if err != nil {
		logrus.Error(fmt.Sprintf("Failed to parse value of %s as integer. Using default %d. %v", envName, defaultValue, err))
		return defaultValue
	}
	return int(parsedValue)
}