github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/internal/logger/target/http/http.go

// Copyright (c) 2015-2024 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.

package http

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"net/http"
	"net/url"
	"os"
	"path/filepath"
	"sync"
	"sync/atomic"
	"time"

	jsoniter "github.com/json-iterator/go"
	xhttp "github.com/minio/minio/internal/http"
	xioutil "github.com/minio/minio/internal/ioutil"
	"github.com/minio/minio/internal/logger/target/types"
	"github.com/minio/minio/internal/once"
	"github.com/minio/minio/internal/store"
	xnet "github.com/minio/pkg/v2/net"
	"github.com/valyala/bytebufferpool"
)

const (
	// Timeout for the webhook http call
	webhookCallTimeout = 3 * time.Second

	// maxWorkers is the maximum number of concurrent http loggers
	maxWorkers = 16

	// maxWorkersWithBatchEvents is the maximum number of concurrent batch http loggers
	maxWorkersWithBatchEvents = 4

	// the suffix for the configured queue dir where the logs will be persisted.
	httpLoggerExtension = ".http.log"
)

const (
	statusOffline = iota
	statusOnline
	statusClosed
)
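
// A target moves between these states roughly as follows: it starts
// offline, each successful send() flips it to statusOnline and each
// failed send() back to statusOffline, and Cancel() parks it at
// statusClosed permanently. IsOnline() only reads this cached value;
// it never probes the endpoint.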

var (
	logChBuffers = make(map[string]chan interface{})
	logChLock    = sync.Mutex{}
)

// Config http logger target
type Config struct {
	Enabled    bool              `json:"enabled"`
	Name       string            `json:"name"`
	UserAgent  string            `json:"userAgent"`
	Endpoint   *xnet.URL         `json:"endpoint"`
	AuthToken  string            `json:"authToken"`
	ClientCert string            `json:"clientCert"`
	ClientKey  string            `json:"clientKey"`
	BatchSize  int               `json:"batchSize"`
	QueueSize  int               `json:"queueSize"`
	QueueDir   string            `json:"queueDir"`
	Proxy      string            `json:"proxy"`
	Transport  http.RoundTripper `json:"-"`

	// Custom logger
	LogOnceIf func(ctx context.Context, err error, id string, errKind ...interface{}) `json:"-"`
}
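
// A minimal sketch of wiring up a Config; the endpoint URL and queue
// sizes below are illustrative assumptions, not defaults:
//
//	endpoint, _ := xnet.ParseHTTPURL("https://logs.example.com/ingest")
//	cfg := Config{
//		Enabled:   true,
//		Name:      "webhook1",
//		Endpoint:  endpoint,
//		AuthToken: "Bearer example-token", // sent verbatim in the Authorization header
//		BatchSize: 1,                      // one event per HTTP request
//		QueueSize: 10000,
//		Transport: http.DefaultTransport,
//		LogOnceIf: func(ctx context.Context, err error, id string, errKind ...interface{}) {},
//	}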

// Target implements logger.Target and sends the json
// format of a log entry to the configured http endpoint.
// An internal buffer of logs is maintained; when the
// buffer is full, new logs are dropped and an error
// is returned to the caller.
type Target struct {
	totalMessages  int64
	failedMessages int64
	status         int32

	// Worker control
	workers    int64
	maxWorkers int64
	// workerStartMu sync.Mutex
	lastStarted time.Time

	wg sync.WaitGroup

	// Channel of log entries.
	// Reading logCh must hold read lock on logChMu (to avoid read race)
	// Sending a value on logCh must hold read lock on logChMu (to avoid closing)
	logCh   chan interface{}
	logChMu sync.RWMutex

	// If this webhook is being re-configured we will
	// assign the new webhook target to this field.
	// The Send() method will then re-direct entries
	// to the new target when the current one
	// has been set to status "statusClosed".
	// Once the global target slice has been migrated
	// the current target will stop receiving entries.
	migrateTarget *Target

	// Number of events per HTTP send to the webhook target.
	// This is useful only if your endpoint can read multiple
	// events from a single stream, for example the Splunk HTTP
	// Event Collector; if you are unsure, set this to '1'.
	batchSize   int
	payloadType string

	// store to persist and replay the logs to the target
	// to avoid missing events when the target is down.
	store          store.Store[interface{}]
	storeCtxCancel context.CancelFunc

	initQueueOnce once.Init

	config Config
	client *http.Client
}

// Name returns the name of the target
func (h *Target) Name() string {
	return "minio-http-" + h.config.Name
}

// Type - returns type of the target
func (h *Target) Type() types.TargetType {
	return types.TargetHTTP
}

// Endpoint returns the backend endpoint
func (h *Target) Endpoint() string {
	return h.config.Endpoint.String()
}

func (h *Target) String() string {
	return h.config.Name
}

// IsOnline returns true if the target is reachable, based on the cached status value
func (h *Target) IsOnline(ctx context.Context) bool {
	return atomic.LoadInt32(&h.status) == statusOnline
}

// Stats returns the target statistics.
func (h *Target) Stats() types.TargetStats {
	h.logChMu.RLock()
	queueLength := len(h.logCh)
	h.logChMu.RUnlock()
	stats := types.TargetStats{
		TotalMessages:  atomic.LoadInt64(&h.totalMessages),
		FailedMessages: atomic.LoadInt64(&h.failedMessages),
		QueueLength:    queueLength,
	}

	return stats
}

// AssignMigrateTarget assigns a target
// which will eventually replace the current target.
func (h *Target) AssignMigrateTarget(migrateTgt *Target) {
	h.migrateTarget = migrateTgt
}

// Init validates and initializes the http target
func (h *Target) Init(ctx context.Context) (err error) {
	if h.config.QueueDir != "" {
		return h.initQueueOnce.DoWithContext(ctx, h.initDiskStore)
	}
	return h.initQueueOnce.DoWithContext(ctx, h.initMemoryStore)
}
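
// With QueueDir set, entries are persisted to disk and replayed by the
// store machinery below; without it, entries only live in the in-memory
// channel and are lost if the process exits before they are sent.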

func (h *Target) initDiskStore(ctx context.Context) (err error) {
	ctx, cancel := context.WithCancel(ctx)
	h.storeCtxCancel = cancel
	h.lastStarted = time.Now()
	go h.startQueueProcessor(ctx, true)
	store.StreamItems(h.store, h, ctx.Done(), h.config.LogOnceIf)
	return nil
}

func (h *Target) initMemoryStore(ctx context.Context) (err error) {
	ctx, cancel := context.WithCancel(ctx)
	h.storeCtxCancel = cancel
	h.lastStarted = time.Now()
	go h.startQueueProcessor(ctx, true)
	return nil
}

func (h *Target) send(ctx context.Context, payload []byte, payloadType string, timeout time.Duration) (err error) {
	defer func() {
		if err != nil {
			atomic.StoreInt32(&h.status, statusOffline)
		} else {
			atomic.StoreInt32(&h.status, statusOnline)
		}
	}()

	ctx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()
	req, err := http.NewRequestWithContext(ctx, http.MethodPost,
		h.Endpoint(), bytes.NewReader(payload))
	if err != nil {
		return fmt.Errorf("invalid configuration for '%s'; %v", h.Endpoint(), err)
	}
	if payloadType != "" {
		req.Header.Set(xhttp.ContentType, payloadType)
	}
	req.Header.Set(xhttp.MinIOVersion, xhttp.GlobalMinIOVersion)
	req.Header.Set(xhttp.MinioDeploymentID, xhttp.GlobalDeploymentID)

	// Set user-agent to indicate MinIO release
	// version to the configured log endpoint
	req.Header.Set("User-Agent", h.config.UserAgent)

	// The token is sent verbatim; include a scheme such as "Bearer "
	// in the configured value if the endpoint expects one.
	if h.config.AuthToken != "" {
		req.Header.Set("Authorization", h.config.AuthToken)
	}

	resp, err := h.client.Do(req)
	if err != nil {
		return fmt.Errorf("%s returned '%w', please check your endpoint configuration", h.Endpoint(), err)
	}

	// Drain any response.
	xhttp.DrainBody(resp.Body)

	switch resp.StatusCode {
	case http.StatusOK, http.StatusCreated, http.StatusAccepted, http.StatusNoContent:
		// accepted HTTP status codes.
		return nil
	case http.StatusForbidden:
		return fmt.Errorf("%s returned '%s', please check if your auth token is correctly set", h.Endpoint(), resp.Status)
	default:
		return fmt.Errorf("%s returned '%s', please check your endpoint configuration", h.Endpoint(), resp.Status)
	}
}

func (h *Target) startQueueProcessor(ctx context.Context, mainWorker bool) {
	h.logChMu.RLock()
	if h.logCh == nil {
		h.logChMu.RUnlock()
		return
	}
	h.logChMu.RUnlock()

	atomic.AddInt64(&h.workers, 1)
	defer atomic.AddInt64(&h.workers, -1)

	h.wg.Add(1)
	defer h.wg.Done()

	entries := make([]interface{}, 0)
	name := h.Name()

	defer func() {
		// re-load the global buffer pointer
		// in case it was modified by a new target.
		logChLock.Lock()
		currentGlobalBuffer, ok := logChBuffers[name]
		logChLock.Unlock()
		if !ok {
			return
		}

		// Return any unsent entries to the global buffer so a
		// replacement target can pick them up.
		for _, v := range entries {
			select {
			case currentGlobalBuffer <- v:
			default:
			}
		}

		if mainWorker {
		drain:
			for {
				select {
				case v, ok := <-h.logCh:
					if !ok {
						break drain
					}

					currentGlobalBuffer <- v
				default:
					break drain
				}
			}
		}
	}()

	var entry interface{}
	var ok bool
	var err error
	lastBatchProcess := time.Now()

	buf := bytebufferpool.Get()
	enc := jsoniter.ConfigCompatibleWithStandardLibrary.NewEncoder(buf)
	defer bytebufferpool.Put(buf)
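
	// Note: jsoniter's compatible config mirrors encoding/json here, so
	// each enc.Encode call appends one JSON object followed by a newline
	// to buf. A batch with batchSize > 1 therefore goes out as
	// newline-delimited JSON in a single POST body.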

	isDirQueue := h.config.QueueDir != ""

	// globalBuffer is always created or adjusted
	// before this method is launched.
	logChLock.Lock()
	globalBuffer := logChBuffers[name]
	logChLock.Unlock()

	newTicker := time.NewTicker(time.Second)
	isTick := false

	for {
		isTick = false
		select {
		case <-newTicker.C:
			isTick = true
		case entry = <-globalBuffer:
		case entry, ok = <-h.logCh:
			if !ok {
				return
			}
		case <-ctx.Done():
			return
		}

		if !isTick {
			atomic.AddInt64(&h.totalMessages, 1)

			if !isDirQueue {
				if err := enc.Encode(&entry); err != nil {
					h.config.LogOnceIf(
						ctx,
						fmt.Errorf("unable to encode webhook log entry, err '%w' entry: %v", err, entry),
						h.Name(),
					)
					atomic.AddInt64(&h.failedMessages, 1)
					continue
				}
			}

			entries = append(entries, entry)
		}

		if len(entries) != h.batchSize {
			if len(h.logCh) > 0 || len(globalBuffer) > 0 || len(entries) == 0 {
				continue
			}

			if h.batchSize > 1 {
				// If we are batching, wait at least one second
				// before sending, even if there is nothing in
				// the queue.
				if time.Since(lastBatchProcess).Seconds() < 1 {
					continue
				}
			}
		}

		lastBatchProcess = time.Now()

	retry:
		// If the channel reaches above half capacity
		// we spawn more workers. The workers spawned
		// from this main worker routine will exit
		// once the channel drops below half capacity
		// and when it's been at least 30 seconds since
		// we launched a new worker.
		if mainWorker && len(h.logCh) > cap(h.logCh)/2 {
			nWorkers := atomic.LoadInt64(&h.workers)
			if nWorkers < h.maxWorkers {
				if time.Since(h.lastStarted).Milliseconds() > 10 {
					h.lastStarted = time.Now()
					go h.startQueueProcessor(ctx, false)
				}
			}
		}

		if !isDirQueue {
			err = h.send(ctx, buf.Bytes(), h.payloadType, webhookCallTimeout)
		} else {
			err = h.store.PutMultiple(entries)
		}

		if err != nil {
			h.config.LogOnceIf(
				context.Background(),
				fmt.Errorf("unable to send webhook log entry to '%s' err '%w'", name, err),
				name,
			)

			if errors.Is(err, context.Canceled) {
				return
			}

			time.Sleep(3 * time.Second)
			goto retry
		}

		entries = make([]interface{}, 0)

		if !isDirQueue {
			buf.Reset()
		}

		if !mainWorker && len(h.logCh) < cap(h.logCh)/2 {
			if time.Since(h.lastStarted).Seconds() > 30 {
				return
			}
		}
	}
}
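
// Rough scaling behavior of the processor above: the main worker adds
// helpers while logCh sits above half capacity (up to maxWorkers in
// total, no more than one every 10ms), and each helper exits once the
// channel falls below half capacity and at least 30 seconds have
// passed since the last worker was started.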

// CreateOrAdjustGlobalBuffer will create or adjust the global log entry buffers
// which are used to migrate log entries between old and new targets.
func CreateOrAdjustGlobalBuffer(currentTgt *Target, newTgt *Target) {
	logChLock.Lock()
	defer logChLock.Unlock()

	requiredCap := currentTgt.config.QueueSize + (currentTgt.config.BatchSize * int(currentTgt.maxWorkers))
	currentCap := 0
	name := newTgt.Name()

	currentBuff, ok := logChBuffers[name]
	if !ok {
		logChBuffers[name] = make(chan interface{}, requiredCap)
		currentCap = requiredCap
	} else {
		currentCap = cap(currentBuff)
		requiredCap += len(currentBuff)
	}

	if requiredCap > currentCap {
		logChBuffers[name] = make(chan interface{}, requiredCap)

		if len(currentBuff) > 0 {
		drain:
			for {
				select {
				case v, ok := <-currentBuff:
					if !ok {
						break drain
					}
					logChBuffers[name] <- v
				default:
					break drain
				}
			}
		}
	}
}
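
// A worked example of the capacity arithmetic above (values are
// illustrative): with QueueSize = 10000, BatchSize = 1000 and the
// batch worker cap of maxWorkersWithBatchEvents = 4, the buffer is
// sized at 10000 + 1000*4 = 14000 entries, leaving room for every
// worker's in-flight batch on top of the queue itself.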

// New initializes a new logger target which
// sends logs over HTTP to the specified endpoint
func New(config Config) (*Target, error) {
	maxWorkers := maxWorkers
	if config.BatchSize > 100 {
		maxWorkers = maxWorkersWithBatchEvents
	} else if config.BatchSize <= 0 {
		config.BatchSize = 1
	}

	h := &Target{
		logCh:      make(chan interface{}, config.QueueSize),
		config:     config,
		status:     statusOffline,
		batchSize:  config.BatchSize,
		maxWorkers: int64(maxWorkers),
	}

	if config.BatchSize > 1 {
		h.payloadType = ""
	} else {
		h.payloadType = "application/json"
	}

	// If a proxy is configured, apply it to a clone of the transport.
	// Note: this assumes config.Transport is an *http.Transport.
	if h.config.Proxy != "" {
		proxyURL, _ := url.Parse(h.config.Proxy)
		transport := h.config.Transport
		ctransport := transport.(*http.Transport).Clone()
		ctransport.Proxy = http.ProxyURL(proxyURL)
		h.config.Transport = ctransport
	}

	h.client = &http.Client{Transport: h.config.Transport}

	if h.config.QueueDir != "" {
		queueStore := store.NewQueueStore[interface{}](
			filepath.Join(h.config.QueueDir, h.Name()),
			uint64(h.config.QueueSize),
			httpLoggerExtension,
		)

		if err := queueStore.Open(); err != nil {
			return h, fmt.Errorf("unable to initialize the queue store of %s webhook: %w", h.Name(), err)
		}

		h.store = queueStore
	}

	return h, nil
}
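
// A minimal end-to-end sketch of the lifecycle (error handling elided;
// cfg is the illustrative Config literal shown above):
//
//	tgt, err := New(cfg)
//	if err != nil { ... }
//	if err := tgt.Init(ctx); err != nil { ... } // starts the queue processor
//	_ = tgt.Send(ctx, logEntry) // non-blocking unless the buffer is full
//	defer tgt.Cancel()          // flushes queued entries, then shuts down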

// SendFromStore - reads the log from store and sends it to webhook.
func (h *Target) SendFromStore(key store.Key) (err error) {
	var eventData []byte
	eventData, err = h.store.GetRaw(key.Name)
	if err != nil {
		if os.IsNotExist(err) {
			return nil
		}
		return err
	}

	if err := h.send(context.Background(), eventData, h.payloadType, webhookCallTimeout); err != nil {
		atomic.AddInt64(&h.failedMessages, 1)
		return err
	}
	// Delete the event from store.
	return h.store.Del(key.Name)
}

// Send the log message 'entry' to the http target.
// Messages are queued on disk if the store is enabled.
// If Cancel has been called, the message is ignored.
func (h *Target) Send(ctx context.Context, entry interface{}) error {
	if atomic.LoadInt32(&h.status) == statusClosed {
		if h.migrateTarget != nil {
			return h.migrateTarget.Send(ctx, entry)
		}
		return nil
	}

	h.logChMu.RLock()
	defer h.logChMu.RUnlock()
	if h.logCh == nil {
		// We are closing...
		return nil
	}

retry:
	select {
	case h.logCh <- entry:
		atomic.AddInt64(&h.totalMessages, 1)
	case <-ctx.Done():
		// Return an error only if the context deadline was exceeded.
		if errors.Is(ctx.Err(), context.DeadlineExceeded) {
			return ctx.Err()
		}
		return nil
	default:
		if atomic.LoadInt64(&h.workers) < h.maxWorkers {
			goto retry
		}
		atomic.AddInt64(&h.totalMessages, 1)
		atomic.AddInt64(&h.failedMessages, 1)
		return errors.New("log buffer full")
	}

	return nil
}

// Cancel - cancels the target.
// All queued messages are flushed and the function returns afterwards.
// All messages sent to the target after this function has been called will be dropped.
func (h *Target) Cancel() {
	atomic.StoreInt32(&h.status, statusClosed)
	h.storeCtxCancel()

	// Wait for messages to be sent...
	h.wg.Wait()

	// Close logCh and set it to nil.
	// Existing Send operations will finish,
	// and all future ones will be discarded.
	h.logChMu.Lock()
	xioutil.SafeClose(h.logCh)
	h.logCh = nil
	h.logChMu.Unlock()
}