github.com/Jeffail/benthos/v3@v3.65.0/lib/processor/cache.go

package processor

import (
	"context"
	"errors"
	"fmt"
	"time"

	"github.com/Jeffail/benthos/v3/internal/bloblang/field"
	"github.com/Jeffail/benthos/v3/internal/docs"
	"github.com/Jeffail/benthos/v3/internal/interop"
	"github.com/Jeffail/benthos/v3/internal/tracing"
	"github.com/Jeffail/benthos/v3/lib/log"
	"github.com/Jeffail/benthos/v3/lib/metrics"
	"github.com/Jeffail/benthos/v3/lib/types"
)

//------------------------------------------------------------------------------

func init() {
	Constructors[TypeCache] = TypeSpec{
		constructor: NewCache,
		Categories: []Category{
			CategoryIntegration,
		},
		Summary: `
Performs operations against a [cache resource](/docs/components/caches/about) for each message, allowing you to store or retrieve data within message payloads.`,
		Description: `
This processor will interpolate functions within the ` + "`key` and `value`" + ` fields individually for each message. This allows you to specify dynamic keys and values based on the contents of the message payloads and metadata. You can find a list of functions [here](/docs/configuration/interpolation#bloblang-queries).`,
		FieldSpecs: docs.FieldSpecs{
			docs.FieldCommon("resource", "The [`cache` resource](/docs/components/caches/about) to target with this processor."),
			docs.FieldDeprecated("cache"),
			docs.FieldCommon("operator", "The [operation](#operators) to perform with the cache.").HasOptions("set", "add", "get", "delete"),
			docs.FieldCommon("key", "A key to use with the cache.").IsInterpolated(),
			docs.FieldCommon("value", "A value to use with the cache (when applicable).").IsInterpolated(),
			docs.FieldAdvanced(
				"ttl", "The TTL of each individual item as a duration string. After this period an item will be eligible for removal during the next compaction. Not all caches support per-key TTLs, and those that do not will fall back to their generally configured TTL setting.",
				"60s", "5m", "36h",
			).IsInterpolated().AtVersion("3.33.0"),
			PartsFieldSpec,
		},
		Examples: []docs.AnnotatedExample{
			{
				Title: "Deduplication",
				Summary: `
Deduplication can be done using the add operator with a key extracted from the
message payload. Since the operation fails when the key already exists, we can
remove the duplicates with a
[` + "`bloblang` processor" + `](/docs/components/processors/bloblang):`,
				Config: `
pipeline:
  processors:
    - cache:
        resource: foocache
        operator: add
        key: '${! json("message.id") }'
        value: "storeme"
    - bloblang: root = if errored() { deleted() }

cache_resources:
  - label: foocache
    redis:
      url: tcp://TODO:6379
`,
			},
			{
				Title: "Hydration",
				Summary: `
It's possible to enrich payloads with content previously stored in a cache by
using the [` + "`branch`" + `](/docs/components/processors/branch) processor:`,
				Config: `
pipeline:
  processors:
    - branch:
        processors:
          - cache:
              resource: foocache
              operator: get
              key: '${! json("message.document_id") }'
        result_map: 'root.message.document = this'

        # NOTE: If the data stored in the cache is not valid JSON then use
        # something like this instead:
        # result_map: 'root.message.document = content().string()'

cache_resources:
  - label: foocache
    memcached:
      addresses: [ "TODO:11211" ]
`,
			},
		},
		Footnotes: `
## Operators

### ` + "`set`" + `

Set a key in the cache to a value. If the key already exists its contents are
overwritten.

### ` + "`add`" + `

Set a key in the cache to a value. If the key already exists the action fails
with a 'key already exists' error, which can be detected with
[processor error handling](/docs/configuration/error_handling).

### ` + "`get`" + `

Retrieve the contents of a cached key and replace the original message payload
with the result. If the key does not exist the action fails with an error, which
can be detected with [processor error handling](/docs/configuration/error_handling).

### ` + "`delete`" + `

Delete a key and its contents from the cache. If the key does not exist the
action is a no-op and will not fail with an error.`,
	}
}

//------------------------------------------------------------------------------

// CacheConfig contains configuration fields for the Cache processor.
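//
// A minimal YAML sketch of the corresponding processor config is shown below;
// the cache resource label "foocache" and the interpolated fields are
// illustrative only:
//
//	cache:
//	  resource: foocache
//	  operator: set
//	  key: '${! json("id") }'
//	  value: '${! content() }'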
type CacheConfig struct {
	Cache    string `json:"cache" yaml:"cache"`
	Resource string `json:"resource" yaml:"resource"`
	Parts    []int  `json:"parts" yaml:"parts"`
	Operator string `json:"operator" yaml:"operator"`
	Key      string `json:"key" yaml:"key"`
	Value    string `json:"value" yaml:"value"`
	TTL      string `json:"ttl" yaml:"ttl"`
}

// NewCacheConfig returns a CacheConfig with default values.
func NewCacheConfig() CacheConfig {
	return CacheConfig{
		Cache:    "",
		Resource: "",
		Parts:    []int{},
		Operator: "set",
		Key:      "",
		Value:    "",
		TTL:      "",
	}
}

//------------------------------------------------------------------------------

// Cache is a processor that stores or retrieves data from a cache for each
// message of a batch via an interpolated key.
type Cache struct {
	conf  Config
	log   log.Modular
	stats metrics.Type

	parts []int

	key   *field.Expression
	value *field.Expression
	ttl   *field.Expression

	mgr       types.Manager
	cacheName string
	operator  cacheOperator

	mCount            metrics.StatCounter
	mErr              metrics.StatCounter
	mKeyAlreadyExists metrics.StatCounter
	mSent             metrics.StatCounter
	mBatchSent        metrics.StatCounter
}

// NewCache returns a Cache processor.
func NewCache(
	conf Config, mgr types.Manager, log log.Modular, stats metrics.Type,
) (Type, error) {
	cacheName := conf.Cache.Resource
	if cacheName == "" {
		cacheName = conf.Cache.Cache
	}
	if cacheName == "" {
		return nil, errors.New("cache name must be specified")
	}

	op, err := cacheOperatorFromString(conf.Cache.Operator)
	if err != nil {
		return nil, err
	}

	key, err := interop.NewBloblangField(mgr, conf.Cache.Key)
	if err != nil {
		return nil, fmt.Errorf("failed to parse key expression: %v", err)
	}

	value, err := interop.NewBloblangField(mgr, conf.Cache.Value)
	if err != nil {
		return nil, fmt.Errorf("failed to parse value expression: %v", err)
	}

	ttl, err := interop.NewBloblangField(mgr, conf.Cache.TTL)
	if err != nil {
		return nil, fmt.Errorf("failed to parse ttl expression: %v", err)
	}

	if err := interop.ProbeCache(context.Background(), mgr, cacheName); err != nil {
		return nil, err
	}

	return &Cache{
		conf:  conf,
		log:   log,
		stats: stats,

		parts: conf.Cache.Parts,

		key:   key,
		value: value,
		ttl:   ttl,

		mgr:       mgr,
		cacheName: cacheName,
		operator:  op,

		mCount:            stats.GetCounter("count"),
		mErr:              stats.GetCounter("error"),
		mKeyAlreadyExists: stats.GetCounter("key_already_exists"),
		mSent:             stats.GetCounter("sent"),
		mBatchSent:        stats.GetCounter("batch.sent"),
	}, nil
}

//------------------------------------------------------------------------------

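// cacheOperator is a single cache action applied to one message part. It
// receives the target cache, an interpolated key, an optional value and an
// optional per-key TTL, and returns a result payload, a flag indicating
// whether that result should replace the part contents, and an error.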
type cacheOperator func(cache types.Cache, key string, value []byte, ttl *time.Duration) ([]byte, bool, error)

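// newCacheSetOperator returns an operator that writes the value under the key,
// using SetWithTTL when the target cache supports per-key TTLs.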
func newCacheSetOperator() cacheOperator {
	return func(cache types.Cache, key string, value []byte, ttl *time.Duration) ([]byte, bool, error) {
		var err error
		if cttl, ok := cache.(types.CacheWithTTL); ok {
			err = cttl.SetWithTTL(key, value, ttl)
		} else {
			err = cache.Set(key, value)
		}
		return nil, false, err
	}
}

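// newCacheAddOperator returns an operator that writes the value under the key
// only if it does not already exist, using AddWithTTL when the target cache
// supports per-key TTLs. A duplicate key is reported via
// types.ErrKeyAlreadyExists, which ProcessMessage counts separately.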
func newCacheAddOperator() cacheOperator {
	return func(cache types.Cache, key string, value []byte, ttl *time.Duration) ([]byte, bool, error) {
		var err error
		if cttl, ok := cache.(types.CacheWithTTL); ok {
			err = cttl.AddWithTTL(key, value, ttl)
		} else {
			err = cache.Add(key, value)
		}
		return nil, false, err
	}
}

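// newCacheGetOperator returns an operator that reads the contents of the key
// and signals that the result should replace the message part contents.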
func newCacheGetOperator() cacheOperator {
	return func(cache types.Cache, key string, _ []byte, _ *time.Duration) ([]byte, bool, error) {
		result, err := cache.Get(key)
		return result, true, err
	}
}

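// newCacheDeleteOperator returns an operator that removes the key from the
// cache; the value and TTL arguments are ignored.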
func newCacheDeleteOperator() cacheOperator {
	return func(cache types.Cache, key string, _ []byte, _ *time.Duration) ([]byte, bool, error) {
		err := cache.Delete(key)
		return nil, false, err
	}
}

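// cacheOperatorFromString resolves an operator name from the config into its
// implementation, returning an error for unrecognised names. A rough usage
// sketch, where memCache stands in for any types.Cache implementation:
//
//	op, err := cacheOperatorFromString("add")
//	if err != nil {
//		// unknown operator name
//	}
//	result, useResult, err := op(memCache, "id-123", []byte("payload"), nil)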
func cacheOperatorFromString(operator string) (cacheOperator, error) {
	switch operator {
	case "set":
		return newCacheSetOperator(), nil
	case "add":
		return newCacheAddOperator(), nil
	case "get":
		return newCacheGetOperator(), nil
	case "delete":
		return newCacheDeleteOperator(), nil
	}
	return nil, fmt.Errorf("operator not recognised: %v", operator)
}

//------------------------------------------------------------------------------

// ProcessMessage applies the processor to a message, either creating >0
// resulting messages or a response to be sent back to the message source.
func (c *Cache) ProcessMessage(msg types.Message) ([]types.Message, types.Response) {
	c.mCount.Incr(1)
	newMsg := msg.Copy()

	proc := func(index int, span *tracing.Span, part types.Part) error {
		// Resolve the key and value interpolations for this message part.
		key := c.key.String(index, msg)
		value := c.value.Bytes(index, msg)

		// An empty TTL expression means no per-key TTL is requested.
		var ttl *time.Duration
		if ttls := c.ttl.String(index, msg); ttls != "" {
			td, err := time.ParseDuration(ttls)
			if err != nil {
				c.mErr.Incr(1)
				c.log.Debugf("TTL must be a duration: %v\n", err)
				return err
			}
			ttl = &td
		}

		// Apply the configured operator against the cache resource.
		var result []byte
		var useResult bool
		var err error
		if cerr := interop.AccessCache(context.Background(), c.mgr, c.cacheName, func(cache types.Cache) {
			result, useResult, err = c.operator(cache, key, value, ttl)
		}); cerr != nil {
			err = cerr
		}
		if err != nil {
			if err != types.ErrKeyAlreadyExists {
				c.mErr.Incr(1)
				c.log.Debugf("Operator failed for key '%s': %v\n", key, err)
			} else {
				c.mKeyAlreadyExists.Incr(1)
				c.log.Debugf("Key already exists: %v\n", key)
			}
			return err
		}

		// Get operations replace the part contents with the cached value.
		if useResult {
			part.Set(result)
		}
		return nil
	}

	IteratePartsWithSpanV2(TypeCache, c.parts, newMsg, proc)

	c.mBatchSent.Incr(1)
	c.mSent.Incr(int64(newMsg.Len()))
	msgs := [1]types.Message{newMsg}
	return msgs[:], nil
}

// CloseAsync shuts down the processor and stops processing requests.
func (c *Cache) CloseAsync() {
}

// WaitForClose blocks until the processor has closed down.
func (c *Cache) WaitForClose(_ time.Duration) error {
	return nil
}

//------------------------------------------------------------------------------