github.com/Jeffail/benthos/v3@v3.65.0/lib/input/generate.go (about)

     1  package input
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"strings"
     7  	"sync/atomic"
     8  	"time"
     9  
    10  	"github.com/Jeffail/benthos/v3/internal/bloblang/mapping"
    11  	"github.com/Jeffail/benthos/v3/internal/bloblang/parser"
    12  	"github.com/Jeffail/benthos/v3/internal/docs"
    13  	"github.com/Jeffail/benthos/v3/internal/interop"
    14  	"github.com/Jeffail/benthos/v3/lib/input/reader"
    15  	"github.com/Jeffail/benthos/v3/lib/log"
    16  	"github.com/Jeffail/benthos/v3/lib/message"
    17  	"github.com/Jeffail/benthos/v3/lib/metrics"
    18  	"github.com/Jeffail/benthos/v3/lib/types"
    19  	"github.com/robfig/cron/v3"
    20  )
    21  
    22  //------------------------------------------------------------------------------
    23  
    24  func init() {
    25  	Constructors[TypeGenerate] = TypeSpec{
    26  		constructor: fromSimpleConstructor(func(conf Config, mgr types.Manager, log log.Modular, stats metrics.Type) (Type, error) {
    27  			b, err := newBloblang(mgr, conf.Generate)
    28  			if err != nil {
    29  				return nil, err
    30  			}
    31  			return NewAsyncReader(TypeGenerate, false, reader.NewAsyncPreserver(b), log, stats)
    32  		}),
    33  		Version: "3.40.0",
    34  		Status:  docs.StatusStable,
    35  		Summary: `
    36  Generates messages at a given interval using a [Bloblang](/docs/guides/bloblang/about)
    37  mapping executed without a context. This allows you to generate messages for
    38  testing your pipeline configs.`,
    39  		FieldSpecs: docs.FieldSpecs{
    40  			docs.FieldBloblang(
    41  				"mapping", "A [bloblang](/docs/guides/bloblang/about) mapping to use for generating messages.",
    42  				`root = "hello world"`,
    43  				`root = {"test":"message","id":uuid_v4()}`,
    44  			),
    45  			docs.FieldCommon(
    46  				"interval",
    47  				"The time interval at which messages should be generated, expressed either as a duration string or as a cron expression. If set to an empty string messages will be generated as fast as downstream services can process them. Cron expressions can specify a timezone by prefixing the expression with `TZ=<location name>`, where the location name corresponds to a file within the IANA Time Zone database.",
    48  				"5s", "1m", "1h",
    49  				"@every 1s", "0,30 */2 * * * *", "TZ=Europe/London 30 3-6,20-23 * * *",
    50  			),
    51  			docs.FieldCommon("count", "An optional number of messages to generate, if set above 0 the specified number of messages is generated and then the input will shut down."),
    52  		},
    53  		Categories: []Category{
    54  			CategoryUtility,
    55  		},
    56  		Examples: []docs.AnnotatedExample{
    57  			{
    58  				Title:   "Cron Scheduled Processing",
    59  				Summary: "A common use case for the generate input is to trigger processors on a schedule so that the processors themselves can behave similarly to an input. The following configuration reads rows from a PostgreSQL table every 5 minutes.",
    60  				Config: `
    61  input:
    62    generate:
    63      interval: '@every 5m'
    64      mapping: 'root = {}'
    65    processors:
    66      - sql_select:
    67          driver: postgres
    68          dsn: postgres://foouser:foopass@localhost:5432/testdb?sslmode=disable
    69          table: foo
    70          columns: [ "*" ]
    71  `,
    72  			},
    73  			{
    74  				Title:   "Generate 100 Rows",
    75  				Summary: "The generate input can be used as a convenient way to generate test data. The following example generates 100 rows of structured data by setting an explicit count. The interval field is set to empty, which means data is generated as fast as the downstream components can consume it.",
    76  				Config: `
    77  input:
    78    generate:
    79      count: 100
    80      interval: ""
    81      mapping: |
    82        root = if random_int() % 2 == 0 {
    83          {
    84            "type": "foo",
    85            "foo": "is yummy"
    86          }
    87        } else {
    88          {
    89            "type": "bar",
    90            "bar": "is gross"
    91          }
    92        }
    93  `,
    94  			},
    95  		},
    96  	}
    97  
    98  	Constructors[TypeBloblang] = TypeSpec{
    99  		constructor: fromSimpleConstructor(func(conf Config, mgr types.Manager, log log.Modular, stats metrics.Type) (Type, error) {
   100  			b, err := newBloblang(mgr, conf.Bloblang)
   101  			if err != nil {
   102  				return nil, err
   103  			}
   104  			return NewAsyncReader(TypeBloblang, true, b, log, stats)
   105  		}),
   106  		Status: docs.StatusDeprecated,
   107  		Summary: `
   108  Generates messages at a given interval using a [Bloblang](/docs/guides/bloblang/about)
   109  mapping executed without a context. This allows you to generate messages for
   110  testing your pipeline configs.`,
   111  		Description: `
   112  ## Alternatives
   113  
   114  This input has been ` + "[renamed to `generate`](/docs/components/inputs/generate)" + `.
   115  `,
   116  		FieldSpecs: docs.FieldSpecs{
   117  			docs.FieldBloblang(
   118  				"mapping", "A [bloblang](/docs/guides/bloblang/about) mapping to use for generating messages.",
   119  				`root = "hello world"`,
   120  				`root = {"test":"message","id":uuid_v4()}`,
   121  			),
   122  			docs.FieldCommon(
   123  				"interval",
   124  				"The time interval at which messages should be generated, expressed either as a duration string or as a cron expression. If set to an empty string messages will be generated as fast as downstream services can process them.",
   125  				"5s", "1m", "1h",
   126  				"@every 1s", "0,30 */2 * * * *", "30 3-6,20-23 * * *",
   127  			),
   128  			docs.FieldCommon("count", "An optional number of messages to generate, if set above 0 the specified number of messages is generated and then the input will shut down."),
   129  		},
   130  		Categories: []Category{
   131  			CategoryUtility,
   132  		},
   133  	}
   134  }
   135  
   136  //------------------------------------------------------------------------------
   137  
// BloblangConfig contains configuration for the Bloblang input type.
//
// Mapping is the Bloblang mapping source used to build each message. Count is
// an optional cap on generated messages; only values above zero enable the
// cap.
type BloblangConfig struct {
	Mapping string `json:"mapping" yaml:"mapping"`
	// Interval can be either a duration string or a cron expression. An empty
	// string means reads are not paced by a timer at all.
	Interval string `json:"interval" yaml:"interval"`
	Count    int    `json:"count" yaml:"count"`
}
   145  
   146  // NewBloblangConfig creates a new BloblangConfig with default values.
   147  func NewBloblangConfig() BloblangConfig {
   148  	return BloblangConfig{
   149  		Mapping:  "",
   150  		Interval: "1s",
   151  		Count:    0,
   152  	}
   153  }
   154  
// Bloblang executes a bloblang mapping with an empty context each time this
// input is read from. Reads are optionally paced by a ticker that is driven
// either by a fixed duration or by a cron schedule; when no ticker is set a
// message is produced on every read without waiting.
type Bloblang struct {
	remaining   int64             // messages still allowed while limited is set
	limited     bool              // true when a count above zero was configured
	firstIsFree bool              // while true, a read does not wait on the timer
	exec        *mapping.Executor // compiled mapping that produces each message
	timer       *time.Ticker      // paces reads; nil when reads are unthrottled
	schedule    *cron.Schedule    // cron schedule; nil for duration intervals
	location    *time.Location    // time zone used when evaluating the schedule
}
   167  
   168  // newBloblang creates a new bloblang input reader type.
   169  func newBloblang(mgr types.Manager, conf BloblangConfig) (*Bloblang, error) {
   170  	var (
   171  		duration    time.Duration
   172  		timer       *time.Ticker
   173  		schedule    *cron.Schedule
   174  		location    *time.Location
   175  		err         error
   176  		firstIsFree = true
   177  	)
   178  
   179  	if len(conf.Interval) > 0 {
   180  		if duration, err = time.ParseDuration(conf.Interval); err != nil {
   181  			// interval is not a duration so try to parse as a cron expression
   182  			var cerr error
   183  			if schedule, location, cerr = parseCronExpression(conf.Interval); cerr != nil {
   184  				return nil, fmt.Errorf("failed to parse interval as duration string: %v, or as cron expression: %w", err, cerr)
   185  			}
   186  			firstIsFree = false
   187  			duration = getDurationTillNextSchedule(*schedule, location)
   188  		}
   189  		if duration > 0 {
   190  			timer = time.NewTicker(duration)
   191  		}
   192  	}
   193  	exec, err := interop.NewBloblangMapping(mgr, conf.Mapping)
   194  	if err != nil {
   195  		if perr, ok := err.(*parser.Error); ok {
   196  			return nil, fmt.Errorf("failed to parse mapping: %v", perr.ErrorAtPosition([]rune(conf.Mapping)))
   197  		}
   198  		return nil, fmt.Errorf("failed to parse mapping: %v", err)
   199  	}
   200  	remaining := int64(conf.Count)
   201  	return &Bloblang{
   202  		exec:        exec,
   203  		remaining:   remaining,
   204  		limited:     remaining > 0,
   205  		timer:       timer,
   206  		schedule:    schedule,
   207  		location:    location,
   208  		firstIsFree: firstIsFree,
   209  	}, nil
   210  }
   211  
   212  func getDurationTillNextSchedule(schedule cron.Schedule, location *time.Location) time.Duration {
   213  	now := time.Now().In(location)
   214  	return schedule.Next(now).Sub(now)
   215  }
   216  
   217  func parseCronExpression(cronExpression string) (*cron.Schedule, *time.Location, error) {
   218  	// If time zone is not included, set default to UTC
   219  	if !strings.HasPrefix(cronExpression, "TZ=") {
   220  		cronExpression = fmt.Sprintf("TZ=%s %s", "UTC", cronExpression)
   221  	}
   222  
   223  	end := strings.Index(cronExpression, " ")
   224  	eq := strings.Index(cronExpression, "=")
   225  	tz := cronExpression[eq+1 : end]
   226  
   227  	loc, err := time.LoadLocation(tz)
   228  	if err != nil {
   229  		return nil, nil, err
   230  	}
   231  	parser := cron.NewParser(cron.SecondOptional | cron.Minute | cron.Hour | cron.Dom | cron.Month | cron.Dow | cron.Descriptor)
   232  
   233  	cronSchedule, err := parser.Parse(cronExpression)
   234  	if err != nil {
   235  		return nil, nil, err
   236  	}
   237  
   238  	return &cronSchedule, loc, nil
   239  }
   240  
// ConnectWithContext establishes a Bloblang reader. There is nothing to
// connect to, so ctx is unused and the returned error is always nil.
func (b *Bloblang) ConnectWithContext(ctx context.Context) error {
	return nil
}
   245  
   246  // ReadWithContext a new bloblang generated message.
   247  func (b *Bloblang) ReadWithContext(ctx context.Context) (types.Message, reader.AsyncAckFn, error) {
   248  	if b.limited {
   249  		if remaining := atomic.AddInt64(&b.remaining, -1); remaining < 0 {
   250  			return nil, nil, types.ErrTypeClosed
   251  		}
   252  	}
   253  
   254  	if !b.firstIsFree && b.timer != nil {
   255  		select {
   256  		case _, open := <-b.timer.C:
   257  			if !open {
   258  				return nil, nil, types.ErrTypeClosed
   259  			}
   260  			if b.schedule != nil {
   261  				b.timer.Reset(getDurationTillNextSchedule(*b.schedule, b.location))
   262  			}
   263  		case <-ctx.Done():
   264  			return nil, nil, types.ErrTimeout
   265  		}
   266  	}
   267  
   268  	b.firstIsFree = false
   269  	p, err := b.exec.MapPart(0, message.New(nil))
   270  	if err != nil {
   271  		return nil, nil, err
   272  	}
   273  	if p == nil {
   274  		return nil, nil, types.ErrTimeout
   275  	}
   276  
   277  	msg := message.New(nil)
   278  	msg.Append(p)
   279  
   280  	return msg, func(context.Context, types.Response) error { return nil }, nil
   281  }
   282  
   283  // CloseAsync shuts down the bloblang reader.
   284  func (b *Bloblang) CloseAsync() {
   285  	if b.timer != nil {
   286  		b.timer.Stop()
   287  	}
   288  }
   289  
// WaitForClose reports whether the bloblang input has closed down. It does
// not wait: timeout is unused and nil is returned immediately.
func (b *Bloblang) WaitForClose(timeout time.Duration) error {
	return nil
}