github.com/Jeffail/benthos/v3@v3.65.0/template/inputs/twitter_search.yaml (about)

     1  name: twitter_search
     2  type: input
        # Maturity level and the categories assigned to this template.
     3  status: experimental
     4  categories:
          - Services
          - Social
        # One-line summary of what the input does.
     5  summary: >-
          Consumes tweets matching a given search using the Twitter recent search
          V2 API.
     6  description: |
     7    Continuously polls the [Twitter recent search V2 API](https://developer.twitter.com/en/docs/twitter-api/tweets/search/api-reference/get-tweets-search-recent) for tweets that match a given search query.
     8  
     9    Each tweet received is emitted as a JSON object message, with a field `id` and `text` by default. Extra fields [can be obtained from the search API](https://developer.twitter.com/en/docs/twitter-api/fields) when listed with the `tweet_fields` field.
    10  
    11    In order to paginate the requests that are made, the ID of the latest received tweet is stored in a [cache resource](/docs/components/caches/about), which is then used by subsequent requests to ensure only tweets after it are consumed. It is recommended that the cache you use is persistent so that Benthos can resume searches at the correct place on a restart.
    12  
    13    Authentication is done using OAuth 2.0 credentials which can be generated within the [Twitter developer portal](https://developer.twitter.com).
    14  
    15  fields:
          # Options exposed to users of this template. `query`, `cache`, `api_key`
          # and `api_secret` are declared without a default value; `cache_key` and
          # `rate_limit` are flagged as advanced.
    16    - name: query
    17      description: A search expression to use.
    18      type: string
    19  
    20    - name: tweet_fields
    21      description: >-
              An optional list of additional fields to obtain for each tweet, by
              default only the fields `id` and `text` are returned. For more info
              refer to the
              [twitter API docs.](https://developer.twitter.com/en/docs/twitter-api/fields)
    22      type: string
    23      kind: list
    24      default: []
    25  
    26    - name: poll_period
    27      description: >-
              The length of time (as a duration string) to wait between each search
              request. This field can be set empty, in which case requests are made
              at the limit set by the rate limit. This field also supports cron
              expressions.
    28      type: string
    29      default: '1m'
    30  
    31    - name: backfill_period
    32      description: >-
              A duration string indicating the maximum age of tweets to acquire when
              starting a search.
    33      type: string
    34      default: '5m'
    35  
    36    - name: cache
    37      description: A cache resource to use for request pagination.
    38      type: string
    39  
    40    - name: cache_key
    41      description: >-
              The key identifier used when storing the ID of the last tweet
              received.
    42      type: string
    43      default: last_tweet_id
    44      advanced: true
    45  
    46    - name: rate_limit
    47      description: >-
              An optional rate limit resource to restrict API requests with.
    48      type: string
    49      default: ''
    50      advanced: true
    51  
    52    - name: api_key
    53      description: >-
              An API key for OAuth 2.0 authentication. It is recommended that you
              populate this field using
              [environment variables](/docs/configuration/interpolation).
    54      type: string
    55  
    56    - name: api_secret
    57      description: >-
              An API secret for OAuth 2.0 authentication. It is recommended that you
              populate this field using
              [environment variables](/docs/configuration/interpolation).
    58      type: string
    59  
    60  mapping: |
          # Bloblang mapping that expands the template fields into a `generate`
          # input followed by a chain of processors: cache lookup, request URL
          # construction, HTTP request, recovery from a rejected since_id, result
          # unpacking, cache update and split. The processors are appended in
          # order, and metrics_mapping refers to them by position.

          # Reject configs where both poll_period and rate_limit are empty.
    61    let _ = if this.poll_period == "" && this.rate_limit == "" {
    62      throw("either a poll_period, a rate_limit, or both must be specified")
    63    }
    64  
          # The parsed duration is divided by 1e9; per the tests, "5m" becomes 300
          # and "600s" becomes 600, i.e. the value is in seconds.
    65    let backfill_seconds = this.backfill_period.parse_duration() / 1000000000
    66  
          # Base query string: fixed page size plus the URL-escaped search expression.
    67    let query = "?max_results=100&query=" + this.query.escape_url_query()
    68  
          # Append tweet.fields only when extra fields were requested. With an
          # empty list the "Basic fields" test expects the URL without this
          # parameter, so $query keeps its previous value in that case.
    69    let query = if this.tweet_fields.length() > 0 {
    70      $query + "&tweet.fields=" + this.tweet_fields.join(",").escape_url_query()
    71    }
    72  
    73    let url = "https://api.twitter.com/2/tweets/search/recent" + $query
    74  
          # The generate input uses poll_period as its interval and a mapping that
          # yields an empty string.
    75    root.generate.interval = this.poll_period
    76    root.generate.mapping = "root = \"\""
    77  
    78    root.processors = []
    79  
          # Processor 0: read the previously stored tweet ID from the cache.
    80    root.processors."-".cache = {
    81      "resource": this.cache,
    82      "operator": "get",
    83      "key": this.cache_key,
    84    }
    85  
    86    root.processors."-".catch = [] # Don't care if the cache is empty
    87  
          # Processor 2: build the request URL into the `tweet_search_url` metadata
          # key. With empty message content it appends a start_time computed as the
          # current unix time minus backfill_seconds, otherwise it appends the
          # content as since_id. The two %v placeholders are filled by the
          # .format($backfill_seconds, $url) call that closes the string.
    88    root.processors."-".bloblang = """let pagination_params = if content().length() == 0 {
    89      "&start_time="+(timestamp_unix()-%v).format_timestamp("2006-01-02T15:04:05Z","UTC").escape_url_query()
    90    } else {
    91      "&since_id="+content().string()
    92    }
    93    meta tweet_search_url = "%v" + $pagination_params
    94    root = ""
    95    """.format($backfill_seconds, $url)
    96  
          # Processor 3: GET the URL built above, with OAuth 2.0 enabled and
          # api_key / api_secret as the client key and secret.
    97    root.processors."-".http = {
    98      "url": """${! meta("tweet_search_url") }""",
    99      "verb": "GET",
   100      "rate_limit": this.rate_limit,
   101      "oauth2": {
   102        "enabled": true,
   103        "token_url": "https://api.twitter.com/oauth2/token",
   104        "client_key": this.api_key,
   105        "client_secret": this.api_secret,
   106      },
   107    }
   108  
          # Processor 4: when the error text reports that since_id is not
          # acceptable, delete the cached ID and drop the message. Presumably this
          # makes the next poll take the start_time branch above - confirm.
   109    root.processors."-".switch = [
   110      {
   111        "check": """root = error().or("").contains("'since_id' must be a tweet id created after")""",
   112        "processors": [
   113          {
   114            "cache": {
   115              "resource": this.cache,
   116              "operator": "delete",
   117              "key": this.cache_key,
   118            },
   119          },
   120          { "bloblang": "root = deleted()" },
   121        ],
   122      },
   123    ]
   124  
          # Processor 5: replace the message with its `data` array, or drop the
          # message when `data` is missing or empty.
   125    root.processors."-".bloblang = "root = if (this.data | []).length() > 0 { this.data } else { deleted() }"
   126  
          # Processor 6: unarchive the JSON array.
   127    root.processors."-".unarchive = {
   128      "format": "json_array"
   129    }
   130  
          # Processor 7: store the `id` of the last part of the batch (parts: [-1])
          # under cache_key.
          # NOTE(review): this treats the last element as the latest tweet -
          # confirm against the ordering the search API returns.
   131    root.processors."-".cache = {
   132      "parts": [ -1 ],
   133      "resource": this.cache,
   134      "operator": "set",
   135      "key": this.cache_key,
   136      "value": """${! json("id") }""",
   137    }
   138  
          # Processor 8: a failed cache write is logged at ERROR level.
   139    root.processors."-".catch = [
   140      {
   141        "log": {
   142          "level": "ERROR",
   143          "message": "Failed to write latest tweet ID to cache: ${! error() }",
   144        }
   145      }
   146    ]
   147  
          # Processor 9: split with default settings; presumably emits each tweet
          # as an individual message - confirm.
   148    root.processors."-".split = {}
   149  
   150  metrics_mapping: |
          # Renames or drops metric paths. Counting from 0 in the order processors
          # are appended in `mapping`, processor.3 is the http processor and
          # processor.7 is the cache `set` processor; these indexes must be kept in
          # sync with that order.
          # The has_suffix case for client.error precedes the contains case, so the
          # exact path is renamed to "error" and longer paths containing it are
          # dropped. Paths matching no case are dropped by the final catch-all.
   151    root = match this {
   152      this.has_suffix("processor.7.count") => this.replace("processor.7.count", "count"),
   153      this.has_suffix("processor.3.client.error") => this.replace("processor.3.client.error", "error"),
   154      this.contains("processor.3.client.error") => deleted(), # Ignore more specialized client errors
   155      this.contains("processor.3.client") => this.replace("processor.3.client", "request"),
   156      _ => deleted(),
   157    }
   158  
   159  tests:
   160    - name: Basic fields
            # Sets only query, cache, rate_limit and the API credentials.
            # poll_period, backfill_period, cache_key and tweet_fields take their
            # defaults, so the expected output has interval '1m', a 300 second
            # backfill, key last_tweet_id and no tweet.fields parameter in the URL.
   161      config:
   162        query: benthos.dev
   163        cache: foocache
   164        rate_limit: foolimit
   165        api_key: fookey
   166        api_secret: foosecret
   167  
            # The full config that `mapping` is expected to produce for the input above.
   168      expected:
   169        generate:
   170          interval: '1m'
   171          mapping: root = ""
   172        processors:
   173          - cache:
   174              resource: foocache
   175              operator: get
   176              key: last_tweet_id
   177  
   178          - catch: []
   179  
                # URL builder with both %v placeholders substituted (300 and the
                # search URL).
   180          - bloblang: |
   181              let pagination_params = if content().length() == 0 {
   182                "&start_time="+(timestamp_unix()-300).format_timestamp("2006-01-02T15:04:05Z","UTC").escape_url_query()
   183              } else {
   184                "&since_id="+content().string()
   185              }
   186              meta tweet_search_url = "https://api.twitter.com/2/tweets/search/recent?max_results=100&query=benthos.dev" + $pagination_params
   187              root = ""
   188  
   189          - http:
   190              url: ${! meta("tweet_search_url") }
   191              verb: GET
   192              rate_limit: foolimit
   193              oauth2:
   194                enabled: true
   195                token_url: https://api.twitter.com/oauth2/token
   196                client_key: fookey
   197                client_secret: foosecret
   198  
                # Clears the cached ID and drops the message when the error text
                # contains the since_id message.
   199          - switch:
   200            - check: 'root = error().or("").contains("''since_id'' must be a tweet id created after")'
   201              processors:
   202                - cache:
   203                    resource: foocache
   204                    operator: delete
   205                    key: last_tweet_id
   206                - bloblang: root = deleted()
   207  
   208          - bloblang: root = if (this.data | []).length() > 0 { this.data } else { deleted() }
   209  
   210          - unarchive:
   211              format: json_array
   212  
   213          - cache:
   214              # Only bother caching the latest tweet ID (last of the batch).
   215              parts: [ -1 ]
   216              resource: foocache
   217              operator: set
   218              key: last_tweet_id
   219              value: ${! json("id") }
   220  
   221          - catch:
   222            - log:
   223                level: ERROR
   224                message: "Failed to write latest tweet ID to cache: ${! error() }"
   225  
   226          - split: {}
   227  
   228    - name: With tweet fields set
            # Sets tweet_fields and a 600s backfill_period, and leaves rate_limit
            # unset. The expected URL has the query escaped as `hello+world` and
            # the fields joined as `created_at%2Cpublic_metrics`; the http
            # processor receives an empty rate_limit.
   229      config:
   230        query: hello world
   231        cache: barcache
   232        backfill_period: 600s
   233        api_key: barkey
   234        api_secret: barsecret
   235        tweet_fields:
   236          - created_at
   237          - public_metrics
   238  
            # The full config that `mapping` is expected to produce for the input above.
   239      expected:
   240        generate:
   241          interval: '1m'
   242          mapping: root = ""
   243        processors:
   244          - cache:
   245              resource: barcache
   246              operator: get
   247              key: last_tweet_id
   248  
   249          - catch: []
   250  
                # URL builder with both %v placeholders substituted (600 and the
                # search URL including tweet.fields).
   251          - bloblang: |
   252              let pagination_params = if content().length() == 0 {
   253                "&start_time="+(timestamp_unix()-600).format_timestamp("2006-01-02T15:04:05Z","UTC").escape_url_query()
   254              } else {
   255                "&since_id="+content().string()
   256              }
   257              meta tweet_search_url = "https://api.twitter.com/2/tweets/search/recent?max_results=100&query=hello+world&tweet.fields=created_at%2Cpublic_metrics" + $pagination_params
   258              root = ""
   259  
   260          - http:
   261              url: ${! meta("tweet_search_url") }
   262              verb: GET
   263              rate_limit: ""
   264              oauth2:
   265                enabled: true
   266                token_url: https://api.twitter.com/oauth2/token
   267                client_key: barkey
   268                client_secret: barsecret
   269  
                # Clears the cached ID and drops the message when the error text
                # contains the since_id message.
   270          - switch:
   271            - check: 'root = error().or("").contains("''since_id'' must be a tweet id created after")'
   272              processors:
   273                - cache:
   274                    resource: barcache
   275                    operator: delete
   276                    key: last_tweet_id
   277                - bloblang: root = deleted()
   278  
   279          - bloblang: root = if (this.data | []).length() > 0 { this.data } else { deleted() }
   280  
   281          - unarchive:
   282              format: json_array
   283  
   284          - cache:
   285              # Only bother caching the latest tweet ID (last of the batch).
   286              parts: [ -1 ]
   287              resource: barcache
   288              operator: set
   289              key: last_tweet_id
   290              value: ${! json("id") }
   291  
   292          - catch:
   293            - log:
   294                level: ERROR
   295                message: "Failed to write latest tweet ID to cache: ${! error() }"
   296  
   297          - split: {}