# Source: github.com/Jeffail/benthos/v3@v3.65.0/template/inputs/twitter_search.yaml
#
# Benthos input template that polls the Twitter recent search V2 API and emits
# each matching tweet as its own JSON message. The template expands into a
# `generate` input followed by a fixed chain of processors (see `mapping`).
name: twitter_search
type: input
status: experimental
categories: [ Services, Social ]
summary: Consumes tweets matching a given search using the Twitter recent search V2 API.
description: |
  Continuously polls the [Twitter recent search V2 API](https://developer.twitter.com/en/docs/twitter-api/tweets/search/api-reference/get-tweets-search-recent) for tweets that match a given search query.

  Each tweet received is emitted as a JSON object message, with a field `id` and `text` by default. Extra fields [can be obtained from the search API](https://developer.twitter.com/en/docs/twitter-api/fields) when listed with the `tweet_fields` field.

  In order to paginate requests that are made the ID of the latest received tweet is stored in a [cache resource](/docs/components/caches/about), which is then used by subsequent requests to ensure only tweets after it are consumed. It is recommended that the cache you use is persistent so that Benthos can resume searches at the correct place on a restart.

  Authentication is done using OAuth 2.0 credentials which can be generated within the [Twitter developer portal](https://developer.twitter.com).

# Fields without a `default` (query, cache, api_key, api_secret) have no
# fallback value in this template.
fields:
  - name: query
    description: A search expression to use.
    type: string

  - name: tweet_fields
    description: An optional list of additional fields to obtain for each tweet, by default only the fields `id` and `text` are returned. For more info refer to the [twitter API docs.](https://developer.twitter.com/en/docs/twitter-api/fields)
    type: string
    kind: list
    default: []

  - name: poll_period
    description: The length of time (as a duration string) to wait between each search request. This field can be set empty, in which case requests are made at the limit set by the rate limit. This field also supports cron expressions.
    type: string
    default: "1m"

  - name: backfill_period
    description: A duration string indicating the maximum age of tweets to acquire when starting a search.
    type: string
    default: "5m"

  - name: cache
    description: A cache resource to use for request pagination.
    type: string

  - name: cache_key
    description: The key identifier used when storing the ID of the last tweet received.
    type: string
    default: last_tweet_id
    advanced: true

  - name: rate_limit
    description: An optional rate limit resource to restrict API requests with.
    type: string
    default: ""
    advanced: true

  - name: api_key
    description: An API key for OAuth 2.0 authentication. It is recommended that you populate this field using [environment variables](/docs/configuration/interpolation).
    type: string

  - name: api_secret
    description: An API secret for OAuth 2.0 authentication. It is recommended that you populate this field using [environment variables](/docs/configuration/interpolation).
    type: string

# Bloblang mapping from the template fields above to the expanded input config.
#
# Processors are appended in a fixed order and `metrics_mapping` below refers
# to two of them by index, so keep both in sync when adding or reordering:
#   0 cache (get)   1 catch      2 bloblang (build URL)   3 http
#   4 switch        5 bloblang   6 unarchive              7 cache (set)
#   8 catch         9 split
mapping: |
  # Refuse configs that would poll with neither an interval nor a rate limit.
  let _ = if this.poll_period == "" && this.rate_limit == "" {
    throw("either a poll_period, a rate_limit, or both must be specified")
  }

  # Backfill window expressed in seconds (e.g. "5m" becomes 300).
  let backfill_seconds = this.backfill_period.parse_duration() / 1000000000

  let query = "?max_results=100&query=" + this.query.escape_url_query()

  # Only extended when extra fields were requested; with an empty list the
  # query string built above is used unchanged.
  let query = if this.tweet_fields.length() > 0 {
    $query + "&tweet.fields=" + this.tweet_fields.join(",").escape_url_query()
  }

  let url = "https://api.twitter.com/2/tweets/search/recent" + $query

  # Each poll starts from an empty message produced on the poll interval.
  root.generate.interval = this.poll_period
  root.generate.mapping = "root = \"\""

  root.processors = []

  # Load the ID of the last tweet seen, if any, into the message body.
  root.processors."-".cache = {
    "resource": this.cache,
    "operator": "get",
    "key": this.cache_key,
  }

  root.processors."-".catch = [] # Don't care if the cache is empty

  # Build the request URL: resume from the cached tweet ID when there is one,
  # otherwise start from now minus the backfill window. The two %v
  # placeholders are filled by .format() with the backfill seconds and the
  # base URL, in that order.
  root.processors."-".bloblang = """let pagination_params = if content().length() == 0 {
    "&start_time="+(timestamp_unix()-%v).format_timestamp("2006-01-02T15:04:05Z","UTC").escape_url_query()
  } else {
    "&since_id="+content().string()
  }
  meta tweet_search_url = "%v" + $pagination_params
  root = ""
  """.format($backfill_seconds, $url)

  root.processors."-".http = {
    "url": """${! meta("tweet_search_url") }""",
    "verb": "GET",
    "rate_limit": this.rate_limit,
    "oauth2": {
      "enabled": true,
      "token_url": "https://api.twitter.com/oauth2/token",
      "client_key": this.api_key,
      "client_secret": this.api_secret,
    },
  }

  # When the API rejects the cached since_id, delete the cached ID and drop
  # this poll; with the cache empty the next poll takes the start_time branch
  # above.
  root.processors."-".switch = [
    {
      "check": """root = error().or("").contains("'since_id' must be a tweet id created after")""",
      "processors": [
        {
          "cache": {
            "resource": this.cache,
            "operator": "delete",
            "key": this.cache_key,
          },
        },
        { "bloblang": "root = deleted() " },
      ],
    },
  ]

  # Keep only the `data` array of the response; drop the message when it is
  # missing or empty.
  root.processors."-".bloblang = "root = if (this.data | []).length() > 0 { this.data } else { deleted() }"

  # Expand the JSON array into one message per tweet.
  root.processors."-".unarchive = {
    "format": "json_array"
  }

  # Store the `id` of the last message of the batch as the new pagination
  # cursor.
  # NOTE(review): this treats the last element of the response array as the
  # most recent tweet - confirm against the API's result ordering.
  root.processors."-".cache = {
    "parts": [ -1 ],
    "resource": this.cache,
    "operator": "set",
    "key": this.cache_key,
    "value": """${! json("id") }""",
  }

  # A failed cache write is logged rather than left as an error on the tweets.
  root.processors."-".catch = [
    {
      "log": {
        "level": "ERROR",
        "message": "Failed to write latest tweet ID to cache: ${! error() }",
      }
    }
  ]

  root.processors."-".split = {}

# Renames the metrics of the cache-set processor (index 7) and the http
# processor (index 3) to `count`, `error` and `request`; every other metric is
# dropped.
metrics_mapping: |
  root = match this {
    this.has_suffix("processor.7.count") => this.replace("processor.7.count", "count"),
    this.has_suffix("processor.3.client.error") => this.replace("processor.3.client.error", "error"),
    this.contains("processor.3.client.error") => deleted(), # Ignore more specialized client errors
    this.contains("processor.3.client") => this.replace("processor.3.client", "request"),
    _ => deleted(),
  }

# Each test expands the template with `config` and compares the result with
# `expected`.
tests:
  - name: Basic fields
    config:
      query: benthos.dev
      cache: foocache
      rate_limit: foolimit
      api_key: fookey
      api_secret: foosecret

    expected:
      generate:
        interval: '1m'
        mapping: root = ""
      processors:
        - cache:
            resource: foocache
            operator: get
            key: last_tweet_id

        - catch: []

        - bloblang: |
            let pagination_params = if content().length() == 0 {
              "&start_time="+(timestamp_unix()-300).format_timestamp("2006-01-02T15:04:05Z","UTC").escape_url_query()
            } else {
              "&since_id="+content().string()
            }
            meta tweet_search_url = "https://api.twitter.com/2/tweets/search/recent?max_results=100&query=benthos.dev" + $pagination_params
            root = ""

        - http:
            url: ${! meta("tweet_search_url") }
            verb: GET
            rate_limit: foolimit
            oauth2:
              enabled: true
              token_url: https://api.twitter.com/oauth2/token
              client_key: fookey
              client_secret: foosecret

        - switch:
            - check: 'root = error().or("").contains("''since_id'' must be a tweet id created after")'
              processors:
                - cache:
                    resource: foocache
                    operator: delete
                    key: last_tweet_id
                - bloblang: root = deleted()

        - bloblang: root = if (this.data | []).length() > 0 { this.data } else { deleted() }

        - unarchive:
            format: json_array

        - cache:
            # Only bother caching the latest tweet ID (last of the batch).
            parts: [ -1 ]
            resource: foocache
            operator: set
            key: last_tweet_id
            value: ${! json("id") }

        - catch:
            - log:
                level: ERROR
                message: "Failed to write latest tweet ID to cache: ${! error() }"

        - split: {}

  - name: With tweet fields set
    config:
      query: hello world
      cache: barcache
      backfill_period: 600s
      api_key: barkey
      api_secret: barsecret
      tweet_fields:
        - created_at
        - public_metrics

    expected:
      generate:
        interval: '1m'
        mapping: root = ""
      processors:
        - cache:
            resource: barcache
            operator: get
            key: last_tweet_id

        - catch: []

        - bloblang: |
            let pagination_params = if content().length() == 0 {
              "&start_time="+(timestamp_unix()-600).format_timestamp("2006-01-02T15:04:05Z","UTC").escape_url_query()
            } else {
              "&since_id="+content().string()
            }
            meta tweet_search_url = "https://api.twitter.com/2/tweets/search/recent?max_results=100&query=hello+world&tweet.fields=created_at%2Cpublic_metrics" + $pagination_params
            root = ""

        - http:
            url: ${! meta("tweet_search_url") }
            verb: GET
            rate_limit: ""
            oauth2:
              enabled: true
              token_url: https://api.twitter.com/oauth2/token
              client_key: barkey
              client_secret: barsecret

        - switch:
            - check: 'root = error().or("").contains("''since_id'' must be a tweet id created after")'
              processors:
                - cache:
                    resource: barcache
                    operator: delete
                    key: last_tweet_id
                - bloblang: root = deleted()

        - bloblang: root = if (this.data | []).length() > 0 { this.data } else { deleted() }

        - unarchive:
            format: json_array

        - cache:
            # Only bother caching the latest tweet ID (last of the batch).
            parts: [ -1 ]
            resource: barcache
            operator: set
            key: last_tweet_id
            value: ${! json("id") }

        - catch:
            - log:
                level: ERROR
                message: "Failed to write latest tweet ID to cache: ${! error() }"

        - split: {}