github.com/diadata-org/diadata@v1.4.593/pkg/dia/scraper/foreign-scrapers/YahooFinanceScraper.go (about) 1 package foreignscrapers 2 3 import ( 4 "encoding/json" 5 "errors" 6 "io/ioutil" 7 "net/http" 8 "net/url" 9 "strconv" 10 "strings" 11 "time" 12 13 models "github.com/diadata-org/diadata/pkg/model" 14 "github.com/diadata-org/diadata/pkg/utils" 15 "github.com/gocolly/colly" 16 log "github.com/sirupsen/logrus" 17 ) 18 19 type YahooFinScraper struct { 20 ticker *time.Ticker 21 foreignScrapper ForeignScraper 22 updateRange string 23 updateInterval string 24 currenciesMap map[string]string 25 } 26 27 type yahooFinWebCurrency struct { 28 Symbol string 29 Name string 30 } 31 32 type yahooFinV6HttpQuoteResp struct { 33 QuoteResponse struct { 34 Result []struct { 35 Symbol string `json:"symbol"` 36 RegularMarketPrice float64 `json:"regularMarketPrice"` 37 RegularMarketTime int `json:"regularMarketTime"` 38 } `json:"result"` 39 Error interface{} `json:"error"` 40 } `json:"quoteResponse"` 41 } 42 43 type yahooFinV8HttpChartRespResult struct { 44 Meta struct { 45 Currency string `json:"currency"` 46 Symbol string `json:"symbol"` 47 ExchangeName string `json:"exchangeName"` 48 InstrumentType string `json:"instrumentType"` 49 FirstTradeDate int `json:"firstTradeDate"` 50 RegularMarketTime int `json:"regularMarketTime"` 51 Gmtoffset int `json:"gmtoffset"` 52 Timezone string `json:"timezone"` 53 ExchangeTimezoneName string `json:"exchangeTimezoneName"` 54 RegularMarketPrice float64 `json:"regularMarketPrice"` 55 ChartPreviousClose float64 `json:"chartPreviousClose"` 56 PreviousClose float64 `json:"previousClose"` 57 Scale int `json:"scale"` 58 PriceHint int `json:"priceHint"` 59 CurrentTradingPeriod struct { 60 Pre struct { 61 Timezone string `json:"timezone"` 62 Start int `json:"start"` 63 End int `json:"end"` 64 Gmtoffset int `json:"gmtoffset"` 65 } `json:"pre"` 66 Regular struct { 67 Timezone string `json:"timezone"` 68 Start int `json:"start"` 69 End int `json:"end"` 70 Gmtoffset int `json:"gmtoffset"` 71 } `json:"regular"` 72 Post struct { 73 Timezone string `json:"timezone"` 74 Start int `json:"start"` 75 End int `json:"end"` 76 Gmtoffset int `json:"gmtoffset"` 77 } `json:"post"` 78 } `json:"currentTradingPeriod"` 79 TradingPeriods [][]struct { 80 Timezone string `json:"timezone"` 81 Start int `json:"start"` 82 End int `json:"end"` 83 Gmtoffset int `json:"gmtoffset"` 84 } `json:"tradingPeriods"` 85 DataGranularity string `json:"dataGranularity"` 86 Range string `json:"range"` 87 ValidRanges []string `json:"validRanges"` 88 } `json:"meta"` 89 Timestamp []int `json:"timestamp"` 90 Indicators struct { 91 Quote []struct { 92 Volume []float64 `json:"volume"` 93 Open []float64 `json:"open"` 94 High []float64 `json:"high"` 95 Low []float64 `json:"low"` 96 Close []float64 `json:"close"` 97 } `json:"quote"` 98 } `json:"indicators"` 99 } 100 101 type yahooFinV8HttpChartResp struct { 102 Chart struct { 103 Result []yahooFinV8HttpChartRespResult `json:"result"` 104 Error string `json:"error"` 105 } `json:"chart"` 106 } 107 108 const ( 109 yahooFinSource = "YahooFinance" 110 yahooFinUpdateFreqDefault = 60 * 2 // Default update frequency (in seconds) 111 yahooFinUpdateFreqEnv = "YAHOOFIN_UPDATE_FREQ" 112 yahooFinUpdateRangeEnv = "YAHOOFIN_UPDATE_RANGE" 113 yahooFinUpdateIntervalEnv = "YAHOOFIN_UPDATE_INTERVAL" 114 yahooFinCurrenciesMapEnv = "YAHOOFIN_CURRENCIES_MAP" 115 yahooFinWebCurrencies = "https://finance.yahoo.com/currencies" 116 yahooFinHttpV10Host = "https://query1.finance.yahoo.com" 117 yahooFinHttpV11Host = "https://query2.finance.yahoo.com" 118 yahooFinTypeCurrency = "CURRENCY" 119 yahooFinV6HttpPathQuote = "/v6/finance/quote" 120 yahooFinV8HttpPathChart = "/v8/finance/chart" 121 ) 122 123 func NewYahooFinScraper(datastore models.Datastore) (s *YahooFinScraper) { 124 foreignScrapper := ForeignScraper{ 125 shutdown: make(chan nothing), 126 error: nil, 127 datastore: datastore, 128 chanQuotation: make(chan *models.ForeignQuotation), 129 } 130 131 // Define the defaults 132 updateRange := "1d" 133 log.Infof("Default update range set to %s\n", updateRange) 134 updateInterval := "1m" 135 log.Infof("Default update interval set to %s\n", updateInterval) 136 updateFreq := yahooFinUpdateFreqDefault * time.Second 137 log.Infof("Default update frequency set to %d seconds\n", yahooFinUpdateFreqDefault) 138 currencyMap := make(map[string]string) 139 140 // Read env variables and override defaults if needed 141 // // TODO: validate range and interval formats (1m, 1h, 1d, 1w, 1mo, 1y) 142 yahooFinUpdateRange := utils.Getenv(yahooFinUpdateRangeEnv, "") 143 if yahooFinUpdateRange != "" { 144 updateRange = yahooFinUpdateRange 145 log.Infof("Config set update range to %s\n", updateRange) 146 } 147 yahooFinUpdateInterval := utils.Getenv(yahooFinUpdateIntervalEnv, "") 148 if yahooFinUpdateInterval != "" { 149 updateInterval = yahooFinUpdateInterval 150 log.Infof("Config set update interval to %s\n", updateInterval) 151 } 152 yahooFinUpdateFreq, err := strconv.ParseInt(utils.Getenv(yahooFinUpdateFreqEnv, "0"), 10, 64) 153 if err != nil { 154 log.Errorf("fail to parse %v env variable: %v", yahooFinUpdateFreqEnv, err) 155 return 156 } 157 if yahooFinUpdateFreq != 0 { 158 updateFreq = time.Duration(yahooFinUpdateFreq) * time.Second 159 log.Infof("Config set update frequency to %f seconds\n", updateFreq.Seconds()) 160 } 161 // Because Yahoo Finance don't have any public endpoint to discover available currency's symbols, 162 // we need to scrape webpage to extract metadata. This map is the fallback in case the crawling process fails. 163 // Also, some of the pairs are not contained in the webpage. These need to be added to the env var manually. 164 // The data was extracted on Jan10 2023, maps the Yahoo Finance symbols to a pair of ISO 4217 friendly format. 165 // Use the <YAHOO_SYMBOL>:<AAA-BBB> format separeted by a comma: EURGBP=X:EUR-GBP,CNY=X:USD-CNY 166 currenciesList := utils.Getenv(yahooFinCurrenciesMapEnv, "") 167 if currenciesList != "" { 168 currenciesListSplit := strings.Split(currenciesList, ",") 169 if len(currenciesListSplit) > 1 { 170 currencyMapDefault := make(map[string]string) 171 log.Infof("Config set meta-info for %d currencies\n", len(currenciesListSplit)) 172 for _, c := range currenciesListSplit { 173 currency := strings.Split(c, ":") 174 if len(currency) != 2 { 175 log.Fatal("currency must have 2 identifier: ", currency) 176 } 177 symbol := currency[0] 178 if symbol[len(symbol)-2:] == "=X" { 179 if len(symbol) == 5 { 180 symbol = "USD" + symbol 181 } 182 } 183 currencyMapDefault[symbol] = currency[1] 184 log.Infof("- %s %s", currency[1], symbol) 185 186 } 187 currencyMap = currencyMapDefault 188 } 189 } 190 191 // Crawl currencies webpage to extract metadata 192 data, err := yahooCrawlCurrencies() 193 if err != nil { 194 log.Warnf("Failed to crawl currencies, using default map: %s", err) 195 } else { 196 log.Infof("Meta information for %d currencies found", len(data)) 197 for _, currency := range data { 198 if _, ok := currencyMap[currency.Symbol]; !ok { 199 currencyMap[currency.Symbol] = currency.Name 200 } 201 } 202 } 203 204 // Create the scraper 205 s = &YahooFinScraper{ 206 ticker: time.NewTicker(updateFreq), 207 updateRange: updateRange, 208 updateInterval: updateInterval, 209 foreignScrapper: foreignScrapper, 210 currenciesMap: currencyMap, 211 } 212 go s.mainLoop() 213 214 return s 215 } 216 217 // Closes any existing connections 218 func (scraper *YahooFinScraper) Close() error { 219 if scraper.foreignScrapper.closed { 220 return errors.New("scraper already closed") 221 } 222 close(scraper.foreignScrapper.shutdown) 223 <-scraper.foreignScrapper.shutdownDone 224 scraper.foreignScrapper.errorLock.RLock() 225 defer scraper.foreignScrapper.errorLock.RUnlock() 226 return scraper.foreignScrapper.error 227 } 228 229 // Returns the channel to which new quotes are pushed 230 func (scraper *YahooFinScraper) GetQuoteChannel() chan *models.ForeignQuotation { 231 return scraper.foreignScrapper.chanQuotation 232 } 233 234 // Retrieves new coin information from the Yahoo Finance API and stores it to influx 235 func (scraper *YahooFinScraper) UpdateQuotation() error { 236 updateStartTime := time.Now() 237 238 for k := range scraper.currenciesMap { 239 chartDataRes, err := scraper.fetchChartData(k) 240 if err != nil { 241 log.Error("Error fetching chart data: ", err) 242 return err 243 } 244 for _, result := range chartDataRes { 245 symbol := result.Meta.Symbol 246 if (len(symbol) == 8 || len(symbol) == 5) && symbol[len(symbol)-2:] == "=X" { 247 if _, ok := scraper.currenciesMap[symbol]; !ok { 248 if len(symbol) == 5 { 249 symbol = "USD" + symbol[len(symbol)-5:] 250 } 251 } 252 253 if _, ok := scraper.currenciesMap[symbol]; !ok { 254 log.Warnf("Symbol %s not found in the map", symbol) 255 continue 256 } 257 quoteSymbol := scraper.currenciesMap[symbol] 258 259 // Iterate through data starting at most recent timestamp with non-null price 260 // in order to store this one and continue with next pair. 261 numData := len(result.Timestamp) 262 for i := range result.Timestamp { 263 quoteDateTime := time.Unix(int64(result.Timestamp[numData-i-1]), 0) 264 quotePrice := result.Indicators.Quote[0].Close[numData-i-1] 265 if quotePrice == 0 { 266 continue 267 } 268 269 priceYesterday, err := scraper.foreignScrapper.datastore.GetForeignPriceYesterday(quoteSymbol, yahooFinSource) 270 if err != nil { 271 priceYesterday = 0 272 } 273 quote := models.ForeignQuotation{ 274 Symbol: quoteSymbol, 275 Name: quoteSymbol, 276 Price: quotePrice, 277 PriceYesterday: priceYesterday, 278 VolumeYesterdayUSD: 0.0, // Fetched volume data is always 0 (not available) 279 Source: yahooFinSource, 280 Time: quoteDateTime, 281 } 282 scraper.foreignScrapper.chanQuotation <- "e 283 break 284 } 285 } else { 286 log.Warnf("Warning, the received symbol %s was not parsed, ignoring it", result.Meta.Symbol) 287 } 288 } 289 } 290 291 updateElapsedTime := time.Since(updateStartTime) 292 log.Infof("Quotes updated in %f seconds", updateElapsedTime.Seconds()) 293 return nil 294 } 295 296 // Main loop runs in a goroutine until channel s is closed. 297 func (scraper *YahooFinScraper) mainLoop() { 298 299 // Update quotes on startup 300 log.Infof("Initializing scraper with %d currencies", len(scraper.currenciesMap)) 301 err := scraper.UpdateQuotation() 302 if err != nil { 303 log.Error(err) 304 } 305 306 // Start main loop with ticker 307 log.Infof("Starting main loop") 308 for { 309 select { 310 case <-scraper.ticker.C: 311 err := scraper.UpdateQuotation() 312 if err != nil { 313 log.Error(err) 314 } 315 case <-scraper.foreignScrapper.shutdown: // user requested shutdown 316 log.Printf("%s scraper shutting down", yahooFinSource) 317 return 318 } 319 } 320 } 321 322 // Fetches chart data for a given symbol 323 func (scraper *YahooFinScraper) fetchChartData(symbol string) (chart []yahooFinV8HttpChartRespResult, err error) { 324 325 // Prepare the request 326 reqUrl := yahooFinHttpV10Host + yahooFinV8HttpPathChart + "/" + symbol 327 req, err := http.NewRequest("GET", reqUrl, nil) 328 if err != nil { 329 log.Errorf("Error %s", err) 330 return chart, err 331 } 332 333 // Add URL query parameters and encode them 334 q := url.Values{} 335 q.Add("interval", scraper.updateInterval) 336 q.Add("range", scraper.updateRange) 337 req.URL.RawQuery = q.Encode() 338 // Make the request 339 client := &http.Client{} 340 resp, err := client.Do(req) 341 if err != nil { 342 log.Errorf("Error, cannot fetch %s", req.URL.String()) 343 return chart, err 344 } 345 346 // Reads the response 347 body, err := ioutil.ReadAll(resp.Body) 348 if err != nil { 349 log.Errorf("Error, cannot read the chart data response") 350 return chart, err 351 } 352 353 // Parse the response as json 354 var chartReq yahooFinV8HttpChartResp 355 err = json.Unmarshal(body, &chartReq) 356 if err != nil { 357 log.Errorf("Error, cannot unmarshall chart data") 358 return chart, err 359 } 360 361 return chartReq.Chart.Result, nil 362 } 363 364 // Crawl currencies webpage and return a slice of currency metadata 365 func yahooCrawlCurrencies() (currencies []yahooFinWebCurrency, err error) { 366 367 // Instantiate default cralwer collector 368 c := colly.NewCollector() 369 370 c.OnResponse(func(r *colly.Response) { 371 if r.StatusCode == 200 { 372 log.Debugf("%d: %s", r.StatusCode, r.Request.URL) 373 } else { 374 log.Debugf("%d: %s", r.StatusCode, r.Request.URL) 375 } 376 }) 377 378 c.OnError(func(r *colly.Response, err error) { 379 log.Errorln(err) 380 }) 381 382 // When a HTML element is found with the given selector 383 c.OnHTML("#list-res-table > div > table > tbody", func(e *colly.HTMLElement) { 384 e.ForEach("tr", func(_ int, el *colly.HTMLElement) { 385 symbol := el.ChildText("td:nth-child(1)") 386 name := el.ChildText("td:nth-child(2)") 387 if (len(symbol) == 5 || len(symbol) == 8) && symbol[len(symbol)-2:] == "=X" { 388 if len(symbol) == 5 { 389 symbol = "USD" + symbol 390 } 391 if nameSplit := strings.Split(name, "/"); len(nameSplit) != 2 { 392 log.Errorf("Cannot parse name %s", name) 393 return 394 } 395 currency := yahooFinWebCurrency{ 396 Symbol: symbol, 397 Name: strings.Split(name, "/")[0] + "-" + strings.Split(name, "/")[1], 398 } 399 log.Debugf("- %s: %s", currency.Name, currency.Symbol) 400 currencies = append(currencies, currency) 401 } else { 402 log.Warnf("Warning, cannot parse symbol %s", symbol) 403 } 404 }) 405 }) 406 407 // Visit the currencies webpage 408 log.Printf("Crawling currencies metadata") 409 err = c.Visit(yahooFinWebCurrencies) 410 if err != nil { 411 return currencies, err 412 } 413 414 return currencies, nil 415 }