github.com/diadata-org/diadata@v1.4.593/pkg/dia/scraper/foreign-scrapers/YahooFinanceScraper.go (about)

     1  package foreignscrapers
     2  
     3  import (
     4  	"encoding/json"
     5  	"errors"
     6  	"io/ioutil"
     7  	"net/http"
     8  	"net/url"
     9  	"strconv"
    10  	"strings"
    11  	"time"
    12  
    13  	models "github.com/diadata-org/diadata/pkg/model"
    14  	"github.com/diadata-org/diadata/pkg/utils"
    15  	"github.com/gocolly/colly"
    16  	log "github.com/sirupsen/logrus"
    17  )
    18  
    19  type YahooFinScraper struct {
    20  	ticker          *time.Ticker
    21  	foreignScrapper ForeignScraper
    22  	updateRange     string
    23  	updateInterval  string
    24  	currenciesMap   map[string]string
    25  }
    26  
    27  type yahooFinWebCurrency struct {
    28  	Symbol string
    29  	Name   string
    30  }
    31  
    32  type yahooFinV6HttpQuoteResp struct {
    33  	QuoteResponse struct {
    34  		Result []struct {
    35  			Symbol             string  `json:"symbol"`
    36  			RegularMarketPrice float64 `json:"regularMarketPrice"`
    37  			RegularMarketTime  int     `json:"regularMarketTime"`
    38  		} `json:"result"`
    39  		Error interface{} `json:"error"`
    40  	} `json:"quoteResponse"`
    41  }
    42  
    43  type yahooFinV8HttpChartRespResult struct {
    44  	Meta struct {
    45  		Currency             string  `json:"currency"`
    46  		Symbol               string  `json:"symbol"`
    47  		ExchangeName         string  `json:"exchangeName"`
    48  		InstrumentType       string  `json:"instrumentType"`
    49  		FirstTradeDate       int     `json:"firstTradeDate"`
    50  		RegularMarketTime    int     `json:"regularMarketTime"`
    51  		Gmtoffset            int     `json:"gmtoffset"`
    52  		Timezone             string  `json:"timezone"`
    53  		ExchangeTimezoneName string  `json:"exchangeTimezoneName"`
    54  		RegularMarketPrice   float64 `json:"regularMarketPrice"`
    55  		ChartPreviousClose   float64 `json:"chartPreviousClose"`
    56  		PreviousClose        float64 `json:"previousClose"`
    57  		Scale                int     `json:"scale"`
    58  		PriceHint            int     `json:"priceHint"`
    59  		CurrentTradingPeriod struct {
    60  			Pre struct {
    61  				Timezone  string `json:"timezone"`
    62  				Start     int    `json:"start"`
    63  				End       int    `json:"end"`
    64  				Gmtoffset int    `json:"gmtoffset"`
    65  			} `json:"pre"`
    66  			Regular struct {
    67  				Timezone  string `json:"timezone"`
    68  				Start     int    `json:"start"`
    69  				End       int    `json:"end"`
    70  				Gmtoffset int    `json:"gmtoffset"`
    71  			} `json:"regular"`
    72  			Post struct {
    73  				Timezone  string `json:"timezone"`
    74  				Start     int    `json:"start"`
    75  				End       int    `json:"end"`
    76  				Gmtoffset int    `json:"gmtoffset"`
    77  			} `json:"post"`
    78  		} `json:"currentTradingPeriod"`
    79  		TradingPeriods [][]struct {
    80  			Timezone  string `json:"timezone"`
    81  			Start     int    `json:"start"`
    82  			End       int    `json:"end"`
    83  			Gmtoffset int    `json:"gmtoffset"`
    84  		} `json:"tradingPeriods"`
    85  		DataGranularity string   `json:"dataGranularity"`
    86  		Range           string   `json:"range"`
    87  		ValidRanges     []string `json:"validRanges"`
    88  	} `json:"meta"`
    89  	Timestamp  []int `json:"timestamp"`
    90  	Indicators struct {
    91  		Quote []struct {
    92  			Volume []float64 `json:"volume"`
    93  			Open   []float64 `json:"open"`
    94  			High   []float64 `json:"high"`
    95  			Low    []float64 `json:"low"`
    96  			Close  []float64 `json:"close"`
    97  		} `json:"quote"`
    98  	} `json:"indicators"`
    99  }
   100  
   101  type yahooFinV8HttpChartResp struct {
   102  	Chart struct {
   103  		Result []yahooFinV8HttpChartRespResult `json:"result"`
   104  		Error  string                          `json:"error"`
   105  	} `json:"chart"`
   106  }
   107  
   108  const (
   109  	yahooFinSource            = "YahooFinance"
   110  	yahooFinUpdateFreqDefault = 60 * 2 // Default update frequency (in seconds)
   111  	yahooFinUpdateFreqEnv     = "YAHOOFIN_UPDATE_FREQ"
   112  	yahooFinUpdateRangeEnv    = "YAHOOFIN_UPDATE_RANGE"
   113  	yahooFinUpdateIntervalEnv = "YAHOOFIN_UPDATE_INTERVAL"
   114  	yahooFinCurrenciesMapEnv  = "YAHOOFIN_CURRENCIES_MAP"
   115  	yahooFinWebCurrencies     = "https://finance.yahoo.com/currencies"
   116  	yahooFinHttpV10Host       = "https://query1.finance.yahoo.com"
   117  	yahooFinHttpV11Host       = "https://query2.finance.yahoo.com"
   118  	yahooFinTypeCurrency      = "CURRENCY"
   119  	yahooFinV6HttpPathQuote   = "/v6/finance/quote"
   120  	yahooFinV8HttpPathChart   = "/v8/finance/chart"
   121  )
   122  
   123  func NewYahooFinScraper(datastore models.Datastore) (s *YahooFinScraper) {
   124  	foreignScrapper := ForeignScraper{
   125  		shutdown:      make(chan nothing),
   126  		error:         nil,
   127  		datastore:     datastore,
   128  		chanQuotation: make(chan *models.ForeignQuotation),
   129  	}
   130  
   131  	// Define the defaults
   132  	updateRange := "1d"
   133  	log.Infof("Default update range set to %s\n", updateRange)
   134  	updateInterval := "1m"
   135  	log.Infof("Default update interval set to %s\n", updateInterval)
   136  	updateFreq := yahooFinUpdateFreqDefault * time.Second
   137  	log.Infof("Default update frequency set to %d seconds\n", yahooFinUpdateFreqDefault)
   138  	currencyMap := make(map[string]string)
   139  
   140  	// Read env variables and override defaults if needed
   141  	// // TODO: validate range and interval formats (1m, 1h, 1d, 1w, 1mo, 1y)
   142  	yahooFinUpdateRange := utils.Getenv(yahooFinUpdateRangeEnv, "")
   143  	if yahooFinUpdateRange != "" {
   144  		updateRange = yahooFinUpdateRange
   145  		log.Infof("Config set update range to %s\n", updateRange)
   146  	}
   147  	yahooFinUpdateInterval := utils.Getenv(yahooFinUpdateIntervalEnv, "")
   148  	if yahooFinUpdateInterval != "" {
   149  		updateInterval = yahooFinUpdateInterval
   150  		log.Infof("Config set update interval to %s\n", updateInterval)
   151  	}
   152  	yahooFinUpdateFreq, err := strconv.ParseInt(utils.Getenv(yahooFinUpdateFreqEnv, "0"), 10, 64)
   153  	if err != nil {
   154  		log.Errorf("fail to parse %v env variable: %v", yahooFinUpdateFreqEnv, err)
   155  		return
   156  	}
   157  	if yahooFinUpdateFreq != 0 {
   158  		updateFreq = time.Duration(yahooFinUpdateFreq) * time.Second
   159  		log.Infof("Config set update frequency to %f seconds\n", updateFreq.Seconds())
   160  	}
   161  	// Because Yahoo Finance don't have any public endpoint to discover available currency's symbols,
   162  	// we need to scrape webpage to extract metadata. This map is the fallback in case the crawling process fails.
   163  	// Also, some of the pairs are not contained in the webpage. These need to be added to the env var manually.
   164  	// The data was extracted on Jan10 2023, maps the Yahoo Finance symbols to a pair of ISO 4217 friendly format.
   165  	// Use the <YAHOO_SYMBOL>:<AAA-BBB> format separeted by a comma: EURGBP=X:EUR-GBP,CNY=X:USD-CNY
   166  	currenciesList := utils.Getenv(yahooFinCurrenciesMapEnv, "")
   167  	if currenciesList != "" {
   168  		currenciesListSplit := strings.Split(currenciesList, ",")
   169  		if len(currenciesListSplit) > 1 {
   170  			currencyMapDefault := make(map[string]string)
   171  			log.Infof("Config set meta-info for %d currencies\n", len(currenciesListSplit))
   172  			for _, c := range currenciesListSplit {
   173  				currency := strings.Split(c, ":")
   174  				if len(currency) != 2 {
   175  					log.Fatal("currency must have 2 identifier: ", currency)
   176  				}
   177  				symbol := currency[0]
   178  				if symbol[len(symbol)-2:] == "=X" {
   179  					if len(symbol) == 5 {
   180  						symbol = "USD" + symbol
   181  					}
   182  				}
   183  				currencyMapDefault[symbol] = currency[1]
   184  				log.Infof("- %s %s", currency[1], symbol)
   185  
   186  			}
   187  			currencyMap = currencyMapDefault
   188  		}
   189  	}
   190  
   191  	// Crawl currencies webpage to extract metadata
   192  	data, err := yahooCrawlCurrencies()
   193  	if err != nil {
   194  		log.Warnf("Failed to crawl currencies, using default map: %s", err)
   195  	} else {
   196  		log.Infof("Meta information for %d currencies found", len(data))
   197  		for _, currency := range data {
   198  			if _, ok := currencyMap[currency.Symbol]; !ok {
   199  				currencyMap[currency.Symbol] = currency.Name
   200  			}
   201  		}
   202  	}
   203  
   204  	// Create the scraper
   205  	s = &YahooFinScraper{
   206  		ticker:          time.NewTicker(updateFreq),
   207  		updateRange:     updateRange,
   208  		updateInterval:  updateInterval,
   209  		foreignScrapper: foreignScrapper,
   210  		currenciesMap:   currencyMap,
   211  	}
   212  	go s.mainLoop()
   213  
   214  	return s
   215  }
   216  
   217  // Closes any existing connections
   218  func (scraper *YahooFinScraper) Close() error {
   219  	if scraper.foreignScrapper.closed {
   220  		return errors.New("scraper already closed")
   221  	}
   222  	close(scraper.foreignScrapper.shutdown)
   223  	<-scraper.foreignScrapper.shutdownDone
   224  	scraper.foreignScrapper.errorLock.RLock()
   225  	defer scraper.foreignScrapper.errorLock.RUnlock()
   226  	return scraper.foreignScrapper.error
   227  }
   228  
   229  // Returns the channel to which new quotes are pushed
   230  func (scraper *YahooFinScraper) GetQuoteChannel() chan *models.ForeignQuotation {
   231  	return scraper.foreignScrapper.chanQuotation
   232  }
   233  
   234  // Retrieves new coin information from the Yahoo Finance API and stores it to influx
   235  func (scraper *YahooFinScraper) UpdateQuotation() error {
   236  	updateStartTime := time.Now()
   237  
   238  	for k := range scraper.currenciesMap {
   239  		chartDataRes, err := scraper.fetchChartData(k)
   240  		if err != nil {
   241  			log.Error("Error fetching chart data: ", err)
   242  			return err
   243  		}
   244  		for _, result := range chartDataRes {
   245  			symbol := result.Meta.Symbol
   246  			if (len(symbol) == 8 || len(symbol) == 5) && symbol[len(symbol)-2:] == "=X" {
   247  				if _, ok := scraper.currenciesMap[symbol]; !ok {
   248  					if len(symbol) == 5 {
   249  						symbol = "USD" + symbol[len(symbol)-5:]
   250  					}
   251  				}
   252  
   253  				if _, ok := scraper.currenciesMap[symbol]; !ok {
   254  					log.Warnf("Symbol %s not found in the map", symbol)
   255  					continue
   256  				}
   257  				quoteSymbol := scraper.currenciesMap[symbol]
   258  
   259  				// Iterate through data starting at most recent timestamp with non-null price
   260  				// in order to store this one and continue with next pair.
   261  				numData := len(result.Timestamp)
   262  				for i := range result.Timestamp {
   263  					quoteDateTime := time.Unix(int64(result.Timestamp[numData-i-1]), 0)
   264  					quotePrice := result.Indicators.Quote[0].Close[numData-i-1]
   265  					if quotePrice == 0 {
   266  						continue
   267  					}
   268  
   269  					priceYesterday, err := scraper.foreignScrapper.datastore.GetForeignPriceYesterday(quoteSymbol, yahooFinSource)
   270  					if err != nil {
   271  						priceYesterday = 0
   272  					}
   273  					quote := models.ForeignQuotation{
   274  						Symbol:             quoteSymbol,
   275  						Name:               quoteSymbol,
   276  						Price:              quotePrice,
   277  						PriceYesterday:     priceYesterday,
   278  						VolumeYesterdayUSD: 0.0, // Fetched volume data is always 0 (not available)
   279  						Source:             yahooFinSource,
   280  						Time:               quoteDateTime,
   281  					}
   282  					scraper.foreignScrapper.chanQuotation <- &quote
   283  					break
   284  				}
   285  			} else {
   286  				log.Warnf("Warning, the received symbol %s was not parsed, ignoring it", result.Meta.Symbol)
   287  			}
   288  		}
   289  	}
   290  
   291  	updateElapsedTime := time.Since(updateStartTime)
   292  	log.Infof("Quotes updated in %f seconds", updateElapsedTime.Seconds())
   293  	return nil
   294  }
   295  
   296  // Main loop runs in a goroutine until channel s is closed.
   297  func (scraper *YahooFinScraper) mainLoop() {
   298  
   299  	// Update quotes on startup
   300  	log.Infof("Initializing scraper with %d currencies", len(scraper.currenciesMap))
   301  	err := scraper.UpdateQuotation()
   302  	if err != nil {
   303  		log.Error(err)
   304  	}
   305  
   306  	// Start main loop with ticker
   307  	log.Infof("Starting main loop")
   308  	for {
   309  		select {
   310  		case <-scraper.ticker.C:
   311  			err := scraper.UpdateQuotation()
   312  			if err != nil {
   313  				log.Error(err)
   314  			}
   315  		case <-scraper.foreignScrapper.shutdown: // user requested shutdown
   316  			log.Printf("%s scraper shutting down", yahooFinSource)
   317  			return
   318  		}
   319  	}
   320  }
   321  
   322  // Fetches chart data for a given symbol
   323  func (scraper *YahooFinScraper) fetchChartData(symbol string) (chart []yahooFinV8HttpChartRespResult, err error) {
   324  
   325  	// Prepare the request
   326  	reqUrl := yahooFinHttpV10Host + yahooFinV8HttpPathChart + "/" + symbol
   327  	req, err := http.NewRequest("GET", reqUrl, nil)
   328  	if err != nil {
   329  		log.Errorf("Error %s", err)
   330  		return chart, err
   331  	}
   332  
   333  	// Add URL query parameters and encode them
   334  	q := url.Values{}
   335  	q.Add("interval", scraper.updateInterval)
   336  	q.Add("range", scraper.updateRange)
   337  	req.URL.RawQuery = q.Encode()
   338  	// Make the request
   339  	client := &http.Client{}
   340  	resp, err := client.Do(req)
   341  	if err != nil {
   342  		log.Errorf("Error, cannot fetch %s", req.URL.String())
   343  		return chart, err
   344  	}
   345  
   346  	// Reads the response
   347  	body, err := ioutil.ReadAll(resp.Body)
   348  	if err != nil {
   349  		log.Errorf("Error, cannot read the chart data response")
   350  		return chart, err
   351  	}
   352  
   353  	// Parse the response as json
   354  	var chartReq yahooFinV8HttpChartResp
   355  	err = json.Unmarshal(body, &chartReq)
   356  	if err != nil {
   357  		log.Errorf("Error, cannot unmarshall chart data")
   358  		return chart, err
   359  	}
   360  
   361  	return chartReq.Chart.Result, nil
   362  }
   363  
   364  // Crawl currencies webpage and return a slice of currency metadata
   365  func yahooCrawlCurrencies() (currencies []yahooFinWebCurrency, err error) {
   366  
   367  	// Instantiate default cralwer collector
   368  	c := colly.NewCollector()
   369  
   370  	c.OnResponse(func(r *colly.Response) {
   371  		if r.StatusCode == 200 {
   372  			log.Debugf("%d: %s", r.StatusCode, r.Request.URL)
   373  		} else {
   374  			log.Debugf("%d: %s", r.StatusCode, r.Request.URL)
   375  		}
   376  	})
   377  
   378  	c.OnError(func(r *colly.Response, err error) {
   379  		log.Errorln(err)
   380  	})
   381  
   382  	// When a HTML element is found with the given selector
   383  	c.OnHTML("#list-res-table > div > table > tbody", func(e *colly.HTMLElement) {
   384  		e.ForEach("tr", func(_ int, el *colly.HTMLElement) {
   385  			symbol := el.ChildText("td:nth-child(1)")
   386  			name := el.ChildText("td:nth-child(2)")
   387  			if (len(symbol) == 5 || len(symbol) == 8) && symbol[len(symbol)-2:] == "=X" {
   388  				if len(symbol) == 5 {
   389  					symbol = "USD" + symbol
   390  				}
   391  				if nameSplit := strings.Split(name, "/"); len(nameSplit) != 2 {
   392  					log.Errorf("Cannot parse name %s", name)
   393  					return
   394  				}
   395  				currency := yahooFinWebCurrency{
   396  					Symbol: symbol,
   397  					Name:   strings.Split(name, "/")[0] + "-" + strings.Split(name, "/")[1],
   398  				}
   399  				log.Debugf("- %s: %s", currency.Name, currency.Symbol)
   400  				currencies = append(currencies, currency)
   401  			} else {
   402  				log.Warnf("Warning, cannot parse symbol %s", symbol)
   403  			}
   404  		})
   405  	})
   406  
   407  	// Visit the currencies webpage
   408  	log.Printf("Crawling currencies metadata")
   409  	err = c.Visit(yahooFinWebCurrencies)
   410  	if err != nil {
   411  		return currencies, err
   412  	}
   413  
   414  	return currencies, nil
   415  }