github.com/omniscale/go-osm@v0.3.1/replication/internal/source/source.go (about)

     1  package source
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"log"
     9  	"net"
    10  	"net/http"
    11  	"os"
    12  	"path"
    13  	"path/filepath"
    14  	"time"
    15  
    16  	"gopkg.in/fsnotify.v1"
    17  
    18  	"github.com/omniscale/go-osm/replication"
    19  )
    20  
    21  var isDebug = false
    22  
    23  func debug(v ...interface{}) {
    24  	if isDebug {
    25  		log.Println(v...)
    26  	}
    27  }
    28  
    29  type NotAvailable struct {
    30  	url string
    31  }
    32  
    33  func (e *NotAvailable) Error() string {
    34  	return fmt.Sprintf("File not available: %s", e.url)
    35  }
    36  
    37  // N = AAA*1000000 + BBB*1000 + CCC
    38  func seqPath(seq int) string {
    39  	c := seq % 1000
    40  	b := seq / 1000 % 1000
    41  	a := seq / 1000000
    42  
    43  	return fmt.Sprintf("%03d/%03d/%03d", a, b, c)
    44  }
    45  
    46  var _ replication.Source = &downloader{}
    47  
    48  type downloader struct {
    49  	baseUrl      string
    50  	dest         string
    51  	FileExt      string
    52  	StateExt     string
    53  	lastSequence int
    54  	StateTime    func(string) (time.Time, error)
    55  	interval     time.Duration
    56  	errWaittime  time.Duration
    57  	naWaittime   time.Duration
    58  	sequences    chan replication.Sequence
    59  	client       *http.Client
    60  	ctx          context.Context
    61  	cancel       context.CancelFunc
    62  }
    63  
    64  func NewDownloader(dest, url string, seq int, interval time.Duration) *downloader {
    65  	client := &http.Client{
    66  		Transport: &http.Transport{
    67  			Proxy: http.ProxyFromEnvironment,
    68  			Dial: (&net.Dialer{
    69  				Timeout:   30 * time.Second,
    70  				KeepAlive: 1 * time.Second, // do not keep alive till next interval
    71  			}).Dial,
    72  			TLSHandshakeTimeout:   10 * time.Second,
    73  			ResponseHeaderTimeout: 10 * time.Second,
    74  			ExpectContinueTimeout: 1 * time.Second,
    75  		},
    76  	}
    77  
    78  	var naWaittime time.Duration
    79  	switch {
    80  	case interval >= 24*time.Hour:
    81  		naWaittime = 5 * time.Minute
    82  	case interval >= time.Hour:
    83  		naWaittime = 60 * time.Second
    84  	default:
    85  		naWaittime = 10 * time.Second
    86  	}
    87  
    88  	ctx, cancel := context.WithCancel(context.Background())
    89  	dl := &downloader{
    90  		baseUrl:      url,
    91  		dest:         dest,
    92  		lastSequence: seq - 1, // we want to start with seq, so lastSequence is -1
    93  		interval:     interval,
    94  		errWaittime:  60 * time.Second,
    95  		naWaittime:   naWaittime,
    96  		sequences:    make(chan replication.Sequence, 4),
    97  		client:       client,
    98  		ctx:          ctx,
    99  		cancel:       cancel,
   100  	}
   101  
   102  	return dl
   103  }
   104  
   105  func (d *downloader) Sequences() <-chan replication.Sequence {
   106  	return d.sequences
   107  }
   108  
   109  func (d *downloader) download(seq int, ext string) error {
   110  	dest := path.Join(d.dest, seqPath(seq)+ext)
   111  	url := d.baseUrl + seqPath(seq) + ext
   112  	debug("[debug] Downloading diff file from ", url)
   113  
   114  	if _, err := os.Stat(dest); err == nil {
   115  		return nil
   116  	}
   117  
   118  	if err := os.MkdirAll(path.Dir(dest), 0755); err != nil {
   119  		return err
   120  	}
   121  
   122  	req, err := http.NewRequest("GET", url, nil)
   123  	if err != nil {
   124  		return err
   125  	}
   126  	req.Header.Set("User-Agent", "github.com/omniscale/go-osm")
   127  	resp, err := d.client.Do(req)
   128  	if err != nil {
   129  		return err
   130  	}
   131  
   132  	defer resp.Body.Close()
   133  
   134  	if resp.StatusCode == 404 {
   135  		return &NotAvailable{url}
   136  	}
   137  
   138  	if resp.StatusCode != 200 {
   139  		return errors.New(fmt.Sprintf("invalid response: %v", resp))
   140  	}
   141  
   142  	tmpDest := fmt.Sprintf("%s~%d", dest, os.Getpid())
   143  	out, err := os.Create(tmpDest)
   144  	if err != nil {
   145  		return err
   146  	}
   147  	defer out.Close()
   148  
   149  	_, err = io.Copy(out, resp.Body)
   150  	if err != nil {
   151  		return err
   152  	}
   153  	out.Close()
   154  
   155  	err = os.Rename(tmpDest, dest)
   156  	if err != nil {
   157  		return err
   158  	}
   159  
   160  	return nil
   161  }
   162  
   163  // downloadTillSuccess tries to download file till it is available, returns
   164  // true if available on first try.
   165  func (d *downloader) downloadTillSuccess(ctx context.Context, seq int, ext string) bool {
   166  	for tries := 0; ; tries++ {
   167  		if ctx.Err() != nil {
   168  			return false
   169  		}
   170  		err := d.download(seq, ext)
   171  		if err == nil {
   172  			return tries == 0
   173  		}
   174  		if _, ok := err.(*NotAvailable); ok {
   175  			wait(ctx, d.naWaittime)
   176  		} else {
   177  			debug("[error] Downloading file:", err)
   178  			d.sequences <- replication.Sequence{
   179  				Sequence: seq,
   180  				Error:    err,
   181  			}
   182  			wait(ctx, d.errWaittime)
   183  		}
   184  	}
   185  }
   186  
   187  func wait(ctx context.Context, duration time.Duration) {
   188  	select {
   189  	case <-ctx.Done():
   190  	case <-time.After(duration):
   191  	}
   192  }
   193  
   194  func (d *downloader) Start() {
   195  	d.fetchNextLoop()
   196  }
   197  
   198  func (d *downloader) Stop() {
   199  	d.cancel()
   200  }
   201  
   202  func (d *downloader) fetchNextLoop() {
   203  	stateFile := path.Join(d.dest, seqPath(d.lastSequence)+d.StateExt)
   204  	lastTime, err := d.StateTime(stateFile)
   205  	for {
   206  		nextSeq := d.lastSequence + 1
   207  		debug("[debug] Processing download for sequence", nextSeq)
   208  		if err == nil {
   209  			nextDiffTime := lastTime.Add(d.interval)
   210  			if nextDiffTime.After(time.Now()) {
   211  				// we catched up and the next diff file is in the future.
   212  				// wait till last diff time + interval, before fetching next
   213  				nextDiffTime = lastTime.Add(d.interval + 2*time.Second /* allow small time diff between servers */)
   214  				waitFor := nextDiffTime.Sub(time.Now())
   215  				debug("[debug] Waiting for next download in", waitFor)
   216  				wait(d.ctx, waitFor)
   217  			}
   218  		}
   219  		// download will retry until they succeed
   220  		d.downloadTillSuccess(d.ctx, nextSeq, d.StateExt)
   221  		noWait := d.downloadTillSuccess(d.ctx, nextSeq, d.FileExt)
   222  		if d.ctx.Err() != nil {
   223  			close(d.sequences)
   224  			return
   225  		}
   226  		d.lastSequence = nextSeq
   227  		base := path.Join(d.dest, seqPath(d.lastSequence))
   228  		lastTime, _ = d.StateTime(base + d.StateExt)
   229  
   230  		var latest bool
   231  		if noWait {
   232  			if d.download(nextSeq+1, d.StateExt) == nil {
   233  				// next sequence is immediately available
   234  				latest = false
   235  			} else {
   236  				// download of next seq failed (404 or error)
   237  				latest = true
   238  			}
   239  		} else { // waited for this seq, so assume it's the latest
   240  			latest = true
   241  		}
   242  
   243  		d.sequences <- replication.Sequence{
   244  			Sequence:      d.lastSequence,
   245  			Filename:      base + d.FileExt,
   246  			StateFilename: base + d.StateExt,
   247  			Time:          lastTime,
   248  			Latest:        latest,
   249  		}
   250  	}
   251  }
   252  
   253  var _ replication.Source = &reader{}
   254  
   255  type reader struct {
   256  	dest         string
   257  	FileExt      string
   258  	StateExt     string
   259  	lastSequence int
   260  	StateTime    func(string) (time.Time, error)
   261  	errWaittime  time.Duration
   262  	sequences    chan replication.Sequence
   263  	ctx          context.Context
   264  	cancel       context.CancelFunc
   265  }
   266  
   267  func NewReader(dest string, seq int) *reader {
   268  	ctx, cancel := context.WithCancel(context.Background())
   269  	r := &reader{
   270  		dest:         dest,
   271  		lastSequence: seq,
   272  		sequences:    make(chan replication.Sequence, 1),
   273  		errWaittime:  60 * time.Second,
   274  		ctx:          ctx,
   275  		cancel:       cancel,
   276  	}
   277  
   278  	return r
   279  }
   280  
   281  func (d *reader) Sequences() <-chan replication.Sequence {
   282  	return d.sequences
   283  }
   284  
   285  func (d *reader) waitTillPresent(ctx context.Context, seq int, ext string) error {
   286  	filename := path.Join(d.dest, seqPath(seq)+ext)
   287  	return waitTillPresent(ctx, filename)
   288  }
   289  
   290  func (d *reader) Start() {
   291  	d.fetchNextLoop()
   292  }
   293  
   294  func (d *reader) Stop() {
   295  	d.cancel()
   296  }
   297  
   298  func (d *reader) fetchNextLoop() {
   299  	for {
   300  		nextSeq := d.lastSequence + 1
   301  		if err := d.waitTillPresent(d.ctx, nextSeq, d.StateExt); err != nil {
   302  			d.sequences <- replication.Sequence{
   303  				Sequence: nextSeq,
   304  				Error:    err,
   305  			}
   306  			wait(d.ctx, d.errWaittime)
   307  			continue
   308  		}
   309  		if err := d.waitTillPresent(d.ctx, nextSeq, d.FileExt); err != nil {
   310  			d.sequences <- replication.Sequence{
   311  				Sequence: nextSeq,
   312  				Error:    err,
   313  			}
   314  			wait(d.ctx, d.errWaittime)
   315  			continue
   316  		}
   317  		if d.ctx.Err() != nil {
   318  			close(d.sequences)
   319  			return
   320  		}
   321  		d.lastSequence = nextSeq
   322  		base := path.Join(d.dest, seqPath(d.lastSequence))
   323  		lastTime, _ := d.StateTime(base + d.StateExt)
   324  
   325  		latest := !d.seqIsAvailable(d.lastSequence+1, d.StateExt)
   326  		d.sequences <- replication.Sequence{
   327  			Sequence:      d.lastSequence,
   328  			Filename:      base + d.FileExt,
   329  			StateFilename: base + d.StateExt,
   330  			Time:          lastTime,
   331  			Latest:        latest,
   332  		}
   333  	}
   334  }
   335  
   336  func (d *reader) seqIsAvailable(seq int, ext string) bool {
   337  	filename := path.Join(d.dest, seqPath(seq)+ext)
   338  	_, err := os.Stat(filename)
   339  	return err == nil
   340  }
   341  
   342  // waitTillPresent blocks till file is present. Returns without error if context was canceled.
   343  func waitTillPresent(ctx context.Context, filename string) error {
   344  	if _, err := os.Stat(filename); err == nil {
   345  		return nil
   346  	}
   347  
   348  	// fsnotify does not work recursive. wait for parent dirs first (e.g. 002/134)
   349  	parent := filepath.Dir(filename)
   350  	if err := waitTillPresent(ctx, parent); err != nil {
   351  		return err
   352  	}
   353  	if ctx.Err() != nil {
   354  		return nil
   355  	}
   356  
   357  	w, err := fsnotify.NewWatcher()
   358  	if err != nil {
   359  		return err
   360  	}
   361  	defer w.Close()
   362  	// need to watch on parent if we want to get events for new file
   363  	w.Add(parent)
   364  
   365  	// check again, in case file was created before we added the file
   366  	if _, err := os.Stat(filename); err == nil {
   367  		return nil
   368  	}
   369  
   370  	for {
   371  		select {
   372  		case <-ctx.Done():
   373  			return nil
   374  		case evt := <-w.Events:
   375  			if evt.Op&fsnotify.Create == fsnotify.Create && evt.Name == filename {
   376  				return nil
   377  			}
   378  		}
   379  	}
   380  	return nil
   381  }