github.com/omniscale/go-osm@v0.3.1/replication/internal/source/source.go (about) 1 package source 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "io" 8 "log" 9 "net" 10 "net/http" 11 "os" 12 "path" 13 "path/filepath" 14 "time" 15 16 "gopkg.in/fsnotify.v1" 17 18 "github.com/omniscale/go-osm/replication" 19 ) 20 21 var isDebug = false 22 23 func debug(v ...interface{}) { 24 if isDebug { 25 log.Println(v...) 26 } 27 } 28 29 type NotAvailable struct { 30 url string 31 } 32 33 func (e *NotAvailable) Error() string { 34 return fmt.Sprintf("File not available: %s", e.url) 35 } 36 37 // N = AAA*1000000 + BBB*1000 + CCC 38 func seqPath(seq int) string { 39 c := seq % 1000 40 b := seq / 1000 % 1000 41 a := seq / 1000000 42 43 return fmt.Sprintf("%03d/%03d/%03d", a, b, c) 44 } 45 46 var _ replication.Source = &downloader{} 47 48 type downloader struct { 49 baseUrl string 50 dest string 51 FileExt string 52 StateExt string 53 lastSequence int 54 StateTime func(string) (time.Time, error) 55 interval time.Duration 56 errWaittime time.Duration 57 naWaittime time.Duration 58 sequences chan replication.Sequence 59 client *http.Client 60 ctx context.Context 61 cancel context.CancelFunc 62 } 63 64 func NewDownloader(dest, url string, seq int, interval time.Duration) *downloader { 65 client := &http.Client{ 66 Transport: &http.Transport{ 67 Proxy: http.ProxyFromEnvironment, 68 Dial: (&net.Dialer{ 69 Timeout: 30 * time.Second, 70 KeepAlive: 1 * time.Second, // do not keep alive till next interval 71 }).Dial, 72 TLSHandshakeTimeout: 10 * time.Second, 73 ResponseHeaderTimeout: 10 * time.Second, 74 ExpectContinueTimeout: 1 * time.Second, 75 }, 76 } 77 78 var naWaittime time.Duration 79 switch { 80 case interval >= 24*time.Hour: 81 naWaittime = 5 * time.Minute 82 case interval >= time.Hour: 83 naWaittime = 60 * time.Second 84 default: 85 naWaittime = 10 * time.Second 86 } 87 88 ctx, cancel := context.WithCancel(context.Background()) 89 dl := &downloader{ 90 baseUrl: url, 91 dest: dest, 92 lastSequence: seq - 1, // we want to start with seq, so lastSequence is -1 93 interval: interval, 94 errWaittime: 60 * time.Second, 95 naWaittime: naWaittime, 96 sequences: make(chan replication.Sequence, 4), 97 client: client, 98 ctx: ctx, 99 cancel: cancel, 100 } 101 102 return dl 103 } 104 105 func (d *downloader) Sequences() <-chan replication.Sequence { 106 return d.sequences 107 } 108 109 func (d *downloader) download(seq int, ext string) error { 110 dest := path.Join(d.dest, seqPath(seq)+ext) 111 url := d.baseUrl + seqPath(seq) + ext 112 debug("[debug] Downloading diff file from ", url) 113 114 if _, err := os.Stat(dest); err == nil { 115 return nil 116 } 117 118 if err := os.MkdirAll(path.Dir(dest), 0755); err != nil { 119 return err 120 } 121 122 req, err := http.NewRequest("GET", url, nil) 123 if err != nil { 124 return err 125 } 126 req.Header.Set("User-Agent", "github.com/omniscale/go-osm") 127 resp, err := d.client.Do(req) 128 if err != nil { 129 return err 130 } 131 132 defer resp.Body.Close() 133 134 if resp.StatusCode == 404 { 135 return &NotAvailable{url} 136 } 137 138 if resp.StatusCode != 200 { 139 return errors.New(fmt.Sprintf("invalid response: %v", resp)) 140 } 141 142 tmpDest := fmt.Sprintf("%s~%d", dest, os.Getpid()) 143 out, err := os.Create(tmpDest) 144 if err != nil { 145 return err 146 } 147 defer out.Close() 148 149 _, err = io.Copy(out, resp.Body) 150 if err != nil { 151 return err 152 } 153 out.Close() 154 155 err = os.Rename(tmpDest, dest) 156 if err != nil { 157 return err 158 } 159 160 return nil 161 } 162 163 // downloadTillSuccess tries to download file till it is available, returns 164 // true if available on first try. 165 func (d *downloader) downloadTillSuccess(ctx context.Context, seq int, ext string) bool { 166 for tries := 0; ; tries++ { 167 if ctx.Err() != nil { 168 return false 169 } 170 err := d.download(seq, ext) 171 if err == nil { 172 return tries == 0 173 } 174 if _, ok := err.(*NotAvailable); ok { 175 wait(ctx, d.naWaittime) 176 } else { 177 debug("[error] Downloading file:", err) 178 d.sequences <- replication.Sequence{ 179 Sequence: seq, 180 Error: err, 181 } 182 wait(ctx, d.errWaittime) 183 } 184 } 185 } 186 187 func wait(ctx context.Context, duration time.Duration) { 188 select { 189 case <-ctx.Done(): 190 case <-time.After(duration): 191 } 192 } 193 194 func (d *downloader) Start() { 195 d.fetchNextLoop() 196 } 197 198 func (d *downloader) Stop() { 199 d.cancel() 200 } 201 202 func (d *downloader) fetchNextLoop() { 203 stateFile := path.Join(d.dest, seqPath(d.lastSequence)+d.StateExt) 204 lastTime, err := d.StateTime(stateFile) 205 for { 206 nextSeq := d.lastSequence + 1 207 debug("[debug] Processing download for sequence", nextSeq) 208 if err == nil { 209 nextDiffTime := lastTime.Add(d.interval) 210 if nextDiffTime.After(time.Now()) { 211 // we catched up and the next diff file is in the future. 212 // wait till last diff time + interval, before fetching next 213 nextDiffTime = lastTime.Add(d.interval + 2*time.Second /* allow small time diff between servers */) 214 waitFor := nextDiffTime.Sub(time.Now()) 215 debug("[debug] Waiting for next download in", waitFor) 216 wait(d.ctx, waitFor) 217 } 218 } 219 // download will retry until they succeed 220 d.downloadTillSuccess(d.ctx, nextSeq, d.StateExt) 221 noWait := d.downloadTillSuccess(d.ctx, nextSeq, d.FileExt) 222 if d.ctx.Err() != nil { 223 close(d.sequences) 224 return 225 } 226 d.lastSequence = nextSeq 227 base := path.Join(d.dest, seqPath(d.lastSequence)) 228 lastTime, _ = d.StateTime(base + d.StateExt) 229 230 var latest bool 231 if noWait { 232 if d.download(nextSeq+1, d.StateExt) == nil { 233 // next sequence is immediately available 234 latest = false 235 } else { 236 // download of next seq failed (404 or error) 237 latest = true 238 } 239 } else { // waited for this seq, so assume it's the latest 240 latest = true 241 } 242 243 d.sequences <- replication.Sequence{ 244 Sequence: d.lastSequence, 245 Filename: base + d.FileExt, 246 StateFilename: base + d.StateExt, 247 Time: lastTime, 248 Latest: latest, 249 } 250 } 251 } 252 253 var _ replication.Source = &reader{} 254 255 type reader struct { 256 dest string 257 FileExt string 258 StateExt string 259 lastSequence int 260 StateTime func(string) (time.Time, error) 261 errWaittime time.Duration 262 sequences chan replication.Sequence 263 ctx context.Context 264 cancel context.CancelFunc 265 } 266 267 func NewReader(dest string, seq int) *reader { 268 ctx, cancel := context.WithCancel(context.Background()) 269 r := &reader{ 270 dest: dest, 271 lastSequence: seq, 272 sequences: make(chan replication.Sequence, 1), 273 errWaittime: 60 * time.Second, 274 ctx: ctx, 275 cancel: cancel, 276 } 277 278 return r 279 } 280 281 func (d *reader) Sequences() <-chan replication.Sequence { 282 return d.sequences 283 } 284 285 func (d *reader) waitTillPresent(ctx context.Context, seq int, ext string) error { 286 filename := path.Join(d.dest, seqPath(seq)+ext) 287 return waitTillPresent(ctx, filename) 288 } 289 290 func (d *reader) Start() { 291 d.fetchNextLoop() 292 } 293 294 func (d *reader) Stop() { 295 d.cancel() 296 } 297 298 func (d *reader) fetchNextLoop() { 299 for { 300 nextSeq := d.lastSequence + 1 301 if err := d.waitTillPresent(d.ctx, nextSeq, d.StateExt); err != nil { 302 d.sequences <- replication.Sequence{ 303 Sequence: nextSeq, 304 Error: err, 305 } 306 wait(d.ctx, d.errWaittime) 307 continue 308 } 309 if err := d.waitTillPresent(d.ctx, nextSeq, d.FileExt); err != nil { 310 d.sequences <- replication.Sequence{ 311 Sequence: nextSeq, 312 Error: err, 313 } 314 wait(d.ctx, d.errWaittime) 315 continue 316 } 317 if d.ctx.Err() != nil { 318 close(d.sequences) 319 return 320 } 321 d.lastSequence = nextSeq 322 base := path.Join(d.dest, seqPath(d.lastSequence)) 323 lastTime, _ := d.StateTime(base + d.StateExt) 324 325 latest := !d.seqIsAvailable(d.lastSequence+1, d.StateExt) 326 d.sequences <- replication.Sequence{ 327 Sequence: d.lastSequence, 328 Filename: base + d.FileExt, 329 StateFilename: base + d.StateExt, 330 Time: lastTime, 331 Latest: latest, 332 } 333 } 334 } 335 336 func (d *reader) seqIsAvailable(seq int, ext string) bool { 337 filename := path.Join(d.dest, seqPath(seq)+ext) 338 _, err := os.Stat(filename) 339 return err == nil 340 } 341 342 // waitTillPresent blocks till file is present. Returns without error if context was canceled. 343 func waitTillPresent(ctx context.Context, filename string) error { 344 if _, err := os.Stat(filename); err == nil { 345 return nil 346 } 347 348 // fsnotify does not work recursive. wait for parent dirs first (e.g. 002/134) 349 parent := filepath.Dir(filename) 350 if err := waitTillPresent(ctx, parent); err != nil { 351 return err 352 } 353 if ctx.Err() != nil { 354 return nil 355 } 356 357 w, err := fsnotify.NewWatcher() 358 if err != nil { 359 return err 360 } 361 defer w.Close() 362 // need to watch on parent if we want to get events for new file 363 w.Add(parent) 364 365 // check again, in case file was created before we added the file 366 if _, err := os.Stat(filename); err == nil { 367 return nil 368 } 369 370 for { 371 select { 372 case <-ctx.Done(): 373 return nil 374 case evt := <-w.Events: 375 if evt.Op&fsnotify.Create == fsnotify.Create && evt.Name == filename { 376 return nil 377 } 378 } 379 } 380 return nil 381 }