github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/talks/2013/advconc/dedupermain/dedupermain.go (about) 1 // +build OMIT 2 3 // dedupermain runs the Subscribe example with several duplicate 4 // subscriptions to demonstrate deduping. 5 package main 6 7 import ( 8 "fmt" 9 "math/rand" 10 "time" 11 ) 12 13 // STARTITEM OMIT 14 // An Item is a stripped-down RSS item. 15 type Item struct{ Title, Channel, GUID string } 16 17 // STOPITEM OMIT 18 19 // STARTFETCHER OMIT 20 // A Fetcher fetches Items and returns the time when the next fetch should be 21 // attempted. On failure, Fetch returns a non-nil error. 22 type Fetcher interface { 23 Fetch() (items []Item, next time.Time, err error) 24 } 25 26 // STOPFETCHER OMIT 27 28 // STARTSUBSCRIPTION OMIT 29 // A Subscription delivers Items over a channel. Close cancels the 30 // subscription, closes the Updates channel, and returns the last fetch error, 31 // if any. 32 type Subscription interface { 33 Updates() <-chan Item 34 Close() error 35 } 36 37 // STOPSUBSCRIPTION OMIT 38 39 // STARTSUBSCRIBE OMIT 40 // Subscribe returns a new Subscription that uses fetcher to fetch Items. 41 func Subscribe(fetcher Fetcher) Subscription { 42 s := &sub{ 43 fetcher: fetcher, 44 updates: make(chan Item), // for Updates 45 closing: make(chan chan error), // for Close 46 } 47 go s.loop() 48 return s 49 } 50 51 // STOPSUBSCRIBE OMIT 52 53 // sub implements the Subscription interface. 54 type sub struct { 55 fetcher Fetcher // fetches items 56 updates chan Item // sends items to the user 57 closing chan chan error // for Close 58 } 59 60 // STARTUPDATES OMIT 61 func (s *sub) Updates() <-chan Item { 62 return s.updates 63 } 64 65 // STOPUPDATES OMIT 66 67 // STARTCLOSE OMIT 68 // STARTCLOSESIG OMIT 69 func (s *sub) Close() error { 70 // STOPCLOSESIG OMIT 71 errc := make(chan error) 72 s.closing <- errc // HLchan 73 return <-errc // HLchan 74 } 75 76 // STOPCLOSE OMIT 77 78 // loopCloseOnly is a version of loop that includes only the logic 79 // that handles Close. 80 func (s *sub) loopCloseOnly() { 81 // STARTCLOSEONLY OMIT 82 var err error // set when Fetch fails 83 for { 84 select { 85 case errc := <-s.closing: // HLchan 86 errc <- err // HLchan 87 close(s.updates) // tells receiver we're done 88 return 89 } 90 } 91 // STOPCLOSEONLY OMIT 92 } 93 94 // loopFetchOnly is a version of loop that includes only the logic 95 // that calls Fetch. 96 func (s *sub) loopFetchOnly() { 97 // STARTFETCHONLY OMIT 98 var pending []Item // appended by fetch; consumed by send 99 var next time.Time // initially January 1, year 0 100 var err error 101 for { 102 var fetchDelay time.Duration // initally 0 (no delay) 103 if now := time.Now(); next.After(now) { 104 fetchDelay = next.Sub(now) 105 } 106 startFetch := time.After(fetchDelay) 107 108 select { 109 case <-startFetch: 110 var fetched []Item 111 fetched, next, err = s.fetcher.Fetch() 112 if err != nil { 113 next = time.Now().Add(10 * time.Second) 114 break 115 } 116 pending = append(pending, fetched...) 117 } 118 } 119 // STOPFETCHONLY OMIT 120 } 121 122 // loopSendOnly is a version of loop that includes only the logic for 123 // sending items to s.updates. 124 func (s *sub) loopSendOnly() { 125 // STARTSENDONLY OMIT 126 var pending []Item // appended by fetch; consumed by send 127 for { 128 var first Item 129 var updates chan Item // HLupdates 130 if len(pending) > 0 { 131 first = pending[0] 132 updates = s.updates // enable send case // HLupdates 133 } 134 135 select { 136 case updates <- first: 137 pending = pending[1:] 138 } 139 } 140 // STOPSENDONLY OMIT 141 } 142 143 // mergedLoop is a version of loop that combines loopCloseOnly, 144 // loopFetchOnly, and loopSendOnly. 145 func (s *sub) mergedLoop() { 146 // STARTFETCHVARS OMIT 147 var pending []Item 148 var next time.Time 149 var err error 150 // STOPFETCHVARS OMIT 151 for { 152 // STARTNOCAP OMIT 153 var fetchDelay time.Duration 154 if now := time.Now(); next.After(now) { 155 fetchDelay = next.Sub(now) 156 } 157 startFetch := time.After(fetchDelay) 158 // STOPNOCAP OMIT 159 var first Item 160 var updates chan Item 161 if len(pending) > 0 { 162 first = pending[0] 163 updates = s.updates // enable send case 164 } 165 166 // STARTSELECT OMIT 167 select { 168 case errc := <-s.closing: // HLcases 169 errc <- err 170 close(s.updates) 171 return 172 // STARTFETCHCASE OMIT 173 case <-startFetch: // HLcases 174 var fetched []Item 175 fetched, next, err = s.fetcher.Fetch() // HLfetch 176 if err != nil { 177 next = time.Now().Add(10 * time.Second) 178 break 179 } 180 pending = append(pending, fetched...) // HLfetch 181 // STOPFETCHCASE OMIT 182 case updates <- first: // HLcases 183 pending = pending[1:] 184 } 185 // STOPSELECT OMIT 186 } 187 } 188 189 // dedupeLoop extends mergedLoop with deduping of fetched items. 190 func (s *sub) dedupeLoop() { 191 const maxPending = 10 192 // STARTSEEN OMIT 193 var pending []Item 194 var next time.Time 195 var err error 196 var seen = make(map[string]bool) // set of item.GUIDs // HLseen 197 // STOPSEEN OMIT 198 for { 199 // STARTCAP OMIT 200 var fetchDelay time.Duration 201 if now := time.Now(); next.After(now) { 202 fetchDelay = next.Sub(now) 203 } 204 var startFetch <-chan time.Time // HLcap 205 if len(pending) < maxPending { // HLcap 206 startFetch = time.After(fetchDelay) // enable fetch case // HLcap 207 } // HLcap 208 // STOPCAP OMIT 209 var first Item 210 var updates chan Item 211 if len(pending) > 0 { 212 first = pending[0] 213 updates = s.updates // enable send case 214 } 215 select { 216 case errc := <-s.closing: 217 errc <- err 218 close(s.updates) 219 return 220 // STARTDEDUPE OMIT 221 case <-startFetch: 222 var fetched []Item 223 fetched, next, err = s.fetcher.Fetch() // HLfetch 224 if err != nil { 225 next = time.Now().Add(10 * time.Second) 226 break 227 } 228 for _, item := range fetched { 229 if !seen[item.GUID] { // HLdupe 230 pending = append(pending, item) // HLdupe 231 seen[item.GUID] = true // HLdupe 232 } // HLdupe 233 } 234 // STOPDEDUPE OMIT 235 case updates <- first: 236 pending = pending[1:] 237 } 238 } 239 } 240 241 // loop periodically fecthes Items, sends them on s.updates, and exits 242 // when Close is called. It extends dedupeLoop with logic to run 243 // Fetch asynchronously. 244 func (s *sub) loop() { 245 const maxPending = 10 246 type fetchResult struct { 247 fetched []Item 248 next time.Time 249 err error 250 } 251 // STARTFETCHDONE OMIT 252 var fetchDone chan fetchResult // if non-nil, Fetch is running // HL 253 // STOPFETCHDONE OMIT 254 var pending []Item 255 var next time.Time 256 var err error 257 var seen = make(map[string]bool) 258 for { 259 var fetchDelay time.Duration 260 if now := time.Now(); next.After(now) { 261 fetchDelay = next.Sub(now) 262 } 263 // STARTFETCHIF OMIT 264 var startFetch <-chan time.Time 265 if fetchDone == nil && len(pending) < maxPending { // HLfetch 266 startFetch = time.After(fetchDelay) // enable fetch case 267 } 268 // STOPFETCHIF OMIT 269 var first Item 270 var updates chan Item 271 if len(pending) > 0 { 272 first = pending[0] 273 updates = s.updates // enable send case 274 } 275 // STARTFETCHASYNC OMIT 276 select { 277 case <-startFetch: // HLfetch 278 fetchDone = make(chan fetchResult, 1) // HLfetch 279 go func() { 280 fetched, next, err := s.fetcher.Fetch() 281 fetchDone <- fetchResult{fetched, next, err} 282 }() 283 case result := <-fetchDone: // HLfetch 284 fetchDone = nil // HLfetch 285 // Use result.fetched, result.next, result.err 286 // STOPFETCHASYNC OMIT 287 fetched := result.fetched 288 next, err = result.next, result.err 289 if err != nil { 290 next = time.Now().Add(10 * time.Second) 291 break 292 } 293 for _, item := range fetched { 294 if id := item.GUID; !seen[id] { // HLdupe 295 pending = append(pending, item) 296 seen[id] = true // HLdupe 297 } 298 } 299 case errc := <-s.closing: 300 errc <- err 301 close(s.updates) 302 return 303 case updates <- first: 304 pending = pending[1:] 305 } 306 } 307 } 308 309 // naiveMerge is a version of Merge that doesn't quite work right. In 310 // particular, the goroutines it starts may block forever on m.updates 311 // if the receiver stops receiving. 312 type naiveMerge struct { 313 subs []Subscription 314 updates chan Item 315 } 316 317 // STARTNAIVEMERGE OMIT 318 func NaiveMerge(subs ...Subscription) Subscription { 319 m := &naiveMerge{ 320 subs: subs, 321 updates: make(chan Item), 322 } 323 // STARTNAIVEMERGELOOP OMIT 324 for _, sub := range subs { 325 go func(s Subscription) { 326 for it := range s.Updates() { 327 m.updates <- it // HL 328 } 329 }(sub) 330 } 331 // STOPNAIVEMERGELOOP OMIT 332 return m 333 } 334 335 // STOPNAIVEMERGE OMIT 336 337 // STARTNAIVEMERGECLOSE OMIT 338 func (m *naiveMerge) Close() (err error) { 339 for _, sub := range m.subs { 340 if e := sub.Close(); err == nil && e != nil { 341 err = e 342 } 343 } 344 close(m.updates) // HL 345 return 346 } 347 348 // STOPNAIVEMERGECLOSE OMIT 349 350 func (m *naiveMerge) Updates() <-chan Item { 351 return m.updates 352 } 353 354 type merge struct { 355 subs []Subscription 356 updates chan Item 357 quit chan struct{} 358 errs chan error 359 } 360 361 // STARTMERGESIG OMIT 362 // Merge returns a Subscription that merges the item streams from subs. 363 // Closing the merged subscription closes subs. 364 func Merge(subs ...Subscription) Subscription { 365 // STOPMERGESIG OMIT 366 m := &merge{ 367 subs: subs, 368 updates: make(chan Item), 369 quit: make(chan struct{}), 370 errs: make(chan error), 371 } 372 // STARTMERGE OMIT 373 for _, sub := range subs { 374 go func(s Subscription) { 375 for { 376 var it Item 377 select { 378 case it = <-s.Updates(): 379 case <-m.quit: // HL 380 m.errs <- s.Close() // HL 381 return // HL 382 } 383 select { 384 case m.updates <- it: 385 case <-m.quit: // HL 386 m.errs <- s.Close() // HL 387 return // HL 388 } 389 } 390 }(sub) 391 } 392 // STOPMERGE OMIT 393 return m 394 } 395 396 func (m *merge) Updates() <-chan Item { 397 return m.updates 398 } 399 400 // STARTMERGECLOSE OMIT 401 func (m *merge) Close() (err error) { 402 close(m.quit) // HL 403 for _ = range m.subs { 404 if e := <-m.errs; e != nil { // HL 405 err = e 406 } 407 } 408 close(m.updates) // HL 409 return 410 } 411 412 // STOPMERGECLOSE OMIT 413 414 // NaiveDedupe converts a stream of Items that may contain duplicates 415 // into one that doesn't. 416 func NaiveDedupe(in <-chan Item) <-chan Item { 417 out := make(chan Item) 418 go func() { 419 seen := make(map[string]bool) 420 for it := range in { 421 if !seen[it.GUID] { 422 // BUG: this send blocks if the 423 // receiver closes the Subscription 424 // and stops receiving. 425 out <- it // HL 426 seen[it.GUID] = true 427 } 428 } 429 close(out) 430 }() 431 return out 432 } 433 434 type deduper struct { 435 s Subscription 436 updates chan Item 437 closing chan chan error 438 } 439 440 // Dedupe converts a Subscription that may send duplicate Items into 441 // one that doesn't. 442 func Dedupe(s Subscription) Subscription { 443 d := &deduper{ 444 s: s, 445 updates: make(chan Item), 446 closing: make(chan chan error), 447 } 448 go d.loop() 449 return d 450 } 451 452 func (d *deduper) loop() { 453 in := d.s.Updates() // enable receive 454 var pending Item 455 var out chan Item // disable send 456 seen := make(map[string]bool) 457 for { 458 select { 459 case it := <-in: 460 if !seen[it.GUID] { 461 pending = it 462 in = nil // disable receive 463 out = d.updates // enable send 464 seen[it.GUID] = true 465 } 466 case out <- pending: 467 in = d.s.Updates() // enable receive 468 out = nil // disable send 469 case errc := <-d.closing: 470 err := d.s.Close() 471 errc <- err 472 close(d.updates) 473 return 474 } 475 } 476 } 477 478 func (d *deduper) Close() error { 479 errc := make(chan error) 480 d.closing <- errc 481 return <-errc 482 } 483 484 func (d *deduper) Updates() <-chan Item { 485 return d.updates 486 } 487 488 // Fetch returns a Fetcher for Items from domain. 489 func Fetch(domain string) Fetcher { 490 return fakeFetch(domain) 491 } 492 493 func fakeFetch(domain string) Fetcher { 494 return &fakeFetcher{channel: domain} 495 } 496 497 type fakeFetcher struct { 498 channel string 499 items []Item 500 } 501 502 // FakeDuplicates causes the fake fetcher to return duplicate items. 503 var FakeDuplicates bool 504 505 func (f *fakeFetcher) Fetch() (items []Item, next time.Time, err error) { 506 now := time.Now() 507 next = now.Add(time.Duration(rand.Intn(5)) * 500 * time.Millisecond) 508 item := Item{ 509 Channel: f.channel, 510 Title: fmt.Sprintf("Item %d", len(f.items)), 511 } 512 item.GUID = item.Channel + "/" + item.Title 513 f.items = append(f.items, item) 514 if FakeDuplicates { 515 items = f.items 516 } else { 517 items = []Item{item} 518 } 519 return 520 } 521 522 func init() { 523 rand.Seed(time.Now().UnixNano()) 524 } 525 526 // STARTMAIN OMIT 527 func main() { 528 // STARTMERGECALL OMIT 529 // Subscribe to some feeds, and create a merged update stream. 530 merged := Dedupe(Merge( 531 Subscribe(Fetch("blog.golang.org")), 532 Subscribe(Fetch("blog.golang.org")), 533 Subscribe(Fetch("blog.golang.org")), 534 Subscribe(Fetch("googleblog.blogspot.com")), 535 Subscribe(Fetch("googledevelopers.blogspot.com")))) 536 // STOPMERGECALL OMIT 537 538 // Close the subscriptions after some time. 539 time.AfterFunc(3*time.Second, func() { 540 fmt.Println("closed:", merged.Close()) 541 }) 542 543 // Print the stream. 544 for it := range merged.Updates() { 545 fmt.Println(it.Channel, it.Title) 546 } 547 548 panic("show me the stacks") 549 } 550 551 // STOPMAIN OMIT