github.com/maenmax/kairep@v0.0.0-20210218001208-55bf3df36788/src/golang.org/x/tour/solutions/webcrawler.go (about) 1 // Copyright 2012 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // +build ignore 6 7 package main 8 9 import ( 10 "errors" 11 "fmt" 12 "sync" 13 ) 14 15 type Fetcher interface { 16 // Fetch returns the body of URL and 17 // a slice of URLs found on that page. 18 Fetch(url string) (body string, urls []string, err error) 19 } 20 21 // fetched tracks URLs that have been (or are being) fetched. 22 // The lock must be held while reading from or writing to the map. 23 // See http://golang.org/ref/spec#Struct_types section on embedded types. 24 var fetched = struct { 25 m map[string]error 26 sync.Mutex 27 }{m: make(map[string]error)} 28 29 var loading = errors.New("url load in progress") // sentinel value 30 31 // Crawl uses fetcher to recursively crawl 32 // pages starting with url, to a maximum of depth. 33 func Crawl(url string, depth int, fetcher Fetcher) { 34 if depth <= 0 { 35 fmt.Printf("<- Done with %v, depth 0.\n", url) 36 return 37 } 38 39 fetched.Lock() 40 if _, ok := fetched.m[url]; ok { 41 fetched.Unlock() 42 fmt.Printf("<- Done with %v, already fetched.\n", url) 43 return 44 } 45 // We mark the url to be loading to avoid others reloading it at the same time. 46 fetched.m[url] = loading 47 fetched.Unlock() 48 49 // We load it concurrently. 50 body, urls, err := fetcher.Fetch(url) 51 52 // And update the status in a synced zone. 53 fetched.Lock() 54 fetched.m[url] = err 55 fetched.Unlock() 56 57 if err != nil { 58 fmt.Printf("<- Error on %v: %v\n", url, err) 59 return 60 } 61 fmt.Printf("Found: %s %q\n", url, body) 62 done := make(chan bool) 63 for i, u := range urls { 64 fmt.Printf("-> Crawling child %v/%v of %v : %v.\n", i, len(urls), url, u) 65 go func(url string) { 66 Crawl(url, depth-1, fetcher) 67 done <- true 68 }(u) 69 } 70 for i, u := range urls { 71 fmt.Printf("<- [%v] %v/%v Waiting for child %v.\n", url, i, len(urls), u) 72 <-done 73 } 74 fmt.Printf("<- Done with %v\n", url) 75 } 76 77 func main() { 78 Crawl("http://golang.org/", 4, fetcher) 79 80 fmt.Println("Fetching stats\n--------------") 81 for url, err := range fetched.m { 82 if err != nil { 83 fmt.Printf("%v failed: %v\n", url, err) 84 } else { 85 fmt.Printf("%v was fetched\n", url) 86 } 87 } 88 } 89 90 // fakeFetcher is Fetcher that returns canned results. 91 type fakeFetcher map[string]*fakeResult 92 93 type fakeResult struct { 94 body string 95 urls []string 96 } 97 98 func (f *fakeFetcher) Fetch(url string) (string, []string, error) { 99 if res, ok := (*f)[url]; ok { 100 return res.body, res.urls, nil 101 } 102 return "", nil, fmt.Errorf("not found: %s", url) 103 } 104 105 // fetcher is a populated fakeFetcher. 106 var fetcher = &fakeFetcher{ 107 "http://golang.org/": &fakeResult{ 108 "The Go Programming Language", 109 []string{ 110 "http://golang.org/pkg/", 111 "http://golang.org/cmd/", 112 }, 113 }, 114 "http://golang.org/pkg/": &fakeResult{ 115 "Packages", 116 []string{ 117 "http://golang.org/", 118 "http://golang.org/cmd/", 119 "http://golang.org/pkg/fmt/", 120 "http://golang.org/pkg/os/", 121 }, 122 }, 123 "http://golang.org/pkg/fmt/": &fakeResult{ 124 "Package fmt", 125 []string{ 126 "http://golang.org/", 127 "http://golang.org/pkg/", 128 }, 129 }, 130 "http://golang.org/pkg/os/": &fakeResult{ 131 "Package os", 132 []string{ 133 "http://golang.org/", 134 "http://golang.org/pkg/", 135 }, 136 }, 137 }