github.com/bartle-stripe/trillian@v1.2.1/monitoring/prometheus/etcdiscover/main.go (about) 1 // Copyright 2017 Google Inc. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // The etcdiscover binary monitors etcd to track the set of instances that 16 // support a gRPC service, and updates a file so that Prometheus can track 17 // those instances. 18 package main 19 20 import ( 21 "context" 22 "encoding/json" 23 "flag" 24 "fmt" 25 "io/ioutil" 26 "os" 27 "path" 28 "path/filepath" 29 "strings" 30 "sync" 31 "time" 32 33 "github.com/coreos/etcd/clientv3" 34 etcdnaming "github.com/coreos/etcd/clientv3/naming" 35 "github.com/golang/glog" 36 "github.com/google/trillian/util" 37 "google.golang.org/grpc/naming" 38 ) 39 40 var ( 41 etcdServers = flag.String("etcd_servers", "", "Comma-separated list of etcd servers") 42 etcdServices = flag.String("etcd_services", "", "Comma-separated list of service names to monitor for endpoints") 43 targetFile = flag.String("target", "", "File to update with service endpoint locations") 44 ) 45 46 type serviceInstanceInfo struct { 47 servers []string 48 services []string 49 target string 50 51 mu sync.RWMutex 52 watcher map[string]naming.Watcher // nolint: megacheck 53 instances map[string]map[string]bool 54 } 55 56 func newServiceInstanceInfo(etcdServers, etcdServices, target string) *serviceInstanceInfo { 57 s := serviceInstanceInfo{ 58 servers: strings.Split(etcdServers, ","), 59 services: strings.Split(etcdServices, ","), 60 watcher: make(map[string]naming.Watcher), // nolint: megacheck 61 target: target, 62 instances: make(map[string]map[string]bool), 63 } 64 for _, service := range s.services { 65 s.instances[service] = make(map[string]bool) 66 } 67 return &s 68 } 69 70 // Watch starts a collection of goroutines (one per service) that monitor etcd for 71 // changes in the endpoints serving the services. Blocks until Close() called. 72 func (s *serviceInstanceInfo) Watch() { 73 var wg sync.WaitGroup 74 for _, service := range s.services { 75 wg.Add(1) 76 go func(service string) { 77 defer wg.Done() 78 s.watchService(service) 79 }(service) 80 } 81 wg.Wait() 82 } 83 84 // Close terminates monitoring. 85 func (s *serviceInstanceInfo) Close() { 86 s.mu.Lock() 87 defer s.mu.Unlock() 88 for _, service := range s.services { 89 glog.Infof("close watcher for %s", service) 90 if s.watcher[service] != nil { 91 s.watcher[service].Close() 92 } 93 } 94 } 95 96 type prometheusJobInfo struct { 97 Targets []string `json:"targets,omitempty"` 98 Labels map[string]string `json:"labels,omitempty"` 99 } 100 101 // Export produces a JSON format description of the services and their endpoints 102 // in a format suitable for use as Prometheus targets. 103 func (s *serviceInstanceInfo) Export() ([]byte, error) { 104 s.mu.RLock() 105 defer s.mu.RUnlock() 106 jobs := make([]*prometheusJobInfo, len(s.services)) 107 for i, service := range s.services { 108 info := prometheusJobInfo{ 109 Labels: map[string]string{"job": service}, 110 } 111 for endpoint, present := range s.instances[service] { 112 if present { 113 info.Targets = append(info.Targets, endpoint) 114 } 115 } 116 jobs[i] = &info 117 } 118 return json.MarshalIndent(jobs, "", "\t") 119 } 120 121 // Update updates the target file with the current state. 122 func (s *serviceInstanceInfo) Update() { 123 jsonData, err := s.Export() 124 if err != nil { 125 glog.Errorf("failed to export JSON data: %v", err) 126 return 127 } 128 if s.target == "" { 129 fmt.Printf("State:\n%s\n", jsonData) 130 return 131 } 132 glog.V(1).Infof("Writing current state:\n%s", string(jsonData)) 133 134 // Write to a temporary file. 135 tempFile, err := ioutil.TempFile(filepath.Dir(s.target), "pending-"+path.Base(s.target)) 136 if err != nil { 137 glog.Errorf("failed to create tempfile: %v", err) 138 return 139 } 140 if _, err := tempFile.Write(jsonData); err != nil { 141 glog.Errorf("failed to write JSON data to tempfile %q: %v", tempFile.Name(), err) 142 } 143 tempFile.Close() 144 145 // Rename the temporary file to the target so it is updated more atomically. 146 if err := os.Rename(tempFile.Name(), s.target); err != nil { 147 glog.Errorf("failed to rename tempfile %q to %q: %v", tempFile.Name(), s.target, err) 148 } 149 } 150 151 func (s *serviceInstanceInfo) watchService(service string) { 152 cfg := clientv3.Config{Endpoints: s.servers, DialTimeout: 5 * time.Second} 153 client, err := clientv3.New(cfg) 154 if err != nil { 155 glog.Exitf("Failed to connect to etcd at %v: %v", s.servers, err) 156 } 157 res := &etcdnaming.GRPCResolver{Client: client} 158 watcher, err := res.Resolve(service) 159 if err != nil { 160 glog.Exitf("Failed to watch %s for updates: %v", service, err) 161 } 162 163 // Save the watcher so external code can Close() it. 164 s.mu.Lock() 165 s.watcher[service] = watcher 166 s.mu.Unlock() 167 168 for { 169 updates, err := watcher.Next() 170 if err != nil { 171 glog.Errorf("Failed on Next(): %v", err) 172 return 173 } 174 for _, update := range updates { 175 switch update.Op { 176 case naming.Add: 177 glog.V(1).Infof("Add(%s, +%s)", service, update.Addr) 178 s.mu.Lock() 179 s.instances[service][update.Addr] = true 180 s.mu.Unlock() 181 case naming.Delete: 182 glog.V(1).Infof("Delete(%s, -%s)", service, update.Addr) 183 s.mu.Lock() 184 s.instances[service][update.Addr] = false 185 s.mu.Unlock() 186 } 187 } 188 s.Update() 189 } 190 } 191 192 func main() { 193 flag.Parse() 194 defer glog.Flush() 195 196 if *etcdServers == "" { 197 glog.Exitf("No etcd servers configured with --etcd_servers") 198 } 199 if *etcdServices == "" { 200 glog.Exitf("No etcd services configured with --etcd_services") 201 } 202 203 state := newServiceInstanceInfo(*etcdServers, *etcdServices, *targetFile) 204 ctx, cancel := context.WithCancel(context.Background()) 205 defer cancel() 206 go util.AwaitSignal(ctx, func() { 207 state.Close() 208 }) 209 state.Watch() 210 }