github.com/johnathanhowell/sia@v0.5.1-beta.0.20160524050156-83dcc3d37c94/modules/renter/repair.go

package renter

import (
	"fmt"
	"io"
	"os"
	"time"

	"github.com/NebulousLabs/Sia/build"
	"github.com/NebulousLabs/Sia/modules"
	"github.com/NebulousLabs/Sia/modules/renter/contractor"
	"github.com/NebulousLabs/Sia/types"
)

const (
	// repairThreads is the number of repairs that can run concurrently.
	repairThreads = 10
)

// When a file contract is within 'renewThreshold' blocks of expiring, the renter
// will attempt to renew the contract.
var renewThreshold = func() types.BlockHeight {
	switch build.Release {
	case "testing":
		return 10
	case "dev":
		return 200
	default:
		return 144 * 7 * 3 // 3 weeks, soon to be 6 weeks.
	}
}()

// hostErr and hostErrs are helpers for reporting repair errors. The actual
// Error implementations aren't that important; we just need to be able to
// extract the NetAddress of the failed host.

type hostErr struct {
	host modules.NetAddress
	err  error
}

func (he hostErr) Error() string {
	return fmt.Sprintf("host %v failed: %v", he.host, he.err)
}

type hostErrs []*hostErr

func (hs hostErrs) Error() string {
	var errs []error
	for _, h := range hs {
		errs = append(errs, h)
	}
	return build.JoinErrors(errs, "\n").Error()
}

// repair attempts to repair a file chunk by uploading its pieces to more
// hosts.
func (f *file) repair(chunkIndex uint64, missingPieces []uint64, r io.ReaderAt, hosts []contractor.Editor) error {
	// read chunk data and encode
	chunk := make([]byte, f.chunkSize())
	_, err := r.ReadAt(chunk, int64(chunkIndex*f.chunkSize()))
	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
		return err
	}
	pieces, err := f.erasureCode.Encode(chunk)
	if err != nil {
		return err
	}
	// encrypt pieces
	for i := range pieces {
		key := deriveKey(f.masterKey, chunkIndex, uint64(i))
		pieces[i], err = key.EncryptBytes(pieces[i])
		if err != nil {
			return err
		}
	}

	// upload one piece per host
	numPieces := len(missingPieces)
	if len(hosts) < numPieces {
		numPieces = len(hosts)
	}
	errChan := make(chan *hostErr)
	for i := 0; i < numPieces; i++ {
		go func(pieceIndex uint64, host contractor.Editor) {
			// upload data to host
			root, err := host.Upload(pieces[pieceIndex])
			if err != nil {
				errChan <- &hostErr{host.Address(), err}
				return
			}

			// create contract entry, if necessary
			f.mu.Lock()
			contract, ok := f.contracts[host.ContractID()]
			if !ok {
				contract = fileContract{
					ID:          host.ContractID(),
					IP:          host.Address(),
					WindowStart: host.EndHeight(),
				}
			}

			// update contract
			contract.Pieces = append(contract.Pieces, pieceData{
				Chunk:      chunkIndex,
				Piece:      pieceIndex,
				MerkleRoot: root,
			})
			f.contracts[host.ContractID()] = contract
			f.mu.Unlock()
			errChan <- nil
		}(missingPieces[i], hosts[i])
	}
	var errs hostErrs
	for i := 0; i < numPieces; i++ {
		err := <-errChan
		if err != nil {
			errs = append(errs, err)
		}
	}
	if errs != nil {
		return errs
	}

	return nil
}
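// exampleFanOutUploads is an illustrative sketch (for exposition only, not
// used by the renter) of the fan-out pattern that repair uses above: one
// goroutine per piece, each reporting exactly once on an unbuffered channel,
// with the caller receiving once per piece so no goroutine is leaked. The
// uploadPiece parameter is a hypothetical stand-in for host.Upload.
func exampleFanOutUploads(pieces [][]byte, uploadPiece func([]byte) error) []error {
	errChan := make(chan error)
	for i := range pieces {
		go func(piece []byte) {
			// report success (nil) or failure; this send is always matched by
			// one receive in the collection loop below.
			errChan <- uploadPiece(piece)
		}(pieces[i])
	}
	// collect exactly one result per piece, keeping only the failures
	var errs []error
	for range pieces {
		if err := <-errChan; err != nil {
			errs = append(errs, err)
		}
	}
	return errs
}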
// incompleteChunks returns a map of chunks containing pieces that have not
// been uploaded.
func (f *file) incompleteChunks() map[uint64][]uint64 {
	f.mu.RLock()
	defer f.mu.RUnlock()

	present := make([][]bool, f.numChunks())
	for i := range present {
		present[i] = make([]bool, f.erasureCode.NumPieces())
	}
	for _, fc := range f.contracts {
		for _, p := range fc.Pieces {
			present[p.Chunk][p.Piece] = true
		}
	}

	incomplete := make(map[uint64][]uint64)
	for chunkIndex, pieceBools := range present {
		for pieceIndex, ok := range pieceBools {
			if !ok {
				incomplete[uint64(chunkIndex)] = append(incomplete[uint64(chunkIndex)], uint64(pieceIndex))
			}
		}
	}
	return incomplete
}

// chunkHosts returns the hosts storing the given chunk.
func (f *file) chunkHosts(chunk uint64) []modules.NetAddress {
	f.mu.RLock()
	defer f.mu.RUnlock()

	var old []modules.NetAddress
	for _, fc := range f.contracts {
		for _, p := range fc.Pieces {
			if p.Chunk == chunk {
				old = append(old, fc.IP)
				break
			}
		}
	}
	return old
}

// expiringContracts returns the contracts that will expire soon.
// TODO: what if contract has fully expired?
func (f *file) expiringContracts(height types.BlockHeight) []fileContract {
	f.mu.RLock()
	defer f.mu.RUnlock()

	var expiring []fileContract
	for _, fc := range f.contracts {
		if height >= fc.WindowStart-renewThreshold {
			expiring = append(expiring, fc)
		}
	}
	return expiring
}

// offlineChunks returns the chunks belonging to "offline" hosts -- hosts that
// do not meet uptime requirements. Importantly, only chunks missing more than
// half their redundancy are returned.
func (f *file) offlineChunks(hdb hostDB) map[uint64][]uint64 {
	f.mu.RLock()
	defer f.mu.RUnlock()

	// mark all pieces belonging to offline hosts.
	offline := make(map[uint64][]uint64)
	for _, fc := range f.contracts {
		if hdb.IsOffline(fc.IP) {
			for _, p := range fc.Pieces {
				offline[p.Chunk] = append(offline[p.Chunk], p.Piece)
			}
		}
	}
	// filter out chunks missing less than half of their redundancy
	filtered := make(map[uint64][]uint64)
	for chunk, pieces := range offline {
		if len(pieces) > f.erasureCode.NumPieces()/2 {
			filtered[chunk] = pieces
		}
	}
	return filtered
}
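// exampleMissingPieces is an illustrative sketch (for exposition only, not
// used by the renter) of the chunk-by-piece bookkeeping that incompleteChunks
// performs above. With 2 chunks and a 3-piece erasure code, uploaded pieces
// (chunk 0, piece 0), (0, 2) and (1, 1) leave piece 1 of chunk 0 and pieces
// 0 and 2 of chunk 1 to repair. The uploaded argument is a hypothetical
// flattened view of the pieceData entries stored in f.contracts; offlineChunks
// above groups pieces per chunk in a similar way and then keeps only chunks
// with more than NumPieces()/2 of their pieces on offline hosts.
func exampleMissingPieces(numChunks, numPieces int, uploaded []pieceData) map[uint64][]uint64 {
	// mark every (chunk, piece) slot that already has an uploaded piece
	present := make([][]bool, numChunks)
	for i := range present {
		present[i] = make([]bool, numPieces)
	}
	for _, p := range uploaded {
		present[p.Chunk][p.Piece] = true
	}
	// every slot still false is missing and must be repaired
	missing := make(map[uint64][]uint64)
	for chunk, row := range present {
		for piece, ok := range row {
			if !ok {
				missing[uint64(chunk)] = append(missing[uint64(chunk)], uint64(piece))
			}
		}
	}
	return missing
}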
// threadedRepairLoop improves the health of files tracked by the renter by
// reuploading their missing pieces. Multiple repair attempts may be necessary
// before the file reaches full redundancy.
func (r *Renter) threadedRepairLoop() {
	for {
		time.Sleep(5 * time.Second)

		if !r.wallet.Unlocked() {
			continue
		}

		if len(r.hostContractor.Contracts()) == 0 {
			// nothing to revise
			continue
		}

		// make copy of repair set under lock
		repairing := make(map[string]trackedFile)
		id := r.mu.RLock()
		for name, meta := range r.tracking {
			repairing[name] = meta
		}
		r.mu.RUnlock(id)

		// create host pool
		pool := r.newHostPool()
		for name, meta := range repairing {
			r.threadedRepairFile(name, meta, pool)
		}
		pool.Close() // heh
	}
}

// threadedRepairFile repairs and saves an individual file.
func (r *Renter) threadedRepairFile(name string, meta trackedFile, pool *hostPool) {
	// helper function
	logAndRemove := func(fmt string, args ...interface{}) {
		r.log.Printf(fmt, args...)
		id := r.mu.Lock()
		delete(r.tracking, name)
		r.save()
		r.mu.Unlock(id)
	}

	id := r.mu.RLock()
	f, ok := r.files[name]
	r.mu.RUnlock(id)
	if !ok {
		logAndRemove("removing %v from repair set: no longer tracking that file", name)
		return
	}

	// determine if there is any work to do
	incChunks := f.incompleteChunks()
	if len(incChunks) == 0 {
		return
	}

	// open file handle
	handle, err := os.Open(meta.RepairPath)
	if err != nil {
		logAndRemove("removing %v from repair set: %v", name, err)
		return
	}
	defer handle.Close()

	// repair incomplete chunks
	if len(incChunks) != 0 {
		r.log.Printf("repairing %v chunks of %v", len(incChunks), f.name)
		r.repairChunks(f, handle, incChunks, pool)
	}
}

// repairChunks uploads missing chunks of f to new hosts.
func (r *Renter) repairChunks(f *file, handle io.ReaderAt, chunks map[uint64][]uint64, pool *hostPool) {
	for chunk, pieces := range chunks {
		// Determine host set. We want one host for each missing piece, and no
		// repeats of other hosts of this chunk.
		hosts := pool.uniqueHosts(len(pieces), f.chunkHosts(chunk))
		if len(hosts) == 0 {
			r.log.Debugf("aborting repair of %v: host pool is empty", f.name)
			return
		}
		// upload to new hosts
		err := f.repair(chunk, pieces, handle, hosts)
		if err != nil {
			if he, ok := err.(hostErrs); ok {
				// if a specific host failed, remove it from the pool
				for _, h := range he {
					// only log non-graceful errors
					if h.err != modules.ErrStopResponse {
						r.log.Printf("failed to upload to host %v: %v", h.host, h.err)
					}
					pool.remove(h.host)
				}
			} else {
				// any other type of error indicates a serious problem
				r.log.Printf("aborting repair of %v: %v", f.name, err)
				return
			}
		}

		// save the new contract
		f.mu.RLock()
		err = r.saveFile(f)
		f.mu.RUnlock()
		if err != nil {
			// If saving failed for this chunk, it will probably fail for the
			// next chunk as well. Better to try again on the next cycle.
			r.log.Printf("failed to save repaired file %v: %v", f.name, err)
			return
		}
	}
}
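// exampleHandleRepairError is an illustrative sketch (for exposition only,
// not used by the renter) of the error-handling split at the bottom of
// repairChunks above: a hostErrs value means individual hosts failed, so only
// those hosts are pruned and the next chunk is still attempted, while any
// other error is treated as fatal for this file. The remove parameter is a
// hypothetical stand-in for pool.remove.
func exampleHandleRepairError(err error, remove func(modules.NetAddress)) (abort bool) {
	if err == nil {
		return false
	}
	he, ok := err.(hostErrs)
	if !ok {
		// unrecognized failure: the caller should stop repairing this file
		return true
	}
	for _, h := range he {
		// drop just the failing host; other hosts may still accept pieces
		remove(h.host)
	}
	return false
}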