github.com/zhiqiangxu/util@v0.0.0-20230112053021-0a7aee056cd5/deadlock/detector.go (about) 1 package deadlock 2 3 import ( 4 "runtime" 5 "sync" 6 7 "github.com/petermattis/goid" 8 "github.com/zhiqiangxu/util" 9 ) 10 11 type detector struct { 12 mu sync.Mutex 13 ownerResouces map[int64]map[uint64]bool 14 resouceOwners map[uint64]*resourceOwner 15 waitForMap map[int64]*waitForResource 16 } 17 18 type resourceOwner struct { 19 wgid int64 20 rgids map[int64]int 21 } 22 23 type waitForResource struct { 24 resourceID uint64 25 w bool 26 } 27 28 func newDetector() *detector { 29 return &detector{ 30 ownerResouces: make(map[int64]map[uint64]bool), 31 resouceOwners: make(map[uint64]*resourceOwner), 32 waitForMap: make(map[int64]*waitForResource), 33 } 34 } 35 36 func (d *detector) onAcquiredLocked(resourceID uint64, w bool) { 37 gid := goid.Get() 38 39 d.mu.Lock() 40 defer d.mu.Unlock() 41 42 // update ownerResouces 43 ownedResources := d.ownerResouces[gid] 44 if ownedResources == nil { 45 ownedResources = make(map[uint64]bool) 46 d.ownerResouces[gid] = ownedResources 47 } 48 ownedResources[resourceID] = w 49 50 // update resouceOwners 51 resourceOwners := d.resouceOwners[resourceID] 52 if resourceOwners == nil { 53 resourceOwners = &resourceOwner{} 54 d.resouceOwners[resourceID] = resourceOwners 55 } 56 if w { 57 if resourceOwners.wgid != 0 { 58 panic("write lock holding by more than one owners") 59 } 60 resourceOwners.wgid = gid 61 } else { 62 rgids := resourceOwners.rgids 63 if rgids == nil { 64 rgids = make(map[int64]int) 65 resourceOwners.rgids = rgids 66 } 67 rgids[gid]++ 68 } 69 70 // update waitForMap 71 delete(d.waitForMap, gid) 72 } 73 74 // ErrorDeadlock contains deadlock info 75 type ErrorDeadlock struct { 76 SourceParty Party 77 OwnerParty Party 78 Stack string 79 } 80 81 // ErrorUsage for incorrect lock usage 82 type ErrorUsage struct { 83 Msg string 84 Stack string 85 } 86 87 // Party for one side of deadlock 88 type Party struct { 89 GID int64 90 ResourceID uint64 91 W bool 92 } 93 94 func getCallStack() string { 95 const size = 64 << 10 96 buf := make([]byte, size) 97 buf = buf[:runtime.Stack(buf, false)] 98 return util.String(buf) 99 } 100 101 // ParsePanicError returns non nil ErrorDeadlock if deadlock happend 102 // the ErrorUsage is non nil for lock usage problems 103 func ParsePanicError(panicErr interface{}) (edl *ErrorDeadlock, errUsage *ErrorUsage) { 104 if panicErr == nil { 105 return 106 } 107 108 if panicErrStr, ok := panicErr.(string); ok { 109 errUsage = &ErrorUsage{ 110 Msg: panicErrStr, 111 Stack: getCallStack(), 112 } 113 return 114 } 115 116 if panicErrDL, ok := panicErr.(*ErrorDeadlock); ok { 117 panicErrDL.Stack = getCallStack() 118 edl = panicErrDL 119 return 120 } 121 122 panic("bug happened") 123 } 124 125 func (d *detector) onWaitLocked(resourceID uint64, w bool) { 126 gid := goid.Get() 127 128 d.mu.Lock() 129 defer d.mu.Unlock() 130 131 if d.waitForMap[gid] != nil { 132 panic("waiting for multiple resources") 133 } 134 135 resourceOwners := d.resouceOwners[resourceID] 136 if resourceOwners == nil { 137 panic("waiting for a resource with no owner") 138 } 139 if resourceOwners.wgid == 0 && len(resourceOwners.rgids) == 0 { 140 panic("waiting for a resource with no owner") 141 } 142 143 // detect deadlock 144 var err *ErrorDeadlock 145 // check deadlock with write lock owner 146 if resourceOwners.wgid != 0 { 147 err = d.doDetect(gid, resourceOwners.wgid) 148 if err != nil { 149 err.OwnerParty = Party{GID: resourceOwners.wgid, ResourceID: resourceID, W: true} 150 panic(err) 151 } 152 } 153 // check deadlock with read lock owner 154 for rgid := range resourceOwners.rgids { 155 err = d.doDetect(gid, rgid) 156 if err != nil { 157 err.OwnerParty = Party{GID: rgid, ResourceID: resourceID, W: false} 158 panic(err) 159 } 160 } 161 162 d.waitForMap[gid] = &waitForResource{resourceID: resourceID, w: w} 163 } 164 165 func (d *detector) doDetect(sourceGID, ownerGID int64) (err *ErrorDeadlock) { 166 waitingForResource := d.waitForMap[ownerGID] 167 if waitingForResource == nil { 168 return 169 } 170 171 resourceOwners := d.resouceOwners[waitingForResource.resourceID] 172 if resourceOwners == nil || (resourceOwners.wgid == 0 && len(resourceOwners.rgids) == 0) { 173 panic("waiting for a resource with no owner") 174 } 175 176 if resourceOwners.wgid != 0 { 177 if resourceOwners.wgid == sourceGID { 178 err = &ErrorDeadlock{SourceParty: Party{ 179 GID: sourceGID, 180 ResourceID: waitingForResource.resourceID, 181 W: true, 182 }} 183 return 184 } 185 err = d.doDetect(sourceGID, resourceOwners.wgid) 186 if err != nil { 187 return 188 } 189 } 190 191 for rgid := range resourceOwners.rgids { 192 if rgid == sourceGID { 193 err = &ErrorDeadlock{ 194 SourceParty: Party{ 195 GID: sourceGID, 196 ResourceID: waitingForResource.resourceID, 197 W: false, 198 }} 199 return 200 } 201 err = d.doDetect(sourceGID, rgid) 202 if err != nil { 203 return 204 } 205 } 206 return 207 } 208 209 func (d *detector) onReleaseLocked(resourceID uint64, w bool) { 210 gid := goid.Get() 211 212 d.mu.Lock() 213 defer d.mu.Unlock() 214 215 // update ownerResouces 216 ownedResources := d.ownerResouces[gid] 217 if ownedResources == nil { 218 panic("releasing a lock not owned") 219 } 220 if _, exists := ownedResources[resourceID]; !exists { 221 panic("releasing a lock not owned") 222 } 223 delete(ownedResources, resourceID) 224 if len(ownedResources) == 0 { 225 delete(d.ownerResouces, gid) 226 } 227 228 // update resouceOwners 229 resourceOwners := d.resouceOwners[resourceID] 230 if resourceOwners == nil { 231 panic("releasing a lock not owned") 232 } 233 if w { 234 if resourceOwners.wgid != gid { 235 panic("releasing a lock not owned") 236 } 237 resourceOwners.wgid = 0 238 if len(resourceOwners.rgids) == 0 { 239 delete(d.resouceOwners, resourceID) 240 } 241 } else { 242 if _, exists := resourceOwners.rgids[gid]; !exists { 243 panic("releasing a lock not owned") 244 } 245 246 resourceOwners.rgids[gid]-- 247 if resourceOwners.rgids[gid] == 0 { 248 delete(resourceOwners.rgids, gid) 249 if len(resourceOwners.rgids) == 0 && resourceOwners.wgid == 0 { 250 delete(d.resouceOwners, resourceID) 251 } 252 } else if resourceOwners.rgids[gid] < 0 { 253 panic("releasing a read lock too many times") 254 } 255 } 256 } 257 258 var d *detector 259 260 func init() { 261 d = newDetector() 262 }