github.com/zhiqiangxu/util@v0.0.0-20230112053021-0a7aee056cd5/deadlock/detector.go (about)

     1  package deadlock
     2  
     3  import (
     4  	"runtime"
     5  	"sync"
     6  
     7  	"github.com/petermattis/goid"
     8  	"github.com/zhiqiangxu/util"
     9  )
    10  
// detector keeps the bookkeeping needed to find wait-for cycles between
// goroutines: which goroutine holds which resource, who holds each resource,
// and which resource each blocked goroutine is waiting for.
// The maps are accessed with mu held.
type detector struct {
	// mu guards all maps below.
	mu            sync.Mutex
	// ownerResouces maps a goroutine id to the resources it holds; the bool
	// is the w flag passed to onAcquiredLocked (true for a write lock).
	ownerResouces map[int64]map[uint64]bool
	// resouceOwners maps a resource id to the goroutines currently holding it.
	resouceOwners map[uint64]*resourceOwner
	// waitForMap maps a goroutine id to the single resource it is waiting
	// for; the entry is set by onWaitLocked and removed by onAcquiredLocked.
	waitForMap    map[int64]*waitForResource
}
    17  
// resourceOwner describes the goroutines that currently hold one resource.
type resourceOwner struct {
	// wgid is the id of the goroutine holding the write lock; 0 means there
	// is no write owner.
	wgid  int64
	// rgids maps the id of each goroutine holding a read lock to the number
	// of read acquisitions it has not released yet.
	rgids map[int64]int
}
    22  
// waitForResource records the resource a goroutine is blocked on.
type waitForResource struct {
	// resourceID identifies the resource being waited for.
	resourceID uint64
	// w is the w flag passed to onWaitLocked (true when waiting for a write
	// lock).
	w          bool
}
    27  
    28  func newDetector() *detector {
    29  	return &detector{
    30  		ownerResouces: make(map[int64]map[uint64]bool),
    31  		resouceOwners: make(map[uint64]*resourceOwner),
    32  		waitForMap:    make(map[int64]*waitForResource),
    33  	}
    34  }
    35  
    36  func (d *detector) onAcquiredLocked(resourceID uint64, w bool) {
    37  	gid := goid.Get()
    38  
    39  	d.mu.Lock()
    40  	defer d.mu.Unlock()
    41  
    42  	// update ownerResouces
    43  	ownedResources := d.ownerResouces[gid]
    44  	if ownedResources == nil {
    45  		ownedResources = make(map[uint64]bool)
    46  		d.ownerResouces[gid] = ownedResources
    47  	}
    48  	ownedResources[resourceID] = w
    49  
    50  	// update resouceOwners
    51  	resourceOwners := d.resouceOwners[resourceID]
    52  	if resourceOwners == nil {
    53  		resourceOwners = &resourceOwner{}
    54  		d.resouceOwners[resourceID] = resourceOwners
    55  	}
    56  	if w {
    57  		if resourceOwners.wgid != 0 {
    58  			panic("write lock holding by more than one owners")
    59  		}
    60  		resourceOwners.wgid = gid
    61  	} else {
    62  		rgids := resourceOwners.rgids
    63  		if rgids == nil {
    64  			rgids = make(map[int64]int)
    65  			resourceOwners.rgids = rgids
    66  		}
    67  		rgids[gid]++
    68  	}
    69  
    70  	// update waitForMap
    71  	delete(d.waitForMap, gid)
    72  }
    73  
// ErrorDeadlock describes a detected deadlock. A pointer to it is used as the
// panic value by onWaitLocked when blocking would close a wait-for cycle.
type ErrorDeadlock struct {
	// SourceParty is the goroutine that was about to block, together with
	// the resource it already holds that the cycle is blocked on.
	SourceParty Party
	// OwnerParty is the owner of the resource the source goroutine
	// requested, together with that resource.
	OwnerParty  Party
	// Stack is the stack trace filled in by ParsePanicError.
	Stack       string
}
    80  
// ErrorUsage describes incorrect lock usage. ParsePanicError builds it from a
// string panic value.
type ErrorUsage struct {
	// Msg is the panic message.
	Msg   string
	// Stack is the stack trace captured when the panic value was parsed.
	Stack string
}
    86  
// Party identifies one side of a deadlock: a goroutine and a resource it
// holds.
type Party struct {
	// GID is the goroutine id.
	GID        int64
	// ResourceID identifies the resource held by the goroutine.
	ResourceID uint64
	// W is true when the resource is held as a write lock and false when it
	// is held as a read lock.
	W          bool
}
    93  
    94  func getCallStack() string {
    95  	const size = 64 << 10
    96  	buf := make([]byte, size)
    97  	buf = buf[:runtime.Stack(buf, false)]
    98  	return util.String(buf)
    99  }
   100  
   101  // ParsePanicError returns non nil ErrorDeadlock if deadlock happend
   102  // the ErrorUsage is non nil for lock usage problems
   103  func ParsePanicError(panicErr interface{}) (edl *ErrorDeadlock, errUsage *ErrorUsage) {
   104  	if panicErr == nil {
   105  		return
   106  	}
   107  
   108  	if panicErrStr, ok := panicErr.(string); ok {
   109  		errUsage = &ErrorUsage{
   110  			Msg:   panicErrStr,
   111  			Stack: getCallStack(),
   112  		}
   113  		return
   114  	}
   115  
   116  	if panicErrDL, ok := panicErr.(*ErrorDeadlock); ok {
   117  		panicErrDL.Stack = getCallStack()
   118  		edl = panicErrDL
   119  		return
   120  	}
   121  
   122  	panic("bug happened")
   123  }
   124  
   125  func (d *detector) onWaitLocked(resourceID uint64, w bool) {
   126  	gid := goid.Get()
   127  
   128  	d.mu.Lock()
   129  	defer d.mu.Unlock()
   130  
   131  	if d.waitForMap[gid] != nil {
   132  		panic("waiting for multiple resources")
   133  	}
   134  
   135  	resourceOwners := d.resouceOwners[resourceID]
   136  	if resourceOwners == nil {
   137  		panic("waiting for a resource with no owner")
   138  	}
   139  	if resourceOwners.wgid == 0 && len(resourceOwners.rgids) == 0 {
   140  		panic("waiting for a resource with no owner")
   141  	}
   142  
   143  	// detect deadlock
   144  	var err *ErrorDeadlock
   145  	// check deadlock with write lock owner
   146  	if resourceOwners.wgid != 0 {
   147  		err = d.doDetect(gid, resourceOwners.wgid)
   148  		if err != nil {
   149  			err.OwnerParty = Party{GID: resourceOwners.wgid, ResourceID: resourceID, W: true}
   150  			panic(err)
   151  		}
   152  	}
   153  	// check deadlock with read lock owner
   154  	for rgid := range resourceOwners.rgids {
   155  		err = d.doDetect(gid, rgid)
   156  		if err != nil {
   157  			err.OwnerParty = Party{GID: rgid, ResourceID: resourceID, W: false}
   158  			panic(err)
   159  		}
   160  	}
   161  
   162  	d.waitForMap[gid] = &waitForResource{resourceID: resourceID, w: w}
   163  }
   164  
   165  func (d *detector) doDetect(sourceGID, ownerGID int64) (err *ErrorDeadlock) {
   166  	waitingForResource := d.waitForMap[ownerGID]
   167  	if waitingForResource == nil {
   168  		return
   169  	}
   170  
   171  	resourceOwners := d.resouceOwners[waitingForResource.resourceID]
   172  	if resourceOwners == nil || (resourceOwners.wgid == 0 && len(resourceOwners.rgids) == 0) {
   173  		panic("waiting for a resource with no owner")
   174  	}
   175  
   176  	if resourceOwners.wgid != 0 {
   177  		if resourceOwners.wgid == sourceGID {
   178  			err = &ErrorDeadlock{SourceParty: Party{
   179  				GID:        sourceGID,
   180  				ResourceID: waitingForResource.resourceID,
   181  				W:          true,
   182  			}}
   183  			return
   184  		}
   185  		err = d.doDetect(sourceGID, resourceOwners.wgid)
   186  		if err != nil {
   187  			return
   188  		}
   189  	}
   190  
   191  	for rgid := range resourceOwners.rgids {
   192  		if rgid == sourceGID {
   193  			err = &ErrorDeadlock{
   194  				SourceParty: Party{
   195  					GID:        sourceGID,
   196  					ResourceID: waitingForResource.resourceID,
   197  					W:          false,
   198  				}}
   199  			return
   200  		}
   201  		err = d.doDetect(sourceGID, rgid)
   202  		if err != nil {
   203  			return
   204  		}
   205  	}
   206  	return
   207  }
   208  
   209  func (d *detector) onReleaseLocked(resourceID uint64, w bool) {
   210  	gid := goid.Get()
   211  
   212  	d.mu.Lock()
   213  	defer d.mu.Unlock()
   214  
   215  	// update ownerResouces
   216  	ownedResources := d.ownerResouces[gid]
   217  	if ownedResources == nil {
   218  		panic("releasing a lock not owned")
   219  	}
   220  	if _, exists := ownedResources[resourceID]; !exists {
   221  		panic("releasing a lock not owned")
   222  	}
   223  	delete(ownedResources, resourceID)
   224  	if len(ownedResources) == 0 {
   225  		delete(d.ownerResouces, gid)
   226  	}
   227  
   228  	// update resouceOwners
   229  	resourceOwners := d.resouceOwners[resourceID]
   230  	if resourceOwners == nil {
   231  		panic("releasing a lock not owned")
   232  	}
   233  	if w {
   234  		if resourceOwners.wgid != gid {
   235  			panic("releasing a lock not owned")
   236  		}
   237  		resourceOwners.wgid = 0
   238  		if len(resourceOwners.rgids) == 0 {
   239  			delete(d.resouceOwners, resourceID)
   240  		}
   241  	} else {
   242  		if _, exists := resourceOwners.rgids[gid]; !exists {
   243  			panic("releasing a lock not owned")
   244  		}
   245  
   246  		resourceOwners.rgids[gid]--
   247  		if resourceOwners.rgids[gid] == 0 {
   248  			delete(resourceOwners.rgids, gid)
   249  			if len(resourceOwners.rgids) == 0 && resourceOwners.wgid == 0 {
   250  				delete(d.resouceOwners, resourceID)
   251  			}
   252  		} else if resourceOwners.rgids[gid] < 0 {
   253  			panic("releasing a read lock too many times")
   254  		}
   255  	}
   256  }
   257  
   258  var d *detector
   259  
   260  func init() {
   261  	d = newDetector()
   262  }