github.com/pyroscope-io/pyroscope@v0.37.3-0.20230725203016-5f6947968bd0/pkg/agent/dotnetspy/session.go (about)

     1  //go:build dotnetspy
     2  // +build dotnetspy
     3  
     4  package dotnetspy
     5  
     6  import (
     7  	"context"
     8  	"io"
     9  	"time"
    10  
    11  	"github.com/hashicorp/go-multierror"
    12  
    13  	"github.com/pyroscope-io/dotnetdiag"
    14  	"github.com/pyroscope-io/dotnetdiag/nettrace"
    15  	"github.com/pyroscope-io/dotnetdiag/nettrace/profiler"
    16  )
    17  
    18  type session struct {
    19  	pid     int
    20  	timeout time.Duration
    21  
    22  	config  dotnetdiag.CollectTracingConfig
    23  	session *dotnetdiag.Session
    24  
    25  	ch      chan line
    26  	stopped bool
    27  }
    28  
    29  type line struct {
    30  	name []byte
    31  	val  int
    32  }
    33  
    34  func newSession(pid int) *session {
    35  	return &session{
    36  		pid:     pid,
    37  		timeout: 3 * time.Second,
    38  		config: dotnetdiag.CollectTracingConfig{
    39  			CircularBufferSizeMB: 100,
    40  			Providers: []dotnetdiag.ProviderConfig{
    41  				{
    42  					Keywords:     0x0000F00000000000,
    43  					LogLevel:     4,
    44  					ProviderName: "Microsoft-DotNETCore-SampleProfiler",
    45  				},
    46  			},
    47  		},
    48  	}
    49  }
    50  
    51  // start opens a new diagnostic session to the process given, and asynchronously
    52  // processes the event stream.
    53  func (s *session) start() error {
    54  	ctx, cancel := context.WithTimeout(context.Background(), s.timeout)
    55  	defer cancel()
    56  	// If the process does not create Diagnostic Server, the next call will
    57  	// fail, and a session won't be created.
    58  	client := dotnetdiag.NewClient(waitDiagnosticServer(ctx, s.pid))
    59  	ns, err := client.CollectTracing(s.config)
    60  	if err != nil {
    61  		return err
    62  	}
    63  
    64  	stream := nettrace.NewStream(ns)
    65  	trace, err := stream.Open()
    66  	if err != nil {
    67  		_ = ns.Close()
    68  		return err
    69  	}
    70  
    71  	p := profiler.NewSampleProfiler(trace, profilerOptions...)
    72  	stream.EventHandler = p.EventHandler
    73  	stream.MetadataHandler = p.MetadataHandler
    74  	stream.StackBlockHandler = p.StackBlockHandler
    75  	stream.SequencePointBlockHandler = p.SequencePointBlockHandler
    76  
    77  	s.session = ns
    78  	s.ch = make(chan line)
    79  	go func() {
    80  		defer func() {
    81  			s.session = nil
    82  			close(s.ch)
    83  		}()
    84  		for {
    85  			switch err = stream.Next(); err {
    86  			default:
    87  			case nil:
    88  				continue
    89  			case io.EOF:
    90  				// The session is closed by us (on flush or stop call),
    91  				// or the target process has exited.
    92  				for k, v := range p.Samples() {
    93  					// dotnet profiler reports total time v per call stack k.
    94  					// Meanwhile, pyroscope agent expects number of samples is
    95  					// reported. Every sample is a time fraction of second
    96  					// according to sample rate: 1000ms/100 = 10ms by default.
    97  					// To represent reported time v as a number of samples,
    98  					// we divide it by sample duration.
    99  					//
   100  					// Taking into account that under the hood dotnet spy uses
   101  					// Microsoft-DotNETCore-SampleProfiler, which captures a
   102  					// snapshot of each thread's managed callstack every 10 ms,
   103  					// we cannot manage sample rate from outside.
   104  					s.ch <- line{
   105  						name: []byte(k),
   106  						val:  int(v.Milliseconds()) / 10,
   107  					}
   108  				}
   109  			}
   110  			return
   111  		}
   112  	}()
   113  
   114  	return nil
   115  }
   116  
   117  // flush closes NetTrace stream in order to retrieve samples,
   118  // and starts a new session, if not in stopped state.
   119  func (s *session) flush(cb func([]byte, uint64) error) error {
   120  	// Ignore call, if NetTrace session has not been established.
   121  	var errs error
   122  	if s.session != nil {
   123  		_ = s.session.Close()
   124  		for v := range s.ch {
   125  			if err := cb(v.name, uint64(v.val)); err != nil {
   126  				errs = multierror.Append(errs, err)
   127  			}
   128  		}
   129  	}
   130  	if s.stopped {
   131  		return errs
   132  	}
   133  	if err := s.start(); err != nil {
   134  		errs = multierror.Append(errs, err)
   135  	}
   136  	return errs
   137  }
   138  
   139  // stop closes diagnostic session, if it was established, and sets the
   140  // flag preventing session to start again.
   141  func (s *session) stop() error {
   142  	if s.session != nil {
   143  		_ = s.session.Close()
   144  	}
   145  	s.stopped = true
   146  	return nil
   147  }
   148  
   149  // .Net runtime requires some time to initialize diagnostic IPC server and
   150  // start accepting connections. If it fails before context cancel, an empty
   151  // string will be returned.
   152  func waitDiagnosticServer(ctx context.Context, pid int) string {
   153  	// Do not wait for the timer to fire for the first time.
   154  	if addr := dotnetdiag.DefaultServerAddress(pid); addr != "" {
   155  		return addr
   156  	}
   157  	ticker := time.NewTicker(time.Millisecond * 100)
   158  	defer ticker.Stop()
   159  	for {
   160  		select {
   161  		case <-ctx.Done():
   162  			return ""
   163  		case <-ticker.C:
   164  			if addr := dotnetdiag.DefaultServerAddress(pid); addr != "" {
   165  				return addr
   166  			}
   167  		}
   168  	}
   169  }