github.com/choria-io/go-choria@v0.28.1-0.20240416190746-b3bf9c7d5a45/providers/agent/mcorpc/external/agent.go (about)

     1  // Copyright (c) 2020-2022, R.I. Pienaar and the Choria Project contributors
     2  //
     3  // SPDX-License-Identifier: Apache-2.0
     4  
     5  package external
     6  
     7  import (
     8  	"bufio"
     9  	"context"
    10  	"encoding/json"
    11  	"fmt"
    12  	"io"
    13  	"os"
    14  	"os/exec"
    15  	"path/filepath"
    16  	"runtime"
    17  	"strings"
    18  	"sync"
    19  	"time"
    20  
    21  	"github.com/choria-io/go-choria/inter"
    22  	"github.com/choria-io/go-choria/internal/util"
    23  	"github.com/choria-io/go-choria/providers/agent/mcorpc"
    24  	agentddl "github.com/choria-io/go-choria/providers/agent/mcorpc/ddl/agent"
    25  	"github.com/choria-io/go-choria/server"
    26  	"github.com/choria-io/go-choria/server/agents"
    27  
    28  	"github.com/sirupsen/logrus"
    29  )
    30  
    31  const (
    32  	rpcRequestProtocol      = "io.choria.mcorpc.external.v1.rpc_request"
    33  	rpcRequestSchema        = "https://choria.io/schemas/mcorpc/external/v1/rpc_request.json"
    34  	rpcReplyProtocol        = "io.choria.mcorpc.external.v1.rpc_reply"
    35  	rpcReplySchema          = "https://choria.io/schemas/mcorpc/external/v1/rpc_reply.json"
    36  	activationProtocol      = "io.choria.mcorpc.external.v1.activation_request"
    37  	activationSchema        = "https://choria.io/schemas/mcorpc/external/v1/activation_request.json"
    38  	activationReplyProtocol = "io.choria.mcorpc.external.v1.activation_reply"
    39  	activationReplySchema   = "https://choria.io/schemas/mcorpc/external/v1/activation_reply.json"
    40  )
    41  
    42  // ActivationCheck is the request to determine if an agent should activate
    43  type ActivationCheck struct {
    44  	Schema   string `json:"$schema"`
    45  	Protocol string `json:"protocol"`
    46  	Agent    string `json:"agent"`
    47  }
    48  
    49  // ActivationReply is the reply from the activation check message
    50  type ActivationReply struct {
    51  	ShouldActivate bool `json:"activate"`
    52  }
    53  
    54  // Request is the request being published to the shim runner
    55  type Request struct {
    56  	Schema     string          `json:"$schema"`
    57  	Protocol   string          `json:"protocol"`
    58  	Agent      string          `json:"agent"`
    59  	Action     string          `json:"action"`
    60  	RequestID  string          `json:"requestid"`
    61  	SenderID   string          `json:"senderid"`
    62  	CallerID   string          `json:"callerid"`
    63  	Collective string          `json:"collective"`
    64  	TTL        int             `json:"ttl"`
    65  	Time       int64           `json:"msgtime"`
    66  	Data       json.RawMessage `json:"data"`
    67  }
    68  
    69  func (p *Provider) newExternalAgent(ddl *agentddl.DDL, mgr server.AgentManager) (*mcorpc.Agent, error) {
    70  	agent := mcorpc.New(ddl.Metadata.Name, ddl.Metadata, mgr.Choria(), mgr.Logger())
    71  	activator, err := p.externalActivationCheck(ddl)
    72  	if err != nil {
    73  		return nil, fmt.Errorf("could not activation check %s: %s", agent.Name(), err)
    74  	}
    75  	agent.SetActivationChecker(activator)
    76  
    77  	p.log.Debugf("Registering proxy actions for External agent %s: %s", ddl.Metadata.Name, strings.Join(ddl.ActionNames(), ", "))
    78  
    79  	for _, action := range ddl.Actions {
    80  		if err != nil {
    81  			return nil, err
    82  		}
    83  
    84  		agent.MustRegisterAction(action.Name, p.externalAction)
    85  	}
    86  
    87  	return agent, nil
    88  }
    89  
    90  func (p *Provider) agentPath(name string, dir string) string {
    91  	base := filepath.Dir(dir)
    92  	if base == "" {
    93  		return ""
    94  	}
    95  
    96  	agentNameOrDir := filepath.Join(base, name)
    97  
    98  	if util.FileIsRegular(agentNameOrDir) {
    99  		p.log.Debugf("Using %s as path to agent binary", agentNameOrDir)
   100  		return agentNameOrDir
   101  	}
   102  
   103  	agentNameOrDir = filepath.Join(agentNameOrDir, fmt.Sprintf("%s-%s_%s", name, runtime.GOOS, runtime.GOARCH))
   104  	p.log.Debugf("Using %s as path to agent binary", agentNameOrDir)
   105  
   106  	return agentNameOrDir
   107  }
   108  
   109  func (p *Provider) externalActivationCheck(ddl *agentddl.DDL) (mcorpc.ActivationChecker, error) {
   110  	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
   111  	defer cancel()
   112  
   113  	if ddl.SourceLocation == "" {
   114  		return nil, fmt.Errorf("do not know where DDL for %s is located on disk, cannot activate", ddl.Metadata.Name)
   115  	}
   116  
   117  	agentPath := p.agentPath(ddl.Metadata.Name, ddl.SourceLocation)
   118  	if !util.FileExist(agentPath) {
   119  		p.log.Debugf("Agent %s does not exist in '%s', cannot perform activation check, not activating", ddl.Metadata.Name, agentPath)
   120  		return func() bool { return false }, nil
   121  	}
   122  
   123  	rep := &ActivationReply{}
   124  	req := &ActivationCheck{
   125  		Schema:   activationSchema,
   126  		Protocol: activationProtocol,
   127  		Agent:    ddl.Metadata.Name,
   128  	}
   129  
   130  	j, err := json.Marshal(req)
   131  	if err != nil {
   132  		return nil, fmt.Errorf("could not json encode activation message: %s", err)
   133  	}
   134  
   135  	p.log.Debugf("Performing activation check on external agent %s using %s", ddl.Metadata.Name, agentPath)
   136  	err = p.executeRequest(ctx, agentPath, activationProtocol, j, rep, ddl.Metadata.Name, p.log, nil)
   137  	if err != nil {
   138  		p.log.Warnf("External agent %s not activating due to error during activation check: %s", agentPath, err)
   139  		return func() bool { return false }, nil
   140  	}
   141  
   142  	return func() bool { return rep.ShouldActivate }, nil
   143  }
   144  
   145  func (p *Provider) externalAction(ctx context.Context, req *mcorpc.Request, reply *mcorpc.Reply, agent *mcorpc.Agent, conn inter.ConnectorInfo) {
   146  	action := fmt.Sprintf("%s#%s", req.Agent, req.Action)
   147  
   148  	p.mu.Lock()
   149  	ddlpath, ok := p.paths[agent.Name()]
   150  	p.mu.Unlock()
   151  	if !ok {
   152  		p.abortAction(fmt.Sprintf("Cannot determine DDL path for agent %s", agent.Name()), agent, reply)
   153  		return
   154  	}
   155  
   156  	ddl, ok := p.agentDDL(agent.Name())
   157  	if !ok {
   158  		p.abortAction(fmt.Sprintf("Cannot find DDL for agent %s", agent.Name()), agent, reply)
   159  		return
   160  	}
   161  
   162  	agentPath := p.agentPath(agent.Metadata().Name, ddlpath)
   163  	if agentPath == "" || !util.FileExist(agentPath) {
   164  		p.abortAction(fmt.Sprintf("Cannot call external agent %s: agent executable was not found", action), agent, reply)
   165  		return
   166  	}
   167  	agent.Log.Debugf("Attempting to call external agent %s (%s) with a timeout %d", action, agentPath, agent.Metadata().Timeout)
   168  
   169  	err := p.validateRequest(ddl, req, agent.Log)
   170  	if err != nil {
   171  		p.abortAction(fmt.Sprintf("Validation failed: %s", err), agent, reply)
   172  		return
   173  	}
   174  
   175  	// 1.5 extra second to give the shim time to start etc
   176  	tctx, cancel := context.WithTimeout(ctx, time.Duration(agent.Metadata().Timeout)*time.Second+(1500*time.Millisecond))
   177  	defer cancel()
   178  
   179  	externreq, err := p.newExternalRequest(req)
   180  	if err != nil {
   181  		p.abortAction(fmt.Sprintf("Could not call external agent %s: json request creation failed: %s", action, err), agent, reply)
   182  		return
   183  	}
   184  
   185  	err = p.executeRequest(tctx, agentPath, rpcRequestProtocol, externreq, reply, agent.Name(), agent.Log, agent.ServerInfoSource)
   186  	if err != nil {
   187  		p.abortAction(fmt.Sprintf("Could not call external agent %s: %s", action, err), agent, reply)
   188  		return
   189  	}
   190  
   191  	err = p.setReplyDefaults(ddl, req.Action, reply)
   192  	if err != nil {
   193  		p.abortAction(fmt.Sprintf("Could not set reply defaults: %s", err), agent, reply)
   194  		return
   195  	}
   196  }
   197  
   198  func (p *Provider) validateRequest(ddl *agentddl.DDL, req *mcorpc.Request, log *logrus.Entry) error {
   199  	actint, err := ddl.ActionInterface(req.Action)
   200  	if err != nil {
   201  		return fmt.Errorf("could not load action: %s", err)
   202  	}
   203  
   204  	p.log.Debugf("Validating request %s: %s", req.RequestID, string(req.Data))
   205  
   206  	warnings, err := actint.ValidateRequestJSON(req.Data)
   207  	if err != nil {
   208  		return err
   209  	}
   210  
   211  	if len(warnings) > 0 {
   212  		for _, w := range warnings {
   213  			log.Warnf(fmt.Sprintf("Validation on input %s to %s#%s returned a warning: %s", req.Action, req.Agent, req.Action, w))
   214  		}
   215  	}
   216  
   217  	return nil
   218  }
   219  
   220  func (p *Provider) setReplyDefaults(ddl *agentddl.DDL, action string, reply *mcorpc.Reply) error {
   221  	actint, err := ddl.ActionInterface(action)
   222  	if err != nil {
   223  		return fmt.Errorf("could not load action: %s", err)
   224  	}
   225  
   226  	if reply.Data == nil {
   227  		reply.Data = make(map[string]any)
   228  	}
   229  
   230  	result, ok := reply.Data.(map[string]any)
   231  	if !ok {
   232  		return fmt.Errorf("reply data is in the wrong format")
   233  	}
   234  
   235  	actint.SetOutputDefaults(result)
   236  	reply.Data = result
   237  
   238  	return nil
   239  }
   240  
   241  func (p *Provider) executeRequest(ctx context.Context, command string, protocol string, req []byte, reply any, agentName string, log *logrus.Entry, si agents.ServerInfoSource) error {
   242  	reqfile, err := os.CreateTemp("", "request")
   243  	if err != nil {
   244  		return fmt.Errorf("could not create request temp file: %s", err)
   245  	}
   246  	defer os.Remove(reqfile.Name())
   247  
   248  	repfile, err := os.CreateTemp("", "reply")
   249  	if err != nil {
   250  		return fmt.Errorf("could not create reply temp file: %s", err)
   251  	}
   252  	defer os.Remove(repfile.Name())
   253  	repfile.Close()
   254  
   255  	factsfile, err := os.CreateTemp("", "facts")
   256  	if err != nil {
   257  		return fmt.Errorf("could not create facts temp file: %s", err)
   258  	}
   259  	defer os.Remove(factsfile.Name())
   260  
   261  	_, err = reqfile.Write(req)
   262  	if err != nil {
   263  		return fmt.Errorf("could not create reply temp file: %s", err)
   264  	}
   265  
   266  	agentConfig, err := filepath.Abs(filepath.Join(filepath.Dir(p.cfg.ConfigFile), "plugin.d", agentName))
   267  	if err != nil {
   268  		return fmt.Errorf("could not determine agent config file: %s", err)
   269  	}
   270  
   271  	if si != nil {
   272  		factsfile.Write(si.Facts())
   273  	}
   274  
   275  	execution := exec.CommandContext(ctx, command, reqfile.Name(), repfile.Name(), rpcRequestProtocol)
   276  	execution.Dir = os.TempDir()
   277  	execution.Env = []string{
   278  		"CHORIA_EXTERNAL_REQUEST=" + reqfile.Name(),
   279  		"CHORIA_EXTERNAL_REPLY=" + repfile.Name(),
   280  		"CHORIA_EXTERNAL_PROTOCOL=" + protocol,
   281  		"CHORIA_EXTERNAL_CONFIG=" + agentConfig,
   282  		"CHORIA_EXTERNAL_FACTS=" + factsfile.Name(),
   283  		"PATH=" + os.Getenv("PATH"),
   284  	}
   285  
   286  	stdout, err := execution.StdoutPipe()
   287  	if err != nil {
   288  		return fmt.Errorf("could not open STDOUT: %s", err)
   289  	}
   290  
   291  	stderr, err := execution.StderrPipe()
   292  	if err != nil {
   293  		return fmt.Errorf("could not open STDERR: %s", err)
   294  	}
   295  
   296  	wg := &sync.WaitGroup{}
   297  	outputReader := func(wg *sync.WaitGroup, in io.ReadCloser, logger func(args ...any)) {
   298  		defer wg.Done()
   299  
   300  		scanner := bufio.NewScanner(in)
   301  		for scanner.Scan() {
   302  			logger(scanner.Text())
   303  		}
   304  	}
   305  
   306  	wg.Add(1)
   307  	go outputReader(wg, stderr, log.Error)
   308  	wg.Add(1)
   309  	go outputReader(wg, stdout, log.Info)
   310  
   311  	err = execution.Start()
   312  	if err != nil {
   313  		return fmt.Errorf("executing %s failed: %s", filepath.Base(command), err)
   314  	}
   315  
   316  	execution.Wait()
   317  	wg.Wait()
   318  
   319  	if execution.ProcessState.ExitCode() != 0 {
   320  		return fmt.Errorf("executing %s failed: exit status %d", filepath.Base(command), execution.ProcessState.ExitCode())
   321  	}
   322  
   323  	repjson, err := os.ReadFile(repfile.Name())
   324  	if err != nil {
   325  		return fmt.Errorf("failed to read reply json: %s", err)
   326  	}
   327  
   328  	err = json.Unmarshal(repjson, reply)
   329  	if err != nil {
   330  		return fmt.Errorf("failed to decode reply json: %s", err)
   331  	}
   332  
   333  	return nil
   334  }
   335  
   336  func (p *Provider) newExternalRequest(req *mcorpc.Request) ([]byte, error) {
   337  	sr := Request{
   338  		Schema:     rpcRequestSchema,
   339  		Protocol:   rpcRequestProtocol,
   340  		Action:     req.Action,
   341  		Agent:      req.Agent,
   342  		CallerID:   req.CallerID,
   343  		Collective: req.Collective,
   344  		RequestID:  req.RequestID,
   345  		SenderID:   req.SenderID,
   346  		Time:       req.Time.Unix(),
   347  		TTL:        req.TTL,
   348  		Data:       req.Data,
   349  	}
   350  
   351  	return json.Marshal(sr)
   352  }
   353  
   354  func (p *Provider) abortAction(reason string, agent *mcorpc.Agent, reply *mcorpc.Reply) {
   355  	agent.Log.Error(reason)
   356  	reply.Statuscode = mcorpc.Aborted
   357  	reply.Statusmsg = reason
   358  }