go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/swarming/server/testing/fakebot/main.go (about)

     1  // Copyright 2023 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Command fakebot calls Swarming RBE API endpoints to test them.
    16  //
    17  // It is intended to run locally side-by-side with a Swarming RBE
    18  // server process that is running in integration testing mode with
    19  // the `-expose-integration-mocks` flag passed to it.
    20  package main
    21  
    22  import (
    23  	"bytes"
    24  	"context"
    25  	"encoding/json"
    26  	"flag"
    27  	"fmt"
    28  	"io"
    29  	"net/http"
    30  	"os"
    31  	"time"
    32  
    33  	"google.golang.org/protobuf/types/known/timestamppb"
    34  
    35  	"go.chromium.org/luci/common/clock"
    36  	"go.chromium.org/luci/common/errors"
    37  	"go.chromium.org/luci/common/logging"
    38  	"go.chromium.org/luci/common/logging/gologger"
    39  	"go.chromium.org/luci/common/system/signals"
    40  	"go.chromium.org/luci/grpc/prpc"
    41  
    42  	internalspb "go.chromium.org/luci/swarming/proto/internals"
    43  	"go.chromium.org/luci/swarming/server/rbe"
    44  	"go.chromium.org/luci/swarming/server/testing/integrationmocks"
    45  )
    46  
    47  var (
    48  	botID       = flag.String("bot-id", "fake-bot", "ID of this bot")
    49  	pool        = flag.String("pool", "local-test", "Value for `pool` dimension")
    50  	serverPort  = flag.Int("server-port", 8800, "Localhost port with the Swarming RBE server")
    51  	rbeInstance = flag.String("rbe-instance", "projects/chromium-swarm-dev/instances/default_instance", "Full RBE instance name to use for tests")
    52  	taskDelay   = flag.Duration("task-delay", 5*time.Second, "How long to pretend working on a task")
    53  )
    54  
    55  func main() {
    56  	flag.Parse()
    57  	ctx := gologger.StdConfig.Use(context.Background())
    58  	if err := run(ctx); err != nil {
    59  		errors.Log(ctx, err)
    60  		os.Exit(1)
    61  	}
    62  }
    63  
    64  func run(ctx context.Context) error {
    65  	loopCtx, cancel := context.WithCancel(ctx)
    66  	defer cancel()
    67  	signals.HandleInterrupt(func() {
    68  		logging.Infof(ctx, "Got termination signal")
    69  		cancel()
    70  	})
    71  
    72  	bot := NewBot(ctx, *botID, *pool, *serverPort, *rbeInstance)
    73  
    74  	// The last processed lease we should close on the next session update.
    75  	var lease *rbe.Lease
    76  
    77  	defer func() {
    78  		// Always try to cleanup the session when exiting.
    79  		if bot.HasSession() {
    80  			if _, err := bot.UpdateSession(ctx, true, "BOT_TERMINATING", lease); err != nil {
    81  				logging.Errorf(ctx, "Error when terminating the bot session: %s", err)
    82  			}
    83  		}
    84  	}()
    85  
    86  	for loopCtx.Err() == nil {
    87  		// Pretend to poll Swarming Python to get the fresh poll token.
    88  		if err := bot.RefreshPollToken(ctx); err != nil {
    89  			return errors.Annotate(err, "getting poll token").Err()
    90  		}
    91  
    92  		// Create a new session if there's no current healthy session.
    93  		if !bot.HasSession() {
    94  			if err := bot.CreateSession(ctx); err != nil {
    95  				return errors.Annotate(err, "creating bot session").Err()
    96  			}
    97  		}
    98  
    99  		// Wait for a lease. This also closes the previous lease, if any.
   100  		var err error
   101  		switch lease, err = bot.UpdateSession(ctx, true, "OK", lease); {
   102  		case err != nil:
   103  			return errors.Annotate(err, "polling for a task").Err()
   104  		case !bot.HasSession():
   105  			logging.Errorf(ctx, "Bot session was closed by the server")
   106  			lease = nil
   107  		case lease != nil:
   108  			// If got a lease, launch the worker loop.
   109  			lease, err = workerLoop(ctx, loopCtx, bot, lease)
   110  			if err != nil {
   111  				return errors.Annotate(err, "when working on a lease").Err()
   112  			}
   113  		}
   114  	}
   115  
   116  	return nil
   117  }
   118  
   119  func workerLoop(ctx, loopCtx context.Context, bot *Bot, lease *rbe.Lease) (*rbe.Lease, error) {
   120  	if lease.State != "PENDING" {
   121  		return nil, errors.Reason("unexpected lease state %s", lease.State).Err()
   122  	}
   123  
   124  	leaseID := lease.ID
   125  	payload := lease.Payload
   126  
   127  	if payload.Noop {
   128  		return &rbe.Lease{
   129  			ID:     leaseID,
   130  			State:  "COMPLETED",
   131  			Result: &internalspb.TaskResult{},
   132  		}, nil
   133  	}
   134  
   135  	loopCtx, cancel := clock.WithTimeout(loopCtx, *taskDelay)
   136  	defer cancel()
   137  
   138  	for loopCtx.Err() == nil {
   139  		// "Ping" the lease. This also tells us if we should drop it.
   140  		lease, err := bot.UpdateSession(ctx, false, "OK", &rbe.Lease{
   141  			ID:    leaseID,
   142  			State: "ACTIVE",
   143  		})
   144  		switch {
   145  		case err != nil:
   146  			return nil, errors.Annotate(err, "when pinging lease").Err()
   147  		case lease == nil:
   148  			return nil, errors.Reason("the lease disappeared").Err()
   149  		case lease.ID != leaseID:
   150  			return nil, errors.Reason("got unexpected lease %q != %q", lease.ID, leaseID).Err()
   151  		case lease.State == "ACTIVE":
   152  			// Carry on.
   153  		case lease.State == "CANCELLED":
   154  			// Done with this lease.
   155  			logging.Infof(ctx, "The lease was canceled")
   156  			return &rbe.Lease{
   157  				ID:    leaseID,
   158  				State: "COMPLETED",
   159  			}, nil
   160  		default:
   161  			return nil, errors.Reason("got unexpected lease state %s", lease.State).Err()
   162  		}
   163  		clock.Sleep(loopCtx, time.Second)
   164  	}
   165  
   166  	return &rbe.Lease{
   167  		ID:     leaseID,
   168  		State:  "COMPLETED",
   169  		Result: &internalspb.TaskResult{},
   170  	}, nil
   171  }
   172  
   173  ////////////////////////////////////////////////////////////////////////////////
   174  
   175  type Bot struct {
   176  	dimensions  map[string][]string
   177  	server      string
   178  	rbeInstance string
   179  
   180  	mocks integrationmocks.IntegrationMocksClient
   181  
   182  	pollToken     []byte
   183  	nextPollToken time.Time
   184  
   185  	sessionToken  []byte
   186  	sessionID     string
   187  	sessionStatus string
   188  	sessionExpiry time.Time
   189  }
   190  
   191  func NewBot(ctx context.Context, botID, pool string, serverPort int, rbeInstance string) *Bot {
   192  	return &Bot{
   193  		dimensions: map[string][]string{
   194  			"id":   {botID},
   195  			"pool": {pool},
   196  		},
   197  		server:      fmt.Sprintf("http://127.0.0.1:%d", serverPort),
   198  		rbeInstance: rbeInstance,
   199  		mocks: integrationmocks.NewIntegrationMocksClient(&prpc.Client{
   200  			C:       http.DefaultClient,
   201  			Host:    fmt.Sprintf("127.0.0.1:%d", serverPort),
   202  			Options: &prpc.Options{Insecure: true},
   203  		}),
   204  	}
   205  }
   206  
   207  // rpc sends a JSON RPC to Swarming RBE local server.
   208  func (b *Bot) rpc(ctx context.Context, endpoint string, req, resp any) error {
   209  	blob, err := json.Marshal(req)
   210  	if err != nil {
   211  		return errors.Annotate(err, "failed to marshal the request body").Err()
   212  	}
   213  
   214  	httpResp, err := http.DefaultClient.Post(
   215  		b.server+endpoint,
   216  		"application/json; charset=utf-8",
   217  		bytes.NewReader(blob))
   218  
   219  	var respBody []byte
   220  	if httpResp != nil && httpResp.Body != nil {
   221  		defer func() { _ = httpResp.Body.Close() }()
   222  		var err error
   223  		if respBody, err = io.ReadAll(httpResp.Body); err != nil {
   224  			return errors.Annotate(err, "failed to read response body").Err()
   225  		}
   226  	}
   227  
   228  	if err != nil {
   229  		return errors.Annotate(err, "%s", endpoint).Err()
   230  	}
   231  	if httpResp.StatusCode != http.StatusOK {
   232  		return errors.Reason("%s: HTTP %d: %s", endpoint, httpResp.StatusCode, string(respBody)).Err()
   233  	}
   234  
   235  	if resp != nil && respBody != nil {
   236  		if err := json.Unmarshal(respBody, resp); err != nil {
   237  			return errors.Annotate(err, "failed to unmarshal the response (%s): %q", err, string(respBody)).Err()
   238  		}
   239  	}
   240  
   241  	return nil
   242  }
   243  
   244  // RefreshPollToken grabs a fresh poll token if necessary.
   245  func (b *Bot) RefreshPollToken(ctx context.Context) error {
   246  	if !b.nextPollToken.IsZero() && clock.Now(ctx).Before(b.nextPollToken) {
   247  		return nil
   248  	}
   249  
   250  	tok, err := b.mocks.GeneratePollToken(ctx, &internalspb.PollState{
   251  		Id: "fake-token",
   252  		EnforcedDimensions: []*internalspb.PollState_Dimension{
   253  			{Key: "id", Values: b.dimensions["id"]},
   254  		},
   255  		Expiry:      timestamppb.New(clock.Now(ctx).Add(time.Hour)),
   256  		RbeInstance: b.rbeInstance,
   257  		IpAllowlist: "localhost",
   258  		AuthMethod: &internalspb.PollState_IpAllowlistAuth{
   259  			IpAllowlistAuth: &internalspb.PollState_IPAllowlistAuth{},
   260  		},
   261  	})
   262  	if err != nil {
   263  		return err
   264  	}
   265  
   266  	b.pollToken = tok.PollToken
   267  	b.nextPollToken = clock.Now(ctx).Add(time.Minute)
   268  
   269  	return nil
   270  }
   271  
   272  // HasSession is true if we have an active session.
   273  func (b *Bot) HasSession() bool {
   274  	return len(b.sessionToken) != 0 && b.sessionStatus == "OK"
   275  }
   276  
   277  // CreateSession creates a new bot session.
   278  func (b *Bot) CreateSession(ctx context.Context) error {
   279  	logging.Infof(ctx, "Creating the session")
   280  
   281  	var resp rbe.CreateBotSessionResponse
   282  	err := b.rpc(ctx, "/swarming/api/v1/bot/rbe/session/create", &rbe.CreateBotSessionRequest{
   283  		PollToken:  b.pollToken,
   284  		Dimensions: b.dimensions,
   285  	}, &resp)
   286  	if err != nil {
   287  		return errors.Annotate(err, "creating session").Err()
   288  	}
   289  
   290  	b.sessionToken = resp.SessionToken
   291  	b.sessionID = resp.SessionID
   292  	b.sessionStatus = "OK"
   293  	b.sessionExpiry = time.Unix(resp.SessionExpiry, 0).UTC()
   294  
   295  	logging.Infof(ctx, "Created the session: %s", b.sessionID)
   296  	return nil
   297  }
   298  
   299  // UpdateSession updates the session.
   300  func (b *Bot) UpdateSession(ctx context.Context, withPollToken bool, status string, lease *rbe.Lease) (*rbe.Lease, error) {
   301  	if !b.HasSession() {
   302  		return nil, errors.Reason("no healthy session").Err()
   303  	}
   304  
   305  	if lease != nil {
   306  		logging.Infof(ctx, "Updating the session: %s [%s=%s]", status, lease.ID, lease.State)
   307  	} else {
   308  		logging.Infof(ctx, "Updating the session: %s", status)
   309  	}
   310  
   311  	var pollToken []byte
   312  	if withPollToken {
   313  		pollToken = b.pollToken
   314  	}
   315  
   316  	var resp rbe.UpdateBotSessionResponse
   317  	err := b.rpc(ctx, "/swarming/api/v1/bot/rbe/session/update", &rbe.UpdateBotSessionRequest{
   318  		SessionToken: b.sessionToken,
   319  		PollToken:    pollToken,
   320  		Dimensions:   b.dimensions,
   321  		Status:       status,
   322  		Lease:        lease,
   323  	}, &resp)
   324  	if err != nil {
   325  		return nil, errors.Annotate(err, "updating the session").Err()
   326  	}
   327  
   328  	b.sessionToken = resp.SessionToken
   329  	b.sessionStatus = resp.Status
   330  	b.sessionExpiry = time.Unix(resp.SessionExpiry, 0).UTC()
   331  
   332  	return resp.Lease, nil
   333  }