github.com/square/finch@v0.0.0-20240412205204-6530c03e2b96/compute/server.go (about)

     1  // Copyright 2024 Block, Inc.
     2  
     3  package compute
     4  
     5  import (
     6  	"context"
     7  	"fmt"
     8  	"log"
     9  	"os"
    10  	"path/filepath"
    11  	"sync"
    12  
    13  	"github.com/rs/xid"
    14  
    15  	"github.com/square/finch"
    16  	"github.com/square/finch/config"
    17  	"github.com/square/finch/data"
    18  	"github.com/square/finch/stage"
    19  	"github.com/square/finch/stats"
    20  )
    21  
    22  // Server coordinates instances: the local and any remotes. Server implements
    23  // Compute so server.Server (the Finch core server) can run as a client or server.
    24  type Server struct {
    25  	api  *API   // handles remote compute (rc)
    26  	name string // defaults to "local"
    27  	test bool
    28  	// --
    29  	gds *data.Scope // global data scope
    30  	cfg config.Stage
    31  }
    32  
    33  type ack struct {
    34  	name string // "" for local, else remote.name
    35  	err  error
    36  }
    37  
    38  func NewServer(name, addr string, test bool) *Server {
    39  	s := &Server{
    40  		name: name,
    41  		test: test,
    42  		gds:  data.NewScope(), // global data
    43  	}
    44  	if addr != "" {
    45  		s.api = NewAPI(finch.WithPort(addr, finch.DEFAULT_SERVER_PORT))
    46  	}
    47  	return s
    48  }
    49  
    50  func (s *Server) Run(ctxFinch context.Context, stages []config.Stage) error {
    51  	for _, cfg := range stages {
    52  		// cd dir of config file so relative file paths in config work
    53  		if err := os.Chdir(filepath.Dir(cfg.File)); err != nil {
    54  			return err
    55  		}
    56  
    57  		if err := s.run(ctxFinch, cfg); err != nil {
    58  			return err
    59  		}
    60  
    61  		if ctxFinch.Err() != nil {
    62  			finch.Debug("finch terminated")
    63  			return nil
    64  		}
    65  	}
    66  	return nil
    67  }
    68  
    69  // Run runs all the stages on all the instances (local and remote).
    70  func (s *Server) run(ctxFinch context.Context, cfg config.Stage) error {
    71  	var err error
    72  	stageName := cfg.Name
    73  
    74  	nInstances := finch.Uint(cfg.Compute.Instances)
    75  	nRemotes := nInstances - 1 // -1 for local unless..
    76  	if cfg.Compute.DisableLocal {
    77  		nRemotes += 1 // no local, so all instances are remote
    78  	}
    79  	if nRemotes == 0 {
    80  		fmt.Printf("#\n# %s\n#\n", stageName)
    81  	} else {
    82  		cfg.Id = xid.New().String() // unique stage ID for remotes
    83  		fmt.Printf("#\n# %s (%s)\n#\n", stageName, cfg.Id)
    84  	}
    85  
    86  	m := &stageMeta{
    87  		Mutex:    &sync.Mutex{},
    88  		cfg:      cfg,
    89  		nRemotes: nRemotes,
    90  		bootChan: make(chan ack, nInstances),
    91  		runChan:  make(chan struct{}),
    92  		doneChan: make(chan ack, nInstances),
    93  		clients:  map[string]*client{},
    94  	}
    95  
    96  	if !config.True(cfg.Stats.Disable) {
    97  		m.stats, err = stats.NewCollector(cfg.Stats, s.name, nInstances)
    98  		if err != nil {
    99  			return err
   100  		}
   101  	}
   102  
   103  	s.gds.Reset() // keep data global and stage data, delete the rest
   104  
   105  	// Create and boot local instance first because if this doesn't work,
   106  	// then remotes shouldn't work either because they all boot with the
   107  	// exact same config.
   108  	var local *stage.Stage
   109  	if !cfg.Compute.DisableLocal {
   110  		local = stage.New(cfg, s.gds, m.stats)
   111  		if err := local.Prepare(ctxFinch); err != nil {
   112  			return err
   113  		}
   114  		m.bootChan <- ack{name: s.name} // must ack local, too
   115  	}
   116  
   117  	// Set stage in API to trigger remote instances to boot
   118  	if s.api != nil && nRemotes > 0 {
   119  		if err := s.api.Stage(m); err != nil {
   120  			return err
   121  		}
   122  	}
   123  
   124  	// Wait for the required number instances to boot. If running only local,
   125  	// this will be instant because local already booted and acked above.
   126  	// But with remotes, this might take a few milliseconds over the network.
   127  	if nInstances > 1 {
   128  		log.Printf("Waiting for %d instances to boot...", nInstances)
   129  	}
   130  	booted := uint(0)
   131  	for booted < nInstances {
   132  		select {
   133  		case ack := <-m.bootChan:
   134  			if ack.err != nil {
   135  				log.Printf("Remote %s error on boot: %s", ack.name, ack.err)
   136  				continue
   137  			}
   138  			booted += 1
   139  			if nInstances > 1 {
   140  				log.Printf("%s booted", ack.name)
   141  			}
   142  		case <-ctxFinch.Done():
   143  			return nil
   144  		}
   145  	}
   146  
   147  	// Close stage in API to prevent remotes from joining
   148  	m.Lock()
   149  	m.booted = true
   150  	m.Unlock()
   151  
   152  	if s.test {
   153  		return nil
   154  	}
   155  
   156  	// ----------------------------------------------------------------------
   157  	// Run stage
   158  	// ----------------------------------------------------------------------
   159  
   160  	finch.Debug("run %s", stageName)
   161  	close(m.runChan) // signal remotes to run
   162  
   163  	if local != nil { // start local instance
   164  		go func() {
   165  			local.Run(ctxFinch)
   166  			m.doneChan <- ack{name: s.name}
   167  		}()
   168  	}
   169  
   170  	// Wait for instances to finish running
   171  	running := booted
   172  	for running > 0 {
   173  		select {
   174  		case ack := <-m.doneChan:
   175  			running -= 1
   176  			if ack.err != nil {
   177  				log.Printf("%s error running stage %s: %s", ack.name, stageName, ack.err)
   178  			}
   179  			if nInstances > 1 {
   180  				log.Printf("%s completed stage %s", ack.name, stageName)
   181  				if running > 0 {
   182  					log.Printf("%d/%d instances running", running, nInstances)
   183  				}
   184  			}
   185  		case <-ctxFinch.Done():
   186  			// Signal remote instances to stop early and (maybe) send finals stats
   187  			if s.api != nil {
   188  				s.api.Stage(nil)
   189  			}
   190  		}
   191  	}
   192  
   193  	return nil
   194  }