github.com/observiq/carbon@v0.9.11-0.20200820160507-1b872e368a5e/pipeline/pipeline.go (about)

     1  package pipeline
     2  
     3  import (
     4  	"fmt"
     5  	"strings"
     6  
     7  	"github.com/observiq/carbon/errors"
     8  	"github.com/observiq/carbon/operator"
     9  	"gonum.org/v1/gonum/graph/encoding/dot"
    10  	"gonum.org/v1/gonum/graph/simple"
    11  	"gonum.org/v1/gonum/graph/topo"
    12  )
    13  
    14  // Pipeline is a directed graph of connected operators.
    15  type Pipeline struct {
    16  	Graph   *simple.DirectedGraph
    17  	running bool
    18  }
    19  
    20  // Start will start the operators in a pipeline in reverse topological order.
    21  func (p *Pipeline) Start() error {
    22  	if p.running {
    23  		return nil
    24  	}
    25  
    26  	sortedNodes, _ := topo.Sort(p.Graph)
    27  	for i := len(sortedNodes) - 1; i >= 0; i-- {
    28  		operator := sortedNodes[i].(OperatorNode).Operator()
    29  		operator.Logger().Debug("Starting operator")
    30  		if err := operator.Start(); err != nil {
    31  			return err
    32  		}
    33  		operator.Logger().Debug("Started operator")
    34  	}
    35  
    36  	p.running = true
    37  	return nil
    38  }
    39  
    40  // Stop will stop the operators in a pipeline in topological order.
    41  func (p *Pipeline) Stop() {
    42  	if !p.running {
    43  		return
    44  	}
    45  
    46  	sortedNodes, _ := topo.Sort(p.Graph)
    47  	for _, node := range sortedNodes {
    48  		operator := node.(OperatorNode).Operator()
    49  		operator.Logger().Debug("Stopping operator")
    50  		_ = operator.Stop()
    51  		operator.Logger().Debug("Stopped operator")
    52  	}
    53  
    54  	p.running = false
    55  }
    56  
    57  // MarshalDot will encode the pipeline as a dot graph.
    58  func (p *Pipeline) MarshalDot() ([]byte, error) {
    59  	return dot.Marshal(p.Graph, "G", "", " ")
    60  }
    61  
    62  // addNodes will add operators as nodes to the supplied graph.
    63  func addNodes(graph *simple.DirectedGraph, operators []operator.Operator) error {
    64  	for _, operator := range operators {
    65  		operatorNode := createOperatorNode(operator)
    66  		if graph.Node(operatorNode.ID()) != nil {
    67  			return errors.NewError(
    68  				fmt.Sprintf("operator with id '%s' already exists in pipeline", operatorNode.Operator().ID()),
    69  				"ensure that each operator has a unique `type` or `id`",
    70  			)
    71  		}
    72  
    73  		graph.AddNode(operatorNode)
    74  	}
    75  	return nil
    76  }
    77  
    78  // connectNodes will connect the nodes in the supplied graph.
    79  func connectNodes(graph *simple.DirectedGraph) error {
    80  	nodes := graph.Nodes()
    81  	for nodes.Next() {
    82  		node := nodes.Node().(OperatorNode)
    83  		if err := connectNode(graph, node); err != nil {
    84  			return err
    85  		}
    86  	}
    87  
    88  	if _, err := topo.Sort(graph); err != nil {
    89  		return errors.NewError(
    90  			"pipeline has a circular dependency",
    91  			"ensure that all operators are connected in a straight, acyclic line",
    92  			"cycles", unorderableToCycles(err.(topo.Unorderable)),
    93  		)
    94  	}
    95  
    96  	return nil
    97  }
    98  
    99  // connectNode will connect a node to its outputs in the supplied graph.
   100  func connectNode(graph *simple.DirectedGraph, inputNode OperatorNode) error {
   101  	for outputOperatorID, outputNodeID := range inputNode.OutputIDs() {
   102  		if graph.Node(outputNodeID) == nil {
   103  			return errors.NewError(
   104  				"operators cannot be connected, because the output does not exist in the pipeline",
   105  				"ensure that the output operator is defined",
   106  				"input_operator", inputNode.Operator().ID(),
   107  				"output_operator", outputOperatorID,
   108  			)
   109  		}
   110  
   111  		outputNode := graph.Node(outputNodeID).(OperatorNode)
   112  		if !outputNode.Operator().CanProcess() {
   113  			return errors.NewError(
   114  				"operators cannot be connected, because the output operator can not process logs",
   115  				"ensure that the output operator can process logs (like a parser or destination)",
   116  				"input_operator", inputNode.Operator().ID(),
   117  				"output_operator", outputOperatorID,
   118  			)
   119  		}
   120  
   121  		if graph.HasEdgeFromTo(inputNode.ID(), outputNodeID) {
   122  			return errors.NewError(
   123  				"operators cannot be connected, because a connection already exists",
   124  				"ensure that only a single connection exists between the two operators",
   125  				"input_operator", inputNode.Operator().ID(),
   126  				"output_operator", outputOperatorID,
   127  			)
   128  		}
   129  
   130  		edge := graph.NewEdge(inputNode, outputNode)
   131  		graph.SetEdge(edge)
   132  	}
   133  
   134  	return nil
   135  }
   136  
   137  // setOperatorOutputs will set the outputs on operators that can output.
   138  func setOperatorOutputs(operators []operator.Operator) error {
   139  	for _, operator := range operators {
   140  		if !operator.CanOutput() {
   141  			continue
   142  		}
   143  
   144  		if err := operator.SetOutputs(operators); err != nil {
   145  			return errors.WithDetails(err, "operator_id", operator.ID())
   146  		}
   147  	}
   148  	return nil
   149  }
   150  
   151  // NewPipeline creates a new pipeline of connected operators.
   152  func NewPipeline(operators []operator.Operator) (*Pipeline, error) {
   153  	if err := setOperatorOutputs(operators); err != nil {
   154  		return nil, err
   155  	}
   156  
   157  	graph := simple.NewDirectedGraph()
   158  	if err := addNodes(graph, operators); err != nil {
   159  		return nil, err
   160  	}
   161  
   162  	if err := connectNodes(graph); err != nil {
   163  		return nil, err
   164  	}
   165  
   166  	return &Pipeline{Graph: graph}, nil
   167  }
   168  
   169  func unorderableToCycles(err topo.Unorderable) string {
   170  	var cycles strings.Builder
   171  	for i, cycle := range err {
   172  		if i != 0 {
   173  			cycles.WriteByte(',')
   174  		}
   175  		cycles.WriteByte('(')
   176  		for _, node := range cycle {
   177  			cycles.WriteString(node.(OperatorNode).operator.ID())
   178  			cycles.Write([]byte(` -> `))
   179  		}
   180  		cycles.WriteString(cycle[0].(OperatorNode).operator.ID())
   181  		cycles.WriteByte(')')
   182  	}
   183  	return cycles.String()
   184  }