kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/platform/analysis/proxy/proxy.go (about)

     1  /*
     2   * Copyright 2017 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  // Package proxy implements a wrapper that proxies an analysis request to a
    18  // Kythe indexer that does not speak protocol buffers or RPCs.
    19  //
    20  // Communication with the underlying indexer is via JSON over pipes, with
    21  // messages from the proxy to the indexer on one side and replies on the other.
    22  // Each JSON message is sent as a single complete line terminated by a newline
    23  // (LF) character.
    24  //
    25  // Communication between proxy and indexer is in lock-step; there is only one
    26  // transaction active at a time in either direction and the indexer initiates
    27  // all requests.
    28  //
    29  // The protocol between indexer (X) and proxy (P) is:
    30  //
    31  //	X → P: {"req":"analysis"}<LF>
    32  //	P → X: {"rsp":"ok","args":{"unit":<unit>,"rev":<revision>,"fds":<addr>}}<LF>
    33  //	       {"rsp":"error","args":<error>}<LF>
    34  //
    35  //	X → P: {"req":"analysis_wire"}<LF>
    36  //	P → X: {"rsp":"ok","args":{"unit":<unit_wire>,"rev":<revision>,"fds":<addr>}}<LF>
    37  //	       {"rsp":"error","args":<error>}<LF>
    38  //
    39  //	X → P: {"req":"output","args":[<entry>...]}<LF>
    40  //	P → X: {"rsp":"ok"}<LF>
    41  //	       {"rsp":"error","args":<error>}<LF>
    42  //
    43  //	X → P: {"req":"output_wire","args":[<entry_wire>...]}<LF>
    44  //	P → X: {"rsp":"ok"}<LF>
    45  //	       {"rsp":"error","args":<error>}<LF>
    46  //
    47  //	X → P: {"req":"done","args":{"ok":true,"msg":<error>}}<LF>
    48  //	P → X: {"rsp":"ok"}<LF>
    49  //	       {"rsp":"error","args":<error>}<LF>
    50  //
    51  //	X → P: {"req":"file","args":{"path":<path>,"digest":<digest>}}<LF>
    52  //	P → X: {"rsp":"ok","args":{"path":<path>,"digest":<digest>,"content":<bytes>}}<LF>
    53  //	       {"rsp":"error","args":<error>}<LF>
    54  //
    55  // Where:
    56  //
    57  //	<addr>       -- service address
    58  //	<bytes>      -- BASE-64 encoded bytes (string)
    59  //	<digest>     -- file content digest (string)
    60  //	<entry>      -- JSON encoded kythe.proto.Entry message
    61  //	<entry_wire> -- BASE-64 encoded kythe.proto.Entry message in wire format (string)
    62  //	<error>      -- error diagnostic (string)
    63  //	<path>       -- file path (string)
    64  //	<revision>   -- revision marker (string)
    65  //	<unit>       -- JSON encoded kythe.proto.CompilationUnit message
    66  //	<unit_wire>  -- BASE-64 encoded kythe.proto.CompilationUnit message in wire format (string)
    67  //	<LF>         -- line feed character (decimal code 10)
    68  //
    69  // When rsp="error" in a reply, the args are an error string. The ordinary flow
    70  // for the indexer is:
    71  //
    72  //	{"req":"analysis"}   -- to start a new analysis task
    73  //	{"req":"output",...} -- to send outputs for the task
    74  //	{"req":"file",...}   -- to fetch required input files.
    75  //	    ... (as many times as needed) ...
    76  //	{"req":"done",...}   -- to mark the analysis as complete
    77  //	                        and to report success/failure
    78  //
    79  // The proxy supports an extra /kythe/code/json fact.  Its value will be
    80  // interpreted as a JSON-encoded kythe.proto.common.MarkedSource message and
    81  // will be rewritten to the equivalent wire-encoded /kythe/code fact.
    82  //
    83  // In case of an indexing error, the indexer is free to terminate the analysis
    84  // early and report {"req":"done","args":{"ok":false}} to the driver.
    85  //
    86  // In case of an error writing output data, the driver will report an error in
    87  // response to the "output" call. Once this happens, the analysis is abandoned
    88  // as if the indexer had called "done". Subsequent calls to "output" or "done"
    89  // will behave as if no analysis is in progress. The indexer is free at that
    90  // point to start a new analysis.
    91  package proxy // import "kythe.io/kythe/go/platform/analysis/proxy"
    92  
    93  import (
    94  	"encoding/json"
    95  	"errors"
    96  	"fmt"
    97  	"io"
    98  
    99  	"kythe.io/kythe/go/util/log"
   100  	"kythe.io/kythe/go/util/schema/facts"
   101  
   102  	"google.golang.org/protobuf/encoding/protojson"
   103  	"google.golang.org/protobuf/proto"
   104  
   105  	apb "kythe.io/kythe/proto/analysis_go_proto"
   106  	cpb "kythe.io/kythe/proto/common_go_proto"
   107  	spb "kythe.io/kythe/proto/storage_go_proto"
   108  )
   109  
   110  // New returns a proxy that reads requests from in and writes responses to out.
   111  // The caller must invoke Run to start processing requests.
   112  func New(in io.Reader, out io.Writer) *Proxy {
   113  	return &Proxy{
   114  		in:  json.NewDecoder(in),
   115  		out: json.NewEncoder(out),
   116  	}
   117  }
   118  
   119  // A request represents a request from the indexer to the proxy.
   120  type request struct {
   121  	Type string          `json:"req"` // the type of the request
   122  	Args json.RawMessage `json:"args,omitempty"`
   123  }
   124  
   125  // A response represents a response from the proxy to the indexer.
   126  type response struct {
   127  	Status string `json:"rsp"` // status message
   128  	Args   any    `json:"args,omitempty"`
   129  }
   130  
   131  // A unit represents a compilation unit (in JSON form) to be analyzed.
   132  type unit struct {
   133  	Unit            json.RawMessage `json:"unit"`
   134  	Revision        string          `json:"rev,omitempty"`
   135  	FileDataService string          `json:"fds,omitempty"`
   136  }
   137  
   138  // A unit represents a compilation unit (in base64-encoded wire format) to be analyzed.
   139  type unitWire struct {
   140  	Unit            []byte `json:"unit"`
   141  	Revision        string `json:"rev,omitempty"`
   142  	FileDataService string `json:"fds,omitempty"`
   143  }
   144  
   145  // A file represents a file request or content reply.
   146  type file struct {
   147  	Path    string `json:"path,omitempty"`
   148  	Digest  string `json:"digest,omitempty"`
   149  	Content []byte `json:"content,omitempty"`
   150  }
   151  
   152  // A status represents a status message from a "done" request.
   153  type status struct {
   154  	OK      bool   `json:"ok,omitempty"`
   155  	Message string `json:"msg,omitempty"`
   156  }
   157  
   158  // A Proxy processes requests from an indexer subprocess on the other end of a
   159  // pipe by dispatching them to a callback handler.  The caller must invoke Run
   160  // to start processing requests.
   161  type Proxy struct {
   162  	in  *json.Decoder // input from the indexer
   163  	out *json.Encoder // output to the indexer
   164  	err error
   165  }
   166  
   167  // Run blocks serving requests from the indexer until communication with the
   168  // indexer reports an error. The resulting error is returned unless it is
   169  // io.EOF, in which case Run returns nil.
   170  func (p *Proxy) Run(h Handler) error {
   171  	var hasReq bool // true when a request is being processed
   172  	for {
   173  		var req request
   174  		if err := p.in.Decode(&req); err == io.EOF {
   175  			return nil
   176  		} else if err != nil {
   177  			return err
   178  		}
   179  
   180  		switch req.Type {
   181  		case "analysis":
   182  			// Prerequisite: There is no analysis request already in progress.
   183  			if hasReq {
   184  				p.reply("error", "an analysis is already in progress")
   185  			} else if req, err := h.Analysis(); err != nil {
   186  				p.reply("error", err.Error())
   187  			} else {
   188  				hasReq = true
   189  				u, err := protojson.MarshalOptions{UseProtoNames: true}.Marshal(req.Compilation)
   190  				if err != nil {
   191  					return fmt.Errorf("error marshalling compilation unit as JSON: %w", err)
   192  				}
   193  				p.reply("ok", &unit{
   194  					Unit:            json.RawMessage(u),
   195  					Revision:        req.Revision,
   196  					FileDataService: req.FileDataService,
   197  				})
   198  			}
   199  
   200  		case "analysis_wire":
   201  			// Prerequisite: There is no analysis request already in progress.
   202  			if hasReq {
   203  				p.reply("error", "an analysis is already in progress")
   204  			} else if req, err := h.Analysis(); err != nil {
   205  				p.reply("error", err.Error())
   206  			} else {
   207  				hasReq = true
   208  				u, err := proto.MarshalOptions{}.Marshal(req.Compilation)
   209  				if err != nil {
   210  					return fmt.Errorf("error marshalling compilation unit as wire format: %w", err)
   211  				}
   212  				p.reply("ok", &unitWire{
   213  					Unit:            u,
   214  					Revision:        req.Revision,
   215  					FileDataService: req.FileDataService,
   216  				})
   217  			}
   218  
   219  		case "output":
   220  			// Prerequisite: There is an analysis request in progress.
   221  			if !hasReq {
   222  				p.reply("error", "no analysis is in progress")
   223  				break
   224  			}
   225  			entries, err := decodeEntries(req.Args)
   226  			if err != nil {
   227  				p.reply("error", "invalid entries: "+err.Error())
   228  			} else if err := h.Output(entries...); err != nil {
   229  				p.reply("error", err.Error())
   230  			} else {
   231  				p.reply("ok", nil)
   232  				break
   233  			}
   234  
   235  			// Analysis is now abandoned; report the error back to the
   236  			// handler and give up.
   237  			h.Done(err)
   238  			hasReq = false
   239  
   240  		case "output_wire":
   241  			// Prerequisite: There is an analysis request in progress.
   242  			if !hasReq {
   243  				p.reply("error", "no analysis is in progress")
   244  				break
   245  			}
   246  			entries, err := decodeWireEntries(req.Args)
   247  			if err != nil {
   248  				p.reply("error", "invalid entries: "+err.Error())
   249  			} else if err := h.Output(entries...); err != nil {
   250  				p.reply("error", err.Error())
   251  			} else {
   252  				p.reply("ok", nil)
   253  				break
   254  			}
   255  
   256  			// Analysis is now abandoned; report the error back to the
   257  			// handler and give up.
   258  			h.Done(err)
   259  			hasReq = false
   260  
   261  		case "done":
   262  			// Prerequisite: There is an analysis request in progress.
   263  			if !hasReq {
   264  				p.reply("error", "no analysis is in progress")
   265  				break
   266  			}
   267  			hasReq = false
   268  			if req.Args == nil {
   269  				h.Done(nil)
   270  			} else {
   271  				var stat status
   272  				if err := json.Unmarshal(req.Args, &stat); err != nil {
   273  					h.Done(err)
   274  					p.reply("error", "invalid status: "+err.Error())
   275  					break
   276  				} else if !stat.OK {
   277  					h.Done(errors.New("analysis failed: " + stat.Message))
   278  				} else {
   279  					h.Done(nil)
   280  				}
   281  			}
   282  			p.reply("ok", nil)
   283  
   284  		case "file":
   285  			// A file request can be issued at any time, though in practice it
   286  			// may fail when no request is present since different requests may
   287  			// come from different file sources.
   288  			var info file
   289  			if err := json.Unmarshal(req.Args, &info); err != nil {
   290  				p.reply("error", "invalid file request: "+err.Error())
   291  				break
   292  			} else if info.Path == "" && info.Digest == "" {
   293  				p.reply("error", "empty file request")
   294  				break
   295  			}
   296  			bits, err := h.File(info.Path, info.Digest)
   297  			if err != nil {
   298  				p.reply("error", err.Error())
   299  			} else {
   300  				p.reply("ok", &file{
   301  					Path:    info.Path,
   302  					Digest:  info.Digest,
   303  					Content: bits,
   304  				})
   305  			}
   306  
   307  		default:
   308  			p.reply("error", "invalid request: "+req.Type)
   309  		}
   310  
   311  		if p.err != nil {
   312  			return p.err // deferred from above
   313  		}
   314  	}
   315  }
   316  
   317  const codeJSONFact = facts.Code + "/json"
   318  
   319  func rewriteEntry(e *spb.Entry) (*spb.Entry, error) {
   320  	if e.GetFactName() == codeJSONFact {
   321  		ms := new(cpb.MarkedSource)
   322  		if err := protojson.Unmarshal(e.GetFactValue(), ms); err != nil {
   323  			return nil, err
   324  		}
   325  		rec, err := proto.Marshal(ms)
   326  		if err != nil {
   327  			return nil, err
   328  		}
   329  		return &spb.Entry{
   330  			Source:    e.Source,
   331  			FactName:  facts.Code,
   332  			FactValue: rec,
   333  		}, nil
   334  	}
   335  	return e, nil
   336  }
   337  
   338  func decodeEntries(jsonArray json.RawMessage) ([]*spb.Entry, error) {
   339  	var messages []json.RawMessage
   340  	if err := json.Unmarshal(jsonArray, &messages); err != nil {
   341  		return nil, err
   342  	}
   343  	entries := make([]*spb.Entry, len(messages))
   344  	for i, msg := range messages {
   345  		e := new(spb.Entry)
   346  		if err := protojson.Unmarshal(msg, e); err != nil {
   347  			return nil, err
   348  		}
   349  		e, err := rewriteEntry(e)
   350  		if err != nil {
   351  			log.Errorf("could not rewrite entry: %v", err)
   352  			continue
   353  		}
   354  		entries[i] = e
   355  	}
   356  	return entries, nil
   357  }
   358  
   359  func decodeWireEntries(jsonArray json.RawMessage) ([]*spb.Entry, error) {
   360  	var messages [][]byte
   361  	if err := json.Unmarshal(jsonArray, &messages); err != nil {
   362  		return nil, err
   363  	}
   364  	entries := make([]*spb.Entry, len(messages))
   365  	for i, msg := range messages {
   366  		e := new(spb.Entry)
   367  		if err := (proto.UnmarshalOptions{}.Unmarshal(msg, e)); err != nil {
   368  			return nil, err
   369  		}
   370  		e, err := rewriteEntry(e)
   371  		if err != nil {
   372  			log.Errorf("could not rewrite entry: %v", err)
   373  			continue
   374  		}
   375  		entries[i] = e
   376  	}
   377  	return entries, nil
   378  }
   379  
   380  func (p *Proxy) reply(status string, args any) {
   381  	p.err = p.out.Encode(&response{
   382  		Status: status,
   383  		Args:   args,
   384  	})
   385  }
   386  
   387  // A Handler provides callbacks to handle requests issued by the indexer.
   388  type Handler interface {
   389  	// Analysis fetches a new analysis request. It is an error if there is an
   390  	// unfinished request already in flight.
   391  	Analysis() (*apb.AnalysisRequest, error)
   392  
   393  	// Output delivers output from an ongoing analysis.
   394  	Output(...*spb.Entry) error
   395  
   396  	// Done reports an ongoing analysis as complete, with the error indicating
   397  	// success or failure.
   398  	Done(error)
   399  
   400  	// File requests the contents of a file given a path and/or digest, at
   401  	// least one of which must be nonempty.
   402  	File(path, digest string) ([]byte, error)
   403  }