github.com/google/trillian-examples@v0.0.0-20240520080811-0d40d35cef0e/binary_transparency/firmware/cmd/ftmap/map.go (about)

     1  // Copyright 2020 Google LLC. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // map constructs a verifiable map from the firmware in the FT log.
    16  package main
    17  
    18  import (
    19  	"context"
    20  	"database/sql"
    21  	"encoding/json"
    22  	"flag"
    23  	"fmt"
    24  	"reflect"
    25  
    26  	"github.com/apache/beam/sdks/v2/go/pkg/beam"
    27  	"github.com/apache/beam/sdks/v2/go/pkg/beam/io/databaseio"
    28  	beamlog "github.com/apache/beam/sdks/v2/go/pkg/beam/log"
    29  	"github.com/apache/beam/sdks/v2/go/pkg/beam/x/beamx"
    30  
    31  	"github.com/golang/glog"
    32  
    33  	"github.com/google/trillian/experimental/batchmap"
    34  	"github.com/google/trillian/types"
    35  
    36  	"github.com/google/trillian-examples/binary_transparency/firmware/api"
    37  	"github.com/google/trillian-examples/binary_transparency/firmware/internal/ftmap"
    38  
    39  	_ "github.com/go-sql-driver/mysql"
    40  	_ "github.com/mattn/go-sqlite3"
    41  )
    42  
    43  var (
    44  	trillianMySQL = flag.String("trillian_mysql", "", "The connection string to the Trillian MySQL database.")
    45  	mapDBString   = flag.String("map_db", "", "Connection path for output database where the map tiles will be written.")
    46  	count         = flag.Int64("count", -1, "The total number of entries starting from the beginning of the log to use, or -1 to use all. This can be used to independently create maps of the same size.")
    47  	batchSize     = flag.Int("write_batch_size", 250, "Number of tiles to write per batch")
    48  )
    49  
    50  func init() {
    51  	beam.RegisterType(reflect.TypeOf((*tileToDBRowFn)(nil)).Elem())
    52  	beam.RegisterType(reflect.TypeOf((*logToDBRowFn)(nil)).Elem())
    53  	beam.RegisterType(reflect.TypeOf((*aggToDBRowFn)(nil)).Elem())
    54  }
    55  
    56  func main() {
    57  	flag.Parse()
    58  	beam.Init()
    59  
    60  	// Connect to where we will read from and write to.
    61  	trillianDB, err := newTrillianDBFromFlags()
    62  	if err != nil {
    63  		glog.Exitf("Failed to initialize Trillian connection: %v", err)
    64  	}
    65  	mapDB, rev, err := sinkFromFlags()
    66  	if err != nil {
    67  		glog.Exitf("Failed to initialize Map DB: %v", err)
    68  	}
    69  
    70  	// The tree & strata config is part of the API for clients. If we make this configurable then
    71  	// there needs to be some dynamic way to get this to clients (e.g. in a MapCheckpoint).
    72  	pb := ftmap.NewMapBuilder(trillianDB, api.MapTreeID, api.MapPrefixStrata)
    73  
    74  	beamlog.SetLogger(&BeamGLogger{InfoLogAtVerbosity: 2})
    75  	p, s := beam.NewPipelineWithRoot()
    76  	result, err := pb.Create(s, *count)
    77  	if err != nil {
    78  		glog.Exitf("Failed to build Create pipeline: %v", err)
    79  	}
    80  
    81  	tileRows := beam.ParDo(s.Scope("convertTiles"), &tileToDBRowFn{Revision: rev}, result.MapTiles)
    82  	databaseio.WriteWithBatchSize(s.Scope("sinkTiles"), *batchSize, "sqlite3", *mapDBString, "tiles", []string{}, tileRows)
    83  	aggRows := beam.ParDo(s.Scope("convertAgg"), &aggToDBRowFn{Revision: rev}, result.AggregatedFirmware)
    84  	databaseio.WriteWithBatchSize(s.Scope("sinkAgg"), *batchSize, "sqlite3", *mapDBString, "aggregations", []string{}, aggRows)
    85  	logRows := beam.ParDo(s, &logToDBRowFn{rev}, result.DeviceLogs)
    86  	databaseio.WriteWithBatchSize(s.Scope("sinkLogs"), *batchSize, "sqlite3", *mapDBString, "logs", []string{}, logRows)
    87  
    88  	// All of the above constructs the pipeline but doesn't run it. Now we run it.
    89  	if err := beamx.Run(context.Background(), p); err != nil {
    90  		glog.Exitf("Failed to execute job: %q", err)
    91  	}
    92  
    93  	// Now write the revision metadata to finalize this map construction.
    94  	if err := mapDB.WriteRevision(rev, result.Metadata.Checkpoint, result.Metadata.Entries); err != nil {
    95  		glog.Exitf("Failed to finalize map revison %d: %v", rev, err)
    96  	}
    97  }
    98  
    99  func sinkFromFlags() (*ftmap.MapDB, int, error) {
   100  	if len(*mapDBString) == 0 {
   101  		return nil, 0, fmt.Errorf("missing flag: map_db")
   102  	}
   103  
   104  	mapDB, err := ftmap.NewMapDB(*mapDBString)
   105  	if err != nil {
   106  		return nil, 0, fmt.Errorf("failed to open map DB at %q: %v", *mapDBString, err)
   107  	}
   108  
   109  	var rev int
   110  	if rev, err = mapDB.NextWriteRevision(); err != nil {
   111  		return nil, 0, fmt.Errorf("failed to query for next write revision: %v", err)
   112  
   113  	}
   114  	return mapDB, rev, nil
   115  }
   116  
// LogDBRow adapts DeviceReleaseLog to the schema format of the Map database to allow for databaseio writing.
// Rows of this type are produced by logToDBRowFn and written to the "logs" table.
type LogDBRow struct {
	Revision int    // map revision that this row is written for
	DeviceID string // copied from the DeviceReleaseLog's DeviceID
	Leaves   []byte // JSON encoding of the DeviceReleaseLog's Revisions
}
   123  
   124  type logToDBRowFn struct {
   125  	Revision int
   126  }
   127  
   128  func (fn *logToDBRowFn) ProcessElement(ctx context.Context, l *api.DeviceReleaseLog) (LogDBRow, error) {
   129  	bs, err := json.Marshal(l.Revisions)
   130  	if err != nil {
   131  		return LogDBRow{}, err
   132  	}
   133  	return LogDBRow{
   134  		Revision: fn.Revision,
   135  		DeviceID: l.DeviceID,
   136  		Leaves:   bs,
   137  	}, nil
   138  }
   139  
// MapTile is the schema format of the Map database to allow for databaseio writing.
// Rows of this type are produced by tileToDBRowFn and written to the "tiles" table.
type MapTile struct {
	Revision int    // map revision that this tile is written for
	Path     []byte // copied from the batchmap tile's Path
	Tile     []byte // JSON encoding of the whole batchmap tile
}
   146  
   147  type tileToDBRowFn struct {
   148  	Revision int
   149  }
   150  
   151  func (fn *tileToDBRowFn) ProcessElement(ctx context.Context, t *batchmap.Tile) (MapTile, error) {
   152  	bs, err := json.Marshal(t)
   153  	if err != nil {
   154  		return MapTile{}, err
   155  	}
   156  	return MapTile{
   157  		Revision: fn.Revision,
   158  		Path:     t.Path,
   159  		Tile:     bs,
   160  	}, nil
   161  }
   162  
// AggregatedFirmwareDBRow adapts AggregatedFirmware to the schema format of the Map database to allow for databaseio writing.
// Rows of this type are produced by aggToDBRowFn and written to the "aggregations" table.
type AggregatedFirmwareDBRow struct {
	// The keys are the index of the FW Log Metadata that was aggregated, and map Revision number.
	FWLogIndex uint64
	Revision   int

	// The value is the summary of the aggregated information. Thus far, this is a boolean stored as
	// an integer: 1 when the aggregated firmware is considered good, and 0 otherwise.
	// Clients will have the other information about the FW so no need to duplicate it here.
	Good int
}
   173  
   174  type aggToDBRowFn struct {
   175  	Revision int
   176  }
   177  
   178  func (fn *aggToDBRowFn) ProcessElement(ctx context.Context, t *api.AggregatedFirmware) AggregatedFirmwareDBRow {
   179  	goodInt := 0
   180  	if t.Good {
   181  		goodInt = 1
   182  	}
   183  	return AggregatedFirmwareDBRow{
   184  		FWLogIndex: t.Index,
   185  		Revision:   fn.Revision,
   186  		Good:       goodInt,
   187  	}
   188  }
   189  
// trillianDB reads the head and the leaves of a Trillian log by querying the Trillian MySQL
// database directly.
//
// TODO(mhutchinson): This only works if the Trillian DB has a single tree.
type trillianDB struct {
	dbString string  // connection string; Entries passes it to databaseio.Query
	db       *sql.DB // handle opened from the same connection string; used by Head
}
   195  
   196  func newTrillianDBFromFlags() (*trillianDB, error) {
   197  	if len(*trillianMySQL) == 0 {
   198  		return nil, fmt.Errorf("missing flag: trillian_mysql")
   199  	}
   200  	db, err := sql.Open("mysql", *trillianMySQL)
   201  	return &trillianDB{
   202  		dbString: *trillianMySQL,
   203  		db:       db,
   204  	}, err
   205  }
   206  
   207  // Head gets the STH and the total number of entries available to process.
   208  func (m *trillianDB) Head() ([]byte, int64, error) {
   209  	// This implementation taken from Trillian's storage/mysql/log_storage.go#fetchLatestRoot
   210  	var timestamp, treeSize, treeRevision int64
   211  	var rootHash []byte
   212  	if err := m.db.QueryRow("SELECT TreeHeadTimestamp,TreeSize,RootHash,TreeRevision FROM TreeHead ORDER BY TreeRevision DESC LIMIT 1").Scan(
   213  		&timestamp, &treeSize, &rootHash, &treeRevision,
   214  	); err != nil {
   215  		// It's possible there are no roots for this tree yet
   216  		return []byte{}, 0, fmt.Errorf("failed to read TreeHead table: %w", err)
   217  	}
   218  
   219  	// Put logRoot back together. Fortunately LogRoot has a deterministic serialization.
   220  	cp, err := (&types.LogRootV1{
   221  		RootHash:       rootHash,
   222  		TimestampNanos: uint64(timestamp),
   223  		Revision:       uint64(treeRevision),
   224  		TreeSize:       uint64(treeSize),
   225  	}).MarshalBinary()
   226  	if err != nil {
   227  		return []byte{}, 0, fmt.Errorf("failed to marshal LogRoot: %w", err)
   228  	}
   229  	return cp, treeSize, nil
   230  }
   231  
// sequencedLeafDataQuery is a format string for the SQL query that reads sequenced leaves.
// It selects each leaf's sequence number (as Seq) and value (as Data) by joining
// SequencedLeafData with LeafData. The two %d verbs are, in order, the inclusive lower
// bound and the exclusive upper bound on SequenceNumber.
const sequencedLeafDataQuery = `
SELECT
  s.SequenceNumber AS Seq,
  l.LeafValue AS Data
FROM SequencedLeafData s INNER JOIN LeafData l
  ON s.TreeId = l.TreeId AND s.LeafIdentityHash = l.LeafIdentityHash 
WHERE
  s.SequenceNumber >= %d AND s.SequenceNumber < %d
`
   241  
   242  // Entries returns a PCollection of InputLogLeaf, containing entries in range [start, end).
   243  func (m *trillianDB) Entries(s beam.Scope, start, end int64) beam.PCollection {
   244  	return databaseio.Query(s, "mysql", m.dbString, fmt.Sprintf(sequencedLeafDataQuery, start, end), reflect.TypeOf(ftmap.InputLogLeaf{}))
   245  }
   246  
   247  // BeamGLogger allows Beam to log via the glog mechanism.
   248  // This is used to allow the very verbose logging output from Beam to be switched off.
   249  type BeamGLogger struct {
   250  	InfoLogAtVerbosity glog.Level
   251  }
   252  
   253  // Log logs.
   254  func (l *BeamGLogger) Log(ctx context.Context, sev beamlog.Severity, _ int, msg string) {
   255  	switch sev {
   256  	case beamlog.SevDebug:
   257  		glog.V(3).Info(msg)
   258  	case beamlog.SevInfo:
   259  		glog.V(l.InfoLogAtVerbosity).Info(msg)
   260  	case beamlog.SevError:
   261  		glog.Error(msg)
   262  	case beamlog.SevWarn:
   263  		glog.Warning(msg)
   264  	default:
   265  		glog.V(5).Infof("?? %s", msg)
   266  	}
   267  }