github.com/google/trillian-examples@v0.0.0-20240520080811-0d40d35cef0e/binary_transparency/firmware/cmd/ftmap/map.go

// Copyright 2020 Google LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// map constructs a verifiable map from the firmware in the FT log.
package main

import (
	"context"
	"database/sql"
	"encoding/json"
	"flag"
	"fmt"
	"reflect"

	"github.com/apache/beam/sdks/v2/go/pkg/beam"
	"github.com/apache/beam/sdks/v2/go/pkg/beam/io/databaseio"
	beamlog "github.com/apache/beam/sdks/v2/go/pkg/beam/log"
	"github.com/apache/beam/sdks/v2/go/pkg/beam/x/beamx"

	"github.com/golang/glog"

	"github.com/google/trillian/experimental/batchmap"
	"github.com/google/trillian/types"

	"github.com/google/trillian-examples/binary_transparency/firmware/api"
	"github.com/google/trillian-examples/binary_transparency/firmware/internal/ftmap"

	_ "github.com/go-sql-driver/mysql"
	_ "github.com/mattn/go-sqlite3"
)

var (
	trillianMySQL = flag.String("trillian_mysql", "", "The connection string to the Trillian MySQL database.")
	mapDBString   = flag.String("map_db", "", "Connection path for output database where the map tiles will be written.")
	count         = flag.Int64("count", -1, "The total number of entries starting from the beginning of the log to use, or -1 to use all. This can be used to independently create maps of the same size.")
	batchSize     = flag.Int("write_batch_size", 250, "Number of tiles to write per batch")
)

func init() {
	beam.RegisterType(reflect.TypeOf((*tileToDBRowFn)(nil)).Elem())
	beam.RegisterType(reflect.TypeOf((*logToDBRowFn)(nil)).Elem())
	beam.RegisterType(reflect.TypeOf((*aggToDBRowFn)(nil)).Elem())
}

func main() {
	flag.Parse()
	beam.Init()

	// Connect to where we will read from and write to.
	trillianDB, err := newTrillianDBFromFlags()
	if err != nil {
		glog.Exitf("Failed to initialize Trillian connection: %v", err)
	}
	mapDB, rev, err := sinkFromFlags()
	if err != nil {
		glog.Exitf("Failed to initialize Map DB: %v", err)
	}

	// The tree & strata config is part of the API for clients. If we make this configurable then
	// there needs to be some dynamic way to get this to clients (e.g. in a MapCheckpoint).
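	// The builder's output (used in the stages below) contains the map tiles, the
	// per-device release logs, and an aggregation for each firmware log entry, plus
	// the checkpoint metadata that is written once the pipeline has run.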
	pb := ftmap.NewMapBuilder(trillianDB, api.MapTreeID, api.MapPrefixStrata)

	beamlog.SetLogger(&BeamGLogger{InfoLogAtVerbosity: 2})
	p, s := beam.NewPipelineWithRoot()
	result, err := pb.Create(s, *count)
	if err != nil {
		glog.Exitf("Failed to build Create pipeline: %v", err)
	}

	tileRows := beam.ParDo(s.Scope("convertTiles"), &tileToDBRowFn{Revision: rev}, result.MapTiles)
	databaseio.WriteWithBatchSize(s.Scope("sinkTiles"), *batchSize, "sqlite3", *mapDBString, "tiles", []string{}, tileRows)
	aggRows := beam.ParDo(s.Scope("convertAgg"), &aggToDBRowFn{Revision: rev}, result.AggregatedFirmware)
	databaseio.WriteWithBatchSize(s.Scope("sinkAgg"), *batchSize, "sqlite3", *mapDBString, "aggregations", []string{}, aggRows)
	logRows := beam.ParDo(s, &logToDBRowFn{rev}, result.DeviceLogs)
	databaseio.WriteWithBatchSize(s.Scope("sinkLogs"), *batchSize, "sqlite3", *mapDBString, "logs", []string{}, logRows)

	// All of the above constructs the pipeline but doesn't run it. Now we run it.
	if err := beamx.Run(context.Background(), p); err != nil {
		glog.Exitf("Failed to execute job: %q", err)
	}

	// Now write the revision metadata to finalize this map construction.
	if err := mapDB.WriteRevision(rev, result.Metadata.Checkpoint, result.Metadata.Entries); err != nil {
		glog.Exitf("Failed to finalize map revision %d: %v", rev, err)
	}
}

func sinkFromFlags() (*ftmap.MapDB, int, error) {
	if len(*mapDBString) == 0 {
		return nil, 0, fmt.Errorf("missing flag: map_db")
	}

	mapDB, err := ftmap.NewMapDB(*mapDBString)
	if err != nil {
		return nil, 0, fmt.Errorf("failed to open map DB at %q: %v", *mapDBString, err)
	}

	var rev int
	if rev, err = mapDB.NextWriteRevision(); err != nil {
		return nil, 0, fmt.Errorf("failed to query for next write revision: %v", err)
	}
	return mapDB, rev, nil
}

// LogDBRow adapts DeviceReleaseLog to the schema format of the Map database to allow for databaseio writing.
type LogDBRow struct {
	Revision int
	DeviceID string
	Leaves   []byte
}

type logToDBRowFn struct {
	Revision int
}

func (fn *logToDBRowFn) ProcessElement(ctx context.Context, l *api.DeviceReleaseLog) (LogDBRow, error) {
	bs, err := json.Marshal(l.Revisions)
	if err != nil {
		return LogDBRow{}, err
	}
	return LogDBRow{
		Revision: fn.Revision,
		DeviceID: l.DeviceID,
		Leaves:   bs,
	}, nil
}

// MapTile is the schema format of the Map database to allow for databaseio writing.
type MapTile struct {
	Revision int
	Path     []byte
	Tile     []byte
}

type tileToDBRowFn struct {
	Revision int
}

func (fn *tileToDBRowFn) ProcessElement(ctx context.Context, t *batchmap.Tile) (MapTile, error) {
	bs, err := json.Marshal(t)
	if err != nil {
		return MapTile{}, err
	}
	return MapTile{
		Revision: fn.Revision,
		Path:     t.Path,
		Tile:     bs,
	}, nil
}

// AggregatedFirmwareDBRow adapts AggregatedFirmware to the schema format of the Map database to allow for databaseio writing.
type AggregatedFirmwareDBRow struct {
	// The keys are the index of the FW Log Metadata that was aggregated, and the map Revision number.
	FWLogIndex uint64
	Revision   int

	// The value is the summary of the aggregated information. Thus far, a bool for whether it's considered good.
	// Clients will have the other information about the FW so no need to duplicate it here.
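	// Stored as an int (0 or 1) rather than a Go bool; aggToDBRowFn.ProcessElement performs the conversion.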
	Good int
}

type aggToDBRowFn struct {
	Revision int
}

func (fn *aggToDBRowFn) ProcessElement(ctx context.Context, t *api.AggregatedFirmware) AggregatedFirmwareDBRow {
	goodInt := 0
	if t.Good {
		goodInt = 1
	}
	return AggregatedFirmwareDBRow{
		FWLogIndex: t.Index,
		Revision:   fn.Revision,
		Good:       goodInt,
	}
}

// TODO(mhutchinson): This only works if the Trillian DB has a single tree.
type trillianDB struct {
	dbString string
	db       *sql.DB
}

func newTrillianDBFromFlags() (*trillianDB, error) {
	if len(*trillianMySQL) == 0 {
		return nil, fmt.Errorf("missing flag: trillian_mysql")
	}
	db, err := sql.Open("mysql", *trillianMySQL)
	return &trillianDB{
		dbString: *trillianMySQL,
		db:       db,
	}, err
}

// Head gets the STH and the total number of entries available to process.
func (m *trillianDB) Head() ([]byte, int64, error) {
	// This implementation taken from Trillian's storage/mysql/log_storage.go#fetchLatestRoot
	var timestamp, treeSize, treeRevision int64
	var rootHash []byte
	if err := m.db.QueryRow("SELECT TreeHeadTimestamp,TreeSize,RootHash,TreeRevision FROM TreeHead ORDER BY TreeRevision DESC LIMIT 1").Scan(
		&timestamp, &treeSize, &rootHash, &treeRevision,
	); err != nil {
		// It's possible there are no roots for this tree yet
		return []byte{}, 0, fmt.Errorf("failed to read TreeHead table: %w", err)
	}

	// Put logRoot back together. Fortunately LogRoot has a deterministic serialization.
	cp, err := (&types.LogRootV1{
		RootHash:       rootHash,
		TimestampNanos: uint64(timestamp),
		Revision:       uint64(treeRevision),
		TreeSize:       uint64(treeSize),
	}).MarshalBinary()
	if err != nil {
		return []byte{}, 0, fmt.Errorf("failed to marshal LogRoot: %w", err)
	}
	return cp, treeSize, nil
}

const sequencedLeafDataQuery = `
SELECT
	s.SequenceNumber AS Seq,
	l.LeafValue AS Data
FROM SequencedLeafData s INNER JOIN LeafData l
	ON s.TreeId = l.TreeId AND s.LeafIdentityHash = l.LeafIdentityHash
WHERE
	s.SequenceNumber >= %d AND s.SequenceNumber < %d
`

// Entries returns a PCollection of InputLogLeaf, containing entries in range [start, end).
func (m *trillianDB) Entries(s beam.Scope, start, end int64) beam.PCollection {
	return databaseio.Query(s, "mysql", m.dbString, fmt.Sprintf(sequencedLeafDataQuery, start, end), reflect.TypeOf(ftmap.InputLogLeaf{}))
}

// BeamGLogger allows Beam to log via the glog mechanism.
// This is used to allow the very verbose logging output from Beam to be switched off.
type BeamGLogger struct {
	InfoLogAtVerbosity glog.Level
}

// Log logs.
func (l *BeamGLogger) Log(ctx context.Context, sev beamlog.Severity, _ int, msg string) {
	switch sev {
	case beamlog.SevDebug:
		glog.V(3).Info(msg)
	case beamlog.SevInfo:
		glog.V(l.InfoLogAtVerbosity).Info(msg)
	case beamlog.SevError:
		glog.Error(msg)
	case beamlog.SevWarn:
		glog.Warning(msg)
	default:
		glog.V(5).Infof("?? %s", msg)
	}
}
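// The sketch below is illustrative only and is not part of the original tool: it
// shows how one of the tables written by the pipeline above could be read back.
// It assumes the "tiles" table has columns matching the MapTile struct fields
// (Revision, Path, Tile) and that the stored value is the JSON produced by
// tileToDBRowFn; the authoritative schema is owned by the ftmap package.
// readTileExample is a hypothetical helper, not an API of this package.
func readTileExample(db *sql.DB, revision int, path []byte) (*batchmap.Tile, error) {
	var raw []byte
	if err := db.QueryRow(
		"SELECT Tile FROM tiles WHERE Revision = ? AND Path = ?", revision, path,
	).Scan(&raw); err != nil {
		return nil, fmt.Errorf("failed to read tile at revision %d: %w", revision, err)
	}
	var tile batchmap.Tile
	if err := json.Unmarshal(raw, &tile); err != nil {
		return nil, fmt.Errorf("failed to unmarshal tile: %w", err)
	}
	return &tile, nil
}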