github.com/google/trillian-examples@v0.0.0-20240520080811-0d40d35cef0e/experimental/batchmap/sumdb/build/map.go (about) 1 // Copyright 2020 Google LLC. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // map constructs a verifiable map from the modules in Go SumDB. 16 package main 17 18 import ( 19 "context" 20 "database/sql" 21 "encoding/json" 22 "flag" 23 "fmt" 24 "reflect" 25 26 "github.com/apache/beam/sdks/v2/go/pkg/beam" 27 "github.com/apache/beam/sdks/v2/go/pkg/beam/io/databaseio" 28 beamlog "github.com/apache/beam/sdks/v2/go/pkg/beam/log" 29 "github.com/apache/beam/sdks/v2/go/pkg/beam/x/beamx" 30 31 "github.com/golang/glog" 32 33 "github.com/google/trillian/experimental/batchmap" 34 35 "github.com/google/trillian-examples/experimental/batchmap/sumdb/build/pipeline" 36 "github.com/google/trillian-examples/experimental/batchmap/sumdb/mapdb" 37 38 _ "github.com/mattn/go-sqlite3" 39 ) 40 41 var ( 42 sumDBString = flag.String("sum_db", "", "The path of the SQLite file generated by sumdbaudit, e.g. ~/sum.db.") 43 mapDBString = flag.String("map_db", "", "Output database where the map tiles will be written.") 44 treeID = flag.Int64("tree_id", 12345, "The ID of the tree. Used as a salt in hashing.") 45 prefixStrata = flag.Int("prefix_strata", 2, "The number of strata of 8-bit strata before the final strata.") 46 count = flag.Int64("count", -1, "The total number of entries starting from the beginning of the SumDB to use, or -1 to use all") 47 batchSize = flag.Int("write_batch_size", 250, "Number of tiles to write per batch") 48 incrementalUpdate = flag.Bool("incremental_update", false, "If set the map tiles from the previous revision will be updated with the delta, otherwise this will build the map from scratch each time.") 49 buildVersionList = flag.Bool("build_version_list", false, "If set then the map will also contain a mapping for each module to a log committing to its list of versions.") 50 ) 51 52 func init() { 53 beam.RegisterType(reflect.TypeOf((*tileToDBRowFn)(nil)).Elem()) 54 beam.RegisterFunction(tileFromDBRowFn) 55 56 beam.RegisterType(reflect.TypeOf((*logToDBRowFn)(nil)).Elem()) 57 } 58 59 func main() { 60 flag.Parse() 61 beam.Init() 62 63 // Connect to where we will read from and write to. 64 sumDB, err := newSumDBMirrorFromFlags() 65 if err != nil { 66 glog.Exitf("Failed to initialize from local SumDB: %v", err) 67 } 68 mapDB, rev, err := sinkFromFlags() 69 if err != nil { 70 glog.Exitf("Failed to initialize Map DB: %v", err) 71 } 72 73 pb := pipeline.NewMapBuilder(sumDB, *treeID, *prefixStrata, *buildVersionList) 74 75 beamlog.SetLogger(&BeamGLogger{InfoLogAtVerbosity: 2}) 76 p, s := beam.NewPipelineWithRoot() 77 78 var tiles, logs beam.PCollection 79 var inputLogMetadata pipeline.InputLogMetadata 80 if *incrementalUpdate { 81 lastMapRev, golden, startID, err := mapDB.LatestRevision() 82 if err != nil { 83 glog.Exitf("Failed to get LatestRevision: %v", err) 84 } 85 tileRows := databaseio.Query(s, "sqlite3", *mapDBString, fmt.Sprintf("SELECT * FROM tiles WHERE revision=%d", lastMapRev), reflect.TypeOf(MapTile{})) 86 lastTiles := beam.ParDo(s, tileFromDBRowFn, tileRows) 87 88 tiles, inputLogMetadata, err = pb.Update(s, lastTiles, pipeline.InputLogMetadata{ 89 Checkpoint: golden, 90 Entries: startID, 91 }, *count) 92 if err != nil { 93 glog.Exitf("Failed to build Update pipeline: %v", err) 94 } 95 } else { 96 tiles, logs, inputLogMetadata, err = pb.Create(s, *count) 97 if err != nil { 98 glog.Exitf("Failed to build Create pipeline: %v", err) 99 } 100 } 101 102 tileRows := beam.ParDo(s.Scope("convertoutput"), &tileToDBRowFn{Revision: rev}, tiles) 103 databaseio.WriteWithBatchSize(s.Scope("sink"), *batchSize, "sqlite3", *mapDBString, "tiles", []string{}, tileRows) 104 105 if *buildVersionList { 106 logRows := beam.ParDo(s, &logToDBRowFn{rev}, logs) 107 databaseio.WriteWithBatchSize(s.Scope("sinkLogs"), *batchSize, "sqlite3", *mapDBString, "logs", []string{}, logRows) 108 } 109 110 // All of the above constructs the pipeline but doesn't run it. Now we run it. 111 if err := beamx.Run(context.Background(), p); err != nil { 112 glog.Exitf("Failed to execute job: %q", err) 113 } 114 115 if err := mapDB.WriteRevision(rev, inputLogMetadata.Checkpoint, inputLogMetadata.Entries); err != nil { 116 glog.Exitf("Failed to finalize map revison %d: %v", rev, err) 117 } 118 } 119 120 func sinkFromFlags() (*mapdb.TileDB, int, error) { 121 if len(*mapDBString) == 0 { 122 return nil, 0, fmt.Errorf("missing flag: map_db") 123 } 124 125 tiledb, err := mapdb.NewTileDB(*mapDBString) 126 if err != nil { 127 return nil, 0, fmt.Errorf("failed to open map DB at %q: %v", *mapDBString, err) 128 } 129 if err := tiledb.Init(); err != nil { 130 return nil, 0, fmt.Errorf("failed to Init map DB at %q: %v", *mapDBString, err) 131 } 132 133 var rev int 134 if rev, err = tiledb.NextWriteRevision(); err != nil { 135 return nil, 0, fmt.Errorf("failed to query for next write revision: %v", err) 136 137 } 138 return tiledb, rev, nil 139 } 140 141 // LogDBRow adapts ModuleVersionLog to the schema format of the Map database to allow for databaseio writing. 142 type LogDBRow struct { 143 Revision int 144 Module string 145 Leaves []byte 146 } 147 148 type logToDBRowFn struct { 149 Revision int 150 } 151 152 func (fn *logToDBRowFn) ProcessElement(ctx context.Context, l *pipeline.ModuleVersionLog) (LogDBRow, error) { 153 bs, err := json.Marshal(l.Versions) 154 if err != nil { 155 return LogDBRow{}, err 156 } 157 return LogDBRow{ 158 Revision: fn.Revision, 159 Module: l.Module, 160 Leaves: bs, 161 }, nil 162 } 163 164 // MapTile is the schema format of the Map database to allow for databaseio writing. 165 type MapTile struct { 166 Revision int 167 Path []byte 168 Tile []byte 169 } 170 171 type tileToDBRowFn struct { 172 Revision int 173 } 174 175 func (fn *tileToDBRowFn) ProcessElement(ctx context.Context, t *batchmap.Tile) (MapTile, error) { 176 bs, err := json.Marshal(t) 177 if err != nil { 178 return MapTile{}, err 179 } 180 return MapTile{ 181 Revision: fn.Revision, 182 Path: t.Path, 183 Tile: bs, 184 }, nil 185 } 186 187 func tileFromDBRowFn(t MapTile) (*batchmap.Tile, error) { 188 var res batchmap.Tile 189 if err := json.Unmarshal(t.Tile, &res); err != nil { 190 return nil, err 191 } 192 return &res, nil 193 } 194 195 type sumDBMirror struct { 196 dbString string 197 db *sql.DB 198 } 199 200 func newSumDBMirrorFromFlags() (*sumDBMirror, error) { 201 if len(*sumDBString) == 0 { 202 return nil, fmt.Errorf("missing flag: sum_db") 203 } 204 db, err := sql.Open("sqlite3", *sumDBString) 205 return &sumDBMirror{ 206 dbString: *sumDBString, 207 db: db, 208 }, err 209 } 210 211 // Head gets the STH and the total number of entries available to process. 212 func (m *sumDBMirror) Head() ([]byte, int64, error) { 213 var cp []byte 214 var leafCount int64 215 216 if err := m.db.QueryRow("SELECT checkpoint FROM checkpoints ORDER BY datetime DESC LIMIT 1").Scan(&cp); err != nil { 217 return nil, 0, err 218 } 219 return cp, leafCount, m.db.QueryRow("SELECT COUNT(*) FROM leafMetadata").Scan(&leafCount) 220 } 221 222 // Entries returns a PCollection of Metadata, containing entries in range [start, end). 223 func (m *sumDBMirror) Entries(s beam.Scope, start, end int64) beam.PCollection { 224 return databaseio.Query(s, "sqlite3", m.dbString, fmt.Sprintf("SELECT * FROM leafMetadata WHERE id >= %d AND id < %d", start, end), reflect.TypeOf(pipeline.Metadata{})) 225 } 226 227 // BeamGLogger allows Beam to log via the glog mechanism. 228 // This is used to allow the very verbose logging output from Beam to be switched off. 229 type BeamGLogger struct { 230 InfoLogAtVerbosity glog.Level 231 } 232 233 // Log logs. 234 func (l *BeamGLogger) Log(ctx context.Context, sev beamlog.Severity, _ int, msg string) { 235 switch sev { 236 case beamlog.SevDebug: 237 glog.V(3).Info(msg) 238 case beamlog.SevInfo: 239 glog.V(l.InfoLogAtVerbosity).Info(msg) 240 case beamlog.SevError: 241 glog.Error(msg) 242 case beamlog.SevWarn: 243 glog.Warning(msg) 244 default: 245 glog.V(5).Infof("?? %s", msg) 246 } 247 }