github.com/quay/claircore@v1.5.28/rpm/bdb/bdb.go

package bdb

import (
	"context"
	"encoding/binary"
	"errors"
	"fmt"
	"io"

	"github.com/quay/zlog"
)

// PackageDB is the "pkgdb" a.k.a. "Packages", the raw package data.
type PackageDB struct {
	r   io.ReaderAt
	ord binary.ByteOrder
	m   hashmeta
}

// Parse closes over the provided [io.ReaderAt] and readies the PackageDB for use.
func (db *PackageDB) Parse(r io.ReaderAt) error {
	const (
		hashmagic   = 0x00061561
		hashmagicBE = 0x61150600
	)
	db.ord = binary.LittleEndian
Again:
	pg := io.NewSectionReader(r, 0, 512)
	if err := binary.Read(pg, db.ord, &db.m); err != nil {
		return err
	}
	if db.m.Magic == hashmagicBE {
		// Big-endian magic: swap the byte order, try again.
		db.ord = binary.BigEndian
		goto Again
	}

	if db.m.Magic != hashmagic {
		return fmt.Errorf("bdb: nonsense magic: %08x", db.m.Magic)
	}
	if db.m.Type != pagetypeHashMeta {
		return fmt.Errorf("bdb: nonsense page type: %08x", db.m.Type)
	}
	if db.m.EncryptAlg != 0 { // none
		return errors.New("bdb: database encryption not supported")
	}
	// Valid page sizes are powers of two, from 512 bytes up to 64KiB.
	ok := false
	for i := 0; i < 8; i++ {
		var sz uint32 = (1 << i) * 512
		if db.m.PageSize == sz {
			ok = true
			break
		}
	}
	if !ok {
		return fmt.Errorf("bdb: nonsense page size: %d", db.m.PageSize)
	}

	db.r = r
	return nil
}

/*
Some terminology:

  - LSN:
    Log Sequence Number -- Needed for detecting stale writes, I think.
    This package ignores it.

Note that the page type always falls in byte 25 -- very clever.
Don't freak out if it looks like the first page is read multiple ways; it is.

See also: libdb's src/dbinc/db_page.h
*/

// Meta is the generic metadata, aka DBMETA in C.
type meta struct {
	LSN         [8]byte  /* 00-07: LSN. */
	PageNo      uint32   /* 08-11: Current page number. */
	Magic       uint32   /* 12-15: Magic number. */
	Version     uint32   /* 16-19: Version. */
	PageSize    uint32   /* 20-23: Pagesize. */
	EncryptAlg  byte     /* 24: Encryption algorithm. */
	Type        byte     /* 25: Page type. */
	Metaflags   byte     /* 26: Meta-only flags */
	_           byte     /* 27: Unused. */
	Free        uint32   /* 28-31: Free list page number. */
	LastPageNo  uint32   /* 32-35: Page number of last page in db. */
	NParts      uint32   /* 36-39: Number of partitions. */
	KeyCount    uint32   /* 40-43: Cached key count. */
	RecordCount uint32   /* 44-47: Cached record count. */
	Flags       uint32   /* 48-51: Flags: unique to each AM. */
	UID         [20]byte /* 52-71: Unique file ID. */
}

// Pagetype numbers:
const (
	pagetypeHashMeta     = 8
	pagetypeHashUnsorted = 2
	pagetypeHash         = 13
	pagetypeHashOffIndex = 3
	pagetypeOverflow     = 7
	pagetypeKeyData      = 1 // Disused, we never examine the keys.
)

// Serialized sizes:
const (
	hashpageSize    = 26
	hashoffpageSize = 12
)
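// The serialized sizes above are what [binary.Read] consumes for the hashpage
// and hashoffpage structs defined below. A small sanity check that the Go
// layouts agree with the on-disk sizes might look like the following; the
// function checkSerializedSizes is an illustrative sketch, not part of the
// package:
func checkSerializedSizes() error {
	// binary.Size counts blank (_) padding fields, so it reports the encoded size.
	if sz := binary.Size(hashpage{}); sz != hashpageSize {
		return fmt.Errorf("bdb: hashpage layout is %d bytes, want %d", sz, hashpageSize)
	}
	if sz := binary.Size(hashoffpage{}); sz != hashoffpageSize {
		return fmt.Errorf("bdb: hashoffpage layout is %d bytes, want %d", sz, hashoffpageSize)
	}
	return nil
}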
// Hash database metadata, aka HMETA in C.
type hashmeta struct {
	meta                   /* 00-71: Generic meta-data page header. */
	MaxBucket   uint32     /* 72-75: ID of Maximum bucket in use */
	HighMask    uint32     /* 76-79: Modulo mask into table */
	LowMask     uint32     /* 80-83: Modulo mask into table lower half */
	FllFactor   uint32     /* 84-87: Fill factor */
	NElem       uint32     /* 88-91: Number of keys in hash table */
	HashCharKey uint32     /* 92-95: Value of hash(CHARKEY) */
	_           [32]uint32 /* 96-223: Spare pages for overflow */
	_           [59]uint32 /* 224-459: Unused space */
	CryptoMagic uint32     /* 460-463: Crypto magic number */
	_           [3]uint32  /* 464-475: Trash space - Do not use */
	// The comments don't line up, but the numbers come from the source, so...
	IV       [16]byte /* 476-495: Crypto IV */
	Checksum [20]byte /* 496-511: Page chksum */
}

// Hash page header, aka PAGE in C.
//
// Also shared with btree databases, which are unimplemented here.
// A page ([meta.PageSize] bytes of memory) has this struct at position 0 and is
// then populated backwards from the end for structured data, or immediately
// after this header for binary data.
type hashpage struct {
	LSN            [8]byte /* 00-07: Log sequence number. */
	PageNo         uint32  /* 08-11: Current page number. */
	PrevPageNo     uint32  /* 12-15: Previous page number. */
	NextPageNo     uint32  /* 16-19: Next page number. */
	Entries        uint16  /* 20-21: Number of items on the page. */
	HighFreeOffset uint16  /* 22-23: High free byte page offset. */
	Level          byte    /* 24: Btree tree level. */
	Type           byte    /* 25: Page type. */
}

// Hash page entries.
//
// This data structure doesn't appear directly in the C source, but open a file
// in a hex editor and it's apparent. The comments mention that "For hash and
// btree leaf pages, index items are paired, e.g., inp[0] is the key for
// inp[1]'s data." I think this is just a codification of that.
//
// We never bother looking up the key. If access to a single, specific header
// were needed, the code would have to handle it then.
type hashentry struct {
	Key  uint16
	Data uint16
}

// Hash offpage header, aka HOFFPAGE in C.
//
// This stores the data on how to extract "overflow"/"offpage" data.
type hashoffpage struct {
	Type   byte    /* 00: Page type and delete flag. */
	_      [3]byte /* 01-03: Padding, unused. */
	PageNo uint32  /* 04-07: Offpage page number. */
	Length uint32  /* 08-11: Total length of item. */
}
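// Both the generic meta header and the hash page header keep the page type at
// byte 25, so a single-byte peek is enough to classify a page before decoding
// a whole header (AllHeaders below uses the same trick on entries, peeking
// hashoffpage's leading type byte). The helper pageType is a hypothetical
// sketch built on that observation; it is not used by the package:
func pageType(r io.ReaderAt, pageNo, pageSize uint32) (byte, error) {
	// Byte 25 of a page holds the type byte, for meta pages and hash pages alike.
	b := make([]byte, 1)
	off := int64(pageNo)*int64(pageSize) + 25
	if _, err := r.ReadAt(b, off); err != nil {
		return 0, err
	}
	return b[0], nil
}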
// AllHeaders returns ReaderAts for all RPM headers in the PackageDB.
func (db *PackageDB) AllHeaders(ctx context.Context) ([]io.ReaderAt, error) {
	var ret []io.ReaderAt
	pageSz := int64(db.m.PageSize)
	for n, lim := int64(0), int64(db.m.LastPageNo)+1; n < lim; n++ {
		pg := io.NewSectionReader(db.r, n*pageSz, pageSz)
		var h hashpage
		if err := binary.Read(pg, db.ord, &h); err != nil {
			return nil, fmt.Errorf("bdb: error reading hashpage: %w", err)
		}
		if h.Type != pagetypeHashUnsorted && h.Type != pagetypeHash {
			continue
		}
		if h.Entries%2 != 0 {
			return nil, errors.New("bdb: odd number of entries")
		}

		// Entries come in key/data pairs; only the data halves are examined.
		ent := make([]hashentry, int(h.Entries)/2)
		for i := range ent {
			if err := binary.Read(pg, db.ord, &ent[i]); err != nil {
				return nil, fmt.Errorf("bdb: error reading hash entry: %w", err)
			}
		}

		k := []byte{0x00}
		for _, e := range ent {
			off := int64(e.Data)
			// First, check what kind of hash entry this is.
			view := io.NewSectionReader(pg, off, hashoffpageSize)
			if _, err := view.ReadAt(k, 0); err != nil {
				return nil, fmt.Errorf("bdb: error peeking page type: %w", err)
			}
			if k[0] != pagetypeHashOffIndex {
				continue
			}
			// Read the page header, now that we know it's correct.
			var offpg hashoffpage
			if err := binary.Read(view, db.ord, &offpg); err != nil {
				return nil, fmt.Errorf("bdb: error reading hashoffpage: %w", err)
			}
			// Follow the chain of overflow pages and stitch their payloads
			// together into a single reader.
			var r rope
			for n := offpg.PageNo; n != 0; {
				off := pageSz * int64(n)
				pg := io.NewSectionReader(db.r, off, pageSz)
				var h hashpage
				if err := binary.Read(pg, db.ord, &h); err != nil {
					return nil, fmt.Errorf("bdb: error reading hashpage: %w", err)
				}
				if h.Type != pagetypeOverflow {
					continue
				}
				off += hashpageSize

				var data *io.SectionReader
				if h.NextPageNo == 0 {
					// If this is the last page, only read to the end.
					data = io.NewSectionReader(db.r, off, int64(h.HighFreeOffset))
				} else {
					data = io.NewSectionReader(db.r, off, pageSz-hashpageSize)
				}
				if err := r.add(data); err != nil {
					return nil, fmt.Errorf("bdb: error adding to rope: %w", err)
				}
				n = h.NextPageNo
			}
			// Double-check we'll read the intended amount.
			if got, want := r.Size(), int64(offpg.Length); got != want {
				zlog.Info(ctx).
					Int64("got", got).
					Int64("want", want).
					Msg("bdb: expected data length botch")
			}
			ret = append(ret, &r)
		}
	}
	return ret, nil
}

// Validate is currently here to fulfil an interface.
func (db *PackageDB) Validate(_ context.Context) error {
	return nil
}

// Rope provides an [io.ReaderAt] over an ordered slice of [io.ReaderAt].
//
// It's much simpler than a real rope because it's append-only.
type rope struct {
	rd  []*io.SectionReader
	off []int64
}

var _ io.ReaderAt = (*rope)(nil)

// ReadAt implements [io.ReaderAt].
func (r *rope) ReadAt(b []byte, off int64) (int, error) {
	// Find the segment containing "off":
	idx := 0
	for i, roff := range r.off {
		if roff > off {
			break
		}
		idx = i
	}

	// Read as many segments as needed:
	n := 0
	rdoff := off - r.off[idx] // offset into the reader at "idx"
	for {
		rn, err := r.rd[idx].ReadAt(b[n:], rdoff)
		n += rn
		switch {
		case errors.Is(err, nil):
		case errors.Is(err, io.EOF):
			idx++
			if idx != len(r.rd) {
				rdoff = 0 // Reading from the start, now that we're on the next one.
				continue
			}
			fallthrough
		default:
			return n, err
		}
		if n == len(b) {
			break
		}
	}
	return n, nil
}

// Size reports the total size of data that can be read from this rope.
func (r *rope) Size() (s int64) {
	for _, rd := range r.rd {
		s += rd.Size()
	}
	return s
}

// Add appends the provided [io.SectionReader].
func (r *rope) add(rd *io.SectionReader) error {
	var off int64
	for _, rd := range r.rd {
		off += rd.Size()
	}
	r.rd = append(r.rd, rd)
	r.off = append(r.off, off)
	return nil
}
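// A minimal usage sketch, assuming the caller already has a bdb "Packages"
// file open as an [io.ReaderAt]: parse the file, then drain each returned
// reader into memory. The helper name dumpHeaders and the reliance on the
// concrete readers exposing a Size method are illustrative assumptions, not
// part of the package's API.
func dumpHeaders(ctx context.Context, f io.ReaderAt) ([][]byte, error) {
	var db PackageDB
	if err := db.Parse(f); err != nil {
		return nil, err
	}
	rds, err := db.AllHeaders(ctx)
	if err != nil {
		return nil, err
	}
	out := make([][]byte, 0, len(rds))
	for _, rd := range rds {
		// Each reader returned by AllHeaders is backed by a rope of overflow
		// pages, so its total length is discoverable via a Size method.
		sz, ok := rd.(interface{ Size() int64 })
		if !ok {
			return nil, errors.New("reader does not report its size")
		}
		buf := make([]byte, sz.Size())
		if _, err := io.ReadFull(io.NewSectionReader(rd, 0, sz.Size()), buf); err != nil {
			return nil, err
		}
		out = append(out, buf)
	}
	return out, nil
}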