github.com/status-im/status-go@v1.1.0/protocol/communities/manager_archive_file.go (about)

//go:build !disable_torrent
// +build !disable_torrent

// Attribution to Pascal Precht, for further context please view the below issues
// - https://github.com/status-im/status-go/issues/2563
// - https://github.com/status-im/status-go/issues/2565
// - https://github.com/status-im/status-go/issues/2567
// - https://github.com/status-im/status-go/issues/2568

package communities

import (
	"crypto/ecdsa"
	"os"
	"path"
	"time"

	"github.com/status-im/status-go/eth-node/crypto"
	"github.com/status-im/status-go/eth-node/types"
	"github.com/status-im/status-go/params"
	"github.com/status-im/status-go/protocol/common"
	"github.com/status-im/status-go/protocol/encryption"
	"github.com/status-im/status-go/protocol/protobuf"
	"github.com/status-im/status-go/signal"

	"github.com/anacrolix/torrent/bencode"
	"github.com/anacrolix/torrent/metainfo"
	"github.com/golang/protobuf/proto"
	"go.uber.org/zap"
)

type ArchiveFileManager struct {
	torrentConfig *params.TorrentConfig

	logger      *zap.Logger
	persistence *Persistence
	identity    *ecdsa.PrivateKey
	encryptor   *encryption.Protocol

	publisher Publisher
}

func NewArchiveFileManager(amc *ArchiveManagerConfig) *ArchiveFileManager {
	return &ArchiveFileManager{
		torrentConfig: amc.TorrentConfig,
		logger:        amc.Logger,
		persistence:   amc.Persistence,
		identity:      amc.Identity,
		encryptor:     amc.Encryptor,
		publisher:     amc.Publisher,
	}
}

func (m *ArchiveFileManager) createHistoryArchiveTorrent(communityID types.HexBytes, msgs []*types.Message, topics []types.TopicType, startDate time.Time, endDate time.Time, partition time.Duration, encrypt bool) ([]string, error) {

	loadFromDB := len(msgs) == 0

	from := startDate
	to := from.Add(partition)
	if to.After(endDate) {
		to = endDate
	}

	archiveDir := m.torrentConfig.DataDir + "/" + communityID.String()
	torrentDir := m.torrentConfig.TorrentDir
	indexPath := archiveDir + "/index"
	dataPath := archiveDir + "/data"

	wakuMessageArchiveIndexProto := &protobuf.WakuMessageArchiveIndex{}
	wakuMessageArchiveIndex := make(map[string]*protobuf.WakuMessageArchiveIndexMetadata)
	archiveIDs := make([]string, 0)

	if _, err := os.Stat(archiveDir); os.IsNotExist(err) {
		err := os.MkdirAll(archiveDir, 0700)
		if err != nil {
			return archiveIDs, err
		}
	}
	if _, err := os.Stat(torrentDir); os.IsNotExist(err) {
		err := os.MkdirAll(torrentDir, 0700)
		if err != nil {
			return archiveIDs, err
		}
	}

	_, err := os.Stat(indexPath)
	if err == nil {
		wakuMessageArchiveIndexProto, err = m.LoadHistoryArchiveIndexFromFile(m.identity, communityID)
		if err != nil {
			return archiveIDs, err
		}
	}

	var offset uint64 = 0

	for hash, metadata := range wakuMessageArchiveIndexProto.Archives {
		offset = offset + metadata.Size
		wakuMessageArchiveIndex[hash] = metadata
	}

	var encodedArchives []*EncodedArchiveData
	topicsAsByteArrays := topicsAsByteArrays(topics)

	m.publisher.publish(&Subscription{CreatingHistoryArchivesSignal: &signal.CreatingHistoryArchivesSignal{
		CommunityID: communityID.String(),
	}})

	m.logger.Debug("creating archives",
		zap.Any("startDate", startDate),
		zap.Any("endDate", endDate),
		zap.Duration("partition", partition),
	)
	for {
		if from.Equal(endDate) || from.After(endDate) {
			break
		}
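		// Each pass of this loop covers one [from, to) partition window; the
		// window is advanced by `partition` at the bottom of the loop body
		// until it reaches endDate.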
		m.logger.Debug("creating message archive",
			zap.Any("from", from),
			zap.Any("to", to),
		)

		var messages []types.Message
		if loadFromDB {
			messages, err = m.persistence.GetWakuMessagesByFilterTopic(topics, uint64(from.Unix()), uint64(to.Unix()))
			if err != nil {
				return archiveIDs, err
			}
		} else {
			for _, msg := range msgs {
				if int64(msg.Timestamp) >= from.Unix() && int64(msg.Timestamp) < to.Unix() {
					messages = append(messages, *msg)
				}
			}
		}

		if len(messages) == 0 {
			// No need to create an archive with zero messages
			m.logger.Debug("no messages in this partition")
			from = to
			to = to.Add(partition)
			if to.After(endDate) {
				to = endDate
			}
			continue
		}

		m.logger.Debug("creating archive with messages", zap.Int("messagesCount", len(messages)))

		// Not only do we partition messages, we also chunk them
		// roughly by size, such that each chunk will not exceed a given
		// size and archive data doesn't get too big
		messageChunks := make([][]types.Message, 0)
		currentChunkSize := 0
		currentChunk := make([]types.Message, 0)

		for _, msg := range messages {
			msgSize := len(msg.Payload) + len(msg.Sig)
			if msgSize > maxArchiveSizeInBytes {
				// we drop messages this big
				continue
			}

			if currentChunkSize+msgSize > maxArchiveSizeInBytes {
				messageChunks = append(messageChunks, currentChunk)
				currentChunk = make([]types.Message, 0)
				currentChunkSize = 0
			}
			currentChunk = append(currentChunk, msg)
			currentChunkSize = currentChunkSize + msgSize
		}
		messageChunks = append(messageChunks, currentChunk)

		for _, messages := range messageChunks {
			wakuMessageArchive := m.createWakuMessageArchive(from, to, messages, topicsAsByteArrays)
			encodedArchive, err := proto.Marshal(wakuMessageArchive)
			if err != nil {
				return archiveIDs, err
			}

			if encrypt {
				messageSpec, err := m.encryptor.BuildHashRatchetMessage(communityID, encodedArchive)
				if err != nil {
					return archiveIDs, err
				}

				encodedArchive, err = proto.Marshal(messageSpec.Message)
				if err != nil {
					return archiveIDs, err
				}
			}

			rawSize := len(encodedArchive)
			padding := 0
			size := 0

			if rawSize > pieceLength {
				size = rawSize + pieceLength - (rawSize % pieceLength)
				padding = size - rawSize
			} else {
				padding = pieceLength - rawSize
				size = rawSize + padding
			}

			wakuMessageArchiveIndexMetadata := &protobuf.WakuMessageArchiveIndexMetadata{
				Metadata: wakuMessageArchive.Metadata,
				Offset:   offset,
				Size:     uint64(size),
				Padding:  uint64(padding),
			}

			wakuMessageArchiveIndexMetadataBytes, err := proto.Marshal(wakuMessageArchiveIndexMetadata)
			if err != nil {
				return archiveIDs, err
			}

			archiveID := crypto.Keccak256Hash(wakuMessageArchiveIndexMetadataBytes).String()
			archiveIDs = append(archiveIDs, archiveID)
			wakuMessageArchiveIndex[archiveID] = wakuMessageArchiveIndexMetadata
			encodedArchives = append(encodedArchives, &EncodedArchiveData{bytes: encodedArchive, padding: padding})
			offset = offset + uint64(rawSize) + uint64(padding)
		}

		from = to
		to = to.Add(partition)
		if to.After(endDate) {
			to = endDate
		}
	}

	if len(encodedArchives) > 0 {

		dataBytes := make([]byte, 0)

		for _, encodedArchiveData := range encodedArchives {
			dataBytes = append(dataBytes, encodedArchiveData.bytes...)
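			// Zero-fill the padding so every archive occupies a whole number
			// of pieceLength-sized blocks in the data file, keeping the
			// offsets recorded in the index piece-aligned.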
			dataBytes = append(dataBytes, make([]byte, encodedArchiveData.padding)...)
		}

		wakuMessageArchiveIndexProto.Archives = wakuMessageArchiveIndex
		indexBytes, err := proto.Marshal(wakuMessageArchiveIndexProto)
		if err != nil {
			return archiveIDs, err
		}

		if encrypt {
			messageSpec, err := m.encryptor.BuildHashRatchetMessage(communityID, indexBytes)
			if err != nil {
				return archiveIDs, err
			}
			indexBytes, err = proto.Marshal(messageSpec.Message)
			if err != nil {
				return archiveIDs, err
			}
		}

		err = os.WriteFile(indexPath, indexBytes, 0644) // nolint: gosec
		if err != nil {
			return archiveIDs, err
		}

		file, err := os.OpenFile(dataPath, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644)
		if err != nil {
			return archiveIDs, err
		}
		defer file.Close()

		_, err = file.Write(dataBytes)
		if err != nil {
			return archiveIDs, err
		}

		metaInfo := metainfo.MetaInfo{
			AnnounceList: defaultAnnounceList,
		}
		metaInfo.SetDefaults()
		metaInfo.CreatedBy = common.PubkeyToHex(&m.identity.PublicKey)

		info := metainfo.Info{
			PieceLength: int64(pieceLength),
		}

		err = info.BuildFromFilePath(archiveDir)
		if err != nil {
			return archiveIDs, err
		}

		metaInfo.InfoBytes, err = bencode.Marshal(info)
		if err != nil {
			return archiveIDs, err
		}

		metaInfoBytes, err := bencode.Marshal(metaInfo)
		if err != nil {
			return archiveIDs, err
		}

		err = os.WriteFile(torrentFile(m.torrentConfig.TorrentDir, communityID.String()), metaInfoBytes, 0644) // nolint: gosec
		if err != nil {
			return archiveIDs, err
		}

		m.logger.Debug("torrent created", zap.Any("from", startDate.Unix()), zap.Any("to", endDate.Unix()))

		m.publisher.publish(&Subscription{
			HistoryArchivesCreatedSignal: &signal.HistoryArchivesCreatedSignal{
				CommunityID: communityID.String(),
				From:        int(startDate.Unix()),
				To:          int(endDate.Unix()),
			},
		})
	} else {
		m.logger.Debug("no archives created")
		m.publisher.publish(&Subscription{
			NoHistoryArchivesCreatedSignal: &signal.NoHistoryArchivesCreatedSignal{
				CommunityID: communityID.String(),
				From:        int(startDate.Unix()),
				To:          int(endDate.Unix()),
			},
		})
	}

	lastMessageArchiveEndDate, err := m.persistence.GetLastMessageArchiveEndDate(communityID)
	if err != nil {
		return archiveIDs, err
	}

	if lastMessageArchiveEndDate > 0 {
		err = m.persistence.UpdateLastMessageArchiveEndDate(communityID, uint64(from.Unix()))
	} else {
		err = m.persistence.SaveLastMessageArchiveEndDate(communityID, uint64(from.Unix()))
	}
	if err != nil {
		return archiveIDs, err
	}
	return archiveIDs, nil
}

func (m *ArchiveFileManager) archiveIndexFile(communityID string) string {
	return path.Join(m.torrentConfig.DataDir, communityID, "index")
}

func (m *ArchiveFileManager) createWakuMessageArchive(from time.Time, to time.Time, messages []types.Message, topics [][]byte) *protobuf.WakuMessageArchive {
	var wakuMessages []*protobuf.WakuMessage

	for _, msg := range messages {
		topic := types.TopicTypeToByteArray(msg.Topic)
		wakuMessage := &protobuf.WakuMessage{
			Sig:          msg.Sig,
			Timestamp:    uint64(msg.Timestamp),
			Topic:        topic,
			Payload:      msg.Payload,
			Padding:      msg.Padding,
			Hash:         msg.Hash,
			ThirdPartyId: msg.ThirdPartyID,
		}
		wakuMessages = append(wakuMessages, wakuMessage)
	}

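	// The metadata records the time range and content topics this archive
	// covers; it is embedded in the archive itself and also copied into the
	// index entry for the archive.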
	metadata := protobuf.WakuMessageArchiveMetadata{
		From:         uint64(from.Unix()),
		To:           uint64(to.Unix()),
		ContentTopic: topics,
	}

	wakuMessageArchive := &protobuf.WakuMessageArchive{
		Metadata: &metadata,
		Messages: wakuMessages,
	}
	return wakuMessageArchive
}

func (m *ArchiveFileManager) CreateHistoryArchiveTorrentFromMessages(communityID types.HexBytes, messages []*types.Message, topics []types.TopicType, startDate time.Time, endDate time.Time, partition time.Duration, encrypt bool) ([]string, error) {
	return m.createHistoryArchiveTorrent(communityID, messages, topics, startDate, endDate, partition, encrypt)
}

func (m *ArchiveFileManager) CreateHistoryArchiveTorrentFromDB(communityID types.HexBytes, topics []types.TopicType, startDate time.Time, endDate time.Time, partition time.Duration, encrypt bool) ([]string, error) {
	return m.createHistoryArchiveTorrent(communityID, make([]*types.Message, 0), topics, startDate, endDate, partition, encrypt)
}

func (m *ArchiveFileManager) GetMessageArchiveIDsToImport(communityID types.HexBytes) ([]string, error) {
	return m.persistence.GetMessageArchiveIDsToImport(communityID)
}

func (m *ArchiveFileManager) SaveMessageArchiveID(communityID types.HexBytes, hash string) error {
	return m.persistence.SaveMessageArchiveID(communityID, hash)
}

func (m *ArchiveFileManager) SetMessageArchiveIDImported(communityID types.HexBytes, hash string, imported bool) error {
	return m.persistence.SetMessageArchiveIDImported(communityID, hash, imported)
}

func (m *ArchiveFileManager) GetHistoryArchiveMagnetlink(communityID types.HexBytes) (string, error) {
	id := communityID.String()
	torrentFile := torrentFile(m.torrentConfig.TorrentDir, id)

	metaInfo, err := metainfo.LoadFromFile(torrentFile)
	if err != nil {
		return "", err
	}

	info, err := metaInfo.UnmarshalInfo()
	if err != nil {
		return "", err
	}

	return metaInfo.Magnet(nil, &info).String(), nil
}

func (m *ArchiveFileManager) archiveDataFile(communityID string) string {
	return path.Join(m.torrentConfig.DataDir, communityID, "data")
}

func (m *ArchiveFileManager) ExtractMessagesFromHistoryArchive(communityID types.HexBytes, archiveID string) ([]*protobuf.WakuMessage, error) {
	id := communityID.String()

	index, err := m.LoadHistoryArchiveIndexFromFile(m.identity, communityID)
	if err != nil {
		return nil, err
	}

	dataFile, err := os.Open(m.archiveDataFile(id))
	if err != nil {
		return nil, err
	}
	defer dataFile.Close()

	m.logger.Debug("extracting messages from history archive",
		zap.String("communityID", communityID.String()),
		zap.String("archiveID", archiveID))
	metadata := index.Archives[archiveID]

	_, err = dataFile.Seek(int64(metadata.Offset), 0)
	if err != nil {
		m.logger.Error("failed to seek archive data file", zap.Error(err))
		return nil, err
	}

	data := make([]byte, metadata.Size-metadata.Padding)
	m.logger.Debug("loading history archive data into memory", zap.Float64("data_size_MB", float64(metadata.Size-metadata.Padding)/1024.0/1024.0))
	_, err = dataFile.Read(data)
	if err != nil {
		m.logger.Error("failed to read archive data", zap.Error(err))
		return nil, err
	}

	archive := &protobuf.WakuMessageArchive{}

	err = proto.Unmarshal(data, archive)
	if err != nil {
		// The archive data might be encrypted, so we try to decrypt it first
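		// When archives were created with encrypt=true they are wrapped in an
		// encryption.ProtocolMessage (see BuildHashRatchetMessage above), so
		// we unwrap and decrypt the payload before unmarshalling the archive
		// again.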
		var protocolMessage encryption.ProtocolMessage
		err := proto.Unmarshal(data, &protocolMessage)
		if err != nil {
			m.logger.Error("failed to unmarshal protocol message", zap.Error(err))
			return nil, err
		}

		pk, err := crypto.DecompressPubkey(communityID)
		if err != nil {
			m.logger.Error("failed to decompress community pubkey", zap.Error(err))
			return nil, err
		}
		decryptedBytes, err := m.encryptor.HandleMessage(m.identity, pk, &protocolMessage, make([]byte, 0))
		if err != nil {
			m.logger.Error("failed to decrypt message archive", zap.Error(err))
			return nil, err
		}
		err = proto.Unmarshal(decryptedBytes.DecryptedMessage, archive)
		if err != nil {
			m.logger.Error("failed to unmarshal message archive", zap.Error(err))
			return nil, err
		}
	}
	return archive.Messages, nil
}

func (m *ArchiveFileManager) LoadHistoryArchiveIndexFromFile(myKey *ecdsa.PrivateKey, communityID types.HexBytes) (*protobuf.WakuMessageArchiveIndex, error) {
	wakuMessageArchiveIndexProto := &protobuf.WakuMessageArchiveIndex{}

	indexPath := m.archiveIndexFile(communityID.String())
	indexData, err := os.ReadFile(indexPath)
	if err != nil {
		return nil, err
	}

	err = proto.Unmarshal(indexData, wakuMessageArchiveIndexProto)
	if err != nil {
		return nil, err
	}

	if len(wakuMessageArchiveIndexProto.Archives) == 0 && len(indexData) > 0 {
		// This means we're dealing with an encrypted index file, so we have to decrypt it first
		var protocolMessage encryption.ProtocolMessage
		err := proto.Unmarshal(indexData, &protocolMessage)
		if err != nil {
			return nil, err
		}
		pk, err := crypto.DecompressPubkey(communityID)
		if err != nil {
			return nil, err
		}
		decryptedBytes, err := m.encryptor.HandleMessage(myKey, pk, &protocolMessage, make([]byte, 0))
		if err != nil {
			return nil, err
		}
		err = proto.Unmarshal(decryptedBytes.DecryptedMessage, wakuMessageArchiveIndexProto)
		if err != nil {
			return nil, err
		}
	}

	return wakuMessageArchiveIndexProto, nil
}
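
// Illustrative usage sketch (not part of the original file): a minimal
// example of how a caller might drive ArchiveFileManager, assuming an
// already-constructed manager, a community ID, and the community's content
// topics. The 30-day range and weekly partition are arbitrary choices made
// for the example; the real wiring lives in the torrent/archive manager code.
func ExampleCreateWeeklyArchives(m *ArchiveFileManager, communityID types.HexBytes, topics []types.TopicType) (string, error) {
	endDate := time.Now()
	startDate := endDate.Add(-30 * 24 * time.Hour)
	partition := 7 * 24 * time.Hour // one archive per week

	// Build (or extend) the community's archive torrent from messages stored
	// in the local database, encrypting each archive for community members.
	if _, err := m.CreateHistoryArchiveTorrentFromDB(communityID, topics, startDate, endDate, partition, true); err != nil {
		return "", err
	}

	// The magnet link is what gets advertised so other members can fetch the
	// archives over torrent.
	return m.GetHistoryArchiveMagnetlink(communityID)
}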