github.com/status-im/status-go@v1.1.0/protocol/communities/manager_archive_file.go (about)

     1  //go:build !disable_torrent
     2  // +build !disable_torrent
     3  
     4  // Attribution to Pascal Precht, for further context please view the below issues
     5  // - https://github.com/status-im/status-go/issues/2563
     6  // - https://github.com/status-im/status-go/issues/2565
     7  // - https://github.com/status-im/status-go/issues/2567
     8  // - https://github.com/status-im/status-go/issues/2568
     9  
    10  package communities
    11  
import (
	"crypto/ecdsa"
	"errors"
	"io"
	"os"
	"path"
	"time"

	"github.com/status-im/status-go/eth-node/crypto"
	"github.com/status-im/status-go/eth-node/types"
	"github.com/status-im/status-go/params"
	"github.com/status-im/status-go/protocol/common"
	"github.com/status-im/status-go/protocol/encryption"
	"github.com/status-im/status-go/protocol/protobuf"
	"github.com/status-im/status-go/signal"

	"github.com/anacrolix/torrent/bencode"
	"github.com/anacrolix/torrent/metainfo"
	"github.com/golang/protobuf/proto"
	"go.uber.org/zap"
)
    31  
// ArchiveFileManager creates, stores and reads community history archives
// and their torrent metadata on disk.
type ArchiveFileManager struct {
	torrentConfig *params.TorrentConfig // locations (DataDir, TorrentDir) and settings for archive/torrent files

	logger      *zap.Logger
	persistence *Persistence         // database access for waku messages and archive bookkeeping
	identity    *ecdsa.PrivateKey    // node identity; used to decrypt archives and to attribute created torrents
	encryptor   *encryption.Protocol // hash-ratchet encryption/decryption for archive data and index

	publisher Publisher // receives archive lifecycle signals (created / none created / creating)
}
    42  
    43  func NewArchiveFileManager(amc *ArchiveManagerConfig) *ArchiveFileManager {
    44  	return &ArchiveFileManager{
    45  		torrentConfig: amc.TorrentConfig,
    46  		logger:        amc.Logger,
    47  		persistence:   amc.Persistence,
    48  		identity:      amc.Identity,
    49  		encryptor:     amc.Encryptor,
    50  		publisher:     amc.Publisher,
    51  	}
    52  }
    53  
// createHistoryArchiveTorrent partitions the given messages (or, when msgs is
// empty, messages loaded from the database) into time-window archives,
// appends the encoded archives to the community's data file, rewrites the
// archive index and creates a torrent file for the archive directory.
// It returns the IDs (keccak256 hashes of the archive index metadata) of all
// newly created archives.
func (m *ArchiveFileManager) createHistoryArchiveTorrent(communityID types.HexBytes, msgs []*types.Message, topics []types.TopicType, startDate time.Time, endDate time.Time, partition time.Duration, encrypt bool) ([]string, error) {

	// An empty msgs slice signals that messages should be fetched from the
	// database per partition window (see CreateHistoryArchiveTorrentFromDB).
	loadFromDB := len(msgs) == 0

	// [from, to) is the current partition window; it advances by `partition`
	// until it reaches endDate.
	from := startDate
	to := from.Add(partition)
	if to.After(endDate) {
		to = endDate
	}

	archiveDir := m.torrentConfig.DataDir + "/" + communityID.String()
	torrentDir := m.torrentConfig.TorrentDir
	indexPath := archiveDir + "/index"
	dataPath := archiveDir + "/data"

	wakuMessageArchiveIndexProto := &protobuf.WakuMessageArchiveIndex{}
	wakuMessageArchiveIndex := make(map[string]*protobuf.WakuMessageArchiveIndexMetadata)
	archiveIDs := make([]string, 0)

	// Ensure the archive data directory and the torrent directory exist
	// before any files are written.
	if _, err := os.Stat(archiveDir); os.IsNotExist(err) {
		err := os.MkdirAll(archiveDir, 0700)
		if err != nil {
			return archiveIDs, err
		}
	}
	if _, err := os.Stat(torrentDir); os.IsNotExist(err) {
		err := os.MkdirAll(torrentDir, 0700)
		if err != nil {
			return archiveIDs, err
		}
	}

	// If an index file already exists, load it so existing archives are
	// preserved and new ones are appended to it.
	_, err := os.Stat(indexPath)
	if err == nil {
		wakuMessageArchiveIndexProto, err = m.LoadHistoryArchiveIndexFromFile(m.identity, communityID)
		if err != nil {
			return archiveIDs, err
		}
	}

	// offset is the byte position in the data file at which the next archive
	// will be appended; pre-existing archives move it forward.
	var offset uint64 = 0

	for hash, metadata := range wakuMessageArchiveIndexProto.Archives {
		offset = offset + metadata.Size
		wakuMessageArchiveIndex[hash] = metadata
	}

	var encodedArchives []*EncodedArchiveData
	topicsAsByteArrays := topicsAsByteArrays(topics)

	m.publisher.publish(&Subscription{CreatingHistoryArchivesSignal: &signal.CreatingHistoryArchivesSignal{
		CommunityID: communityID.String(),
	}})

	m.logger.Debug("creating archives",
		zap.Any("startDate", startDate),
		zap.Any("endDate", endDate),
		zap.Duration("partition", partition),
	)
	for {
		if from.Equal(endDate) || from.After(endDate) {
			break
		}
		m.logger.Debug("creating message archive",
			zap.Any("from", from),
			zap.Any("to", to),
		)

		var messages []types.Message
		if loadFromDB {
			messages, err = m.persistence.GetWakuMessagesByFilterTopic(topics, uint64(from.Unix()), uint64(to.Unix()))
			if err != nil {
				return archiveIDs, err
			}
		} else {
			// Keep only messages whose timestamp falls into the current
			// partition window [from, to).
			for _, msg := range msgs {
				if int64(msg.Timestamp) >= from.Unix() && int64(msg.Timestamp) < to.Unix() {
					messages = append(messages, *msg)
				}
			}
		}

		if len(messages) == 0 {
			// No need to create an archive with zero messages
			m.logger.Debug("no messages in this partition")
			from = to
			to = to.Add(partition)
			if to.After(endDate) {
				to = endDate
			}
			continue
		}

		m.logger.Debug("creating archive with messages", zap.Int("messagesCount", len(messages)))

		// Not only do we partition messages, we also chunk them
		// roughly by size, such that each chunk will not exceed a given
		// size and archive data doesn't get too big
		messageChunks := make([][]types.Message, 0)
		currentChunkSize := 0
		currentChunk := make([]types.Message, 0)

		for _, msg := range messages {
			msgSize := len(msg.Payload) + len(msg.Sig)
			if msgSize > maxArchiveSizeInBytes {
				// we drop messages this big
				continue
			}

			// Close the current chunk before it would exceed the size limit.
			if currentChunkSize+msgSize > maxArchiveSizeInBytes {
				messageChunks = append(messageChunks, currentChunk)
				currentChunk = make([]types.Message, 0)
				currentChunkSize = 0
			}
			currentChunk = append(currentChunk, msg)
			currentChunkSize = currentChunkSize + msgSize
		}
		messageChunks = append(messageChunks, currentChunk)

		for _, messages := range messageChunks {
			wakuMessageArchive := m.createWakuMessageArchive(from, to, messages, topicsAsByteArrays)
			encodedArchive, err := proto.Marshal(wakuMessageArchive)
			if err != nil {
				return archiveIDs, err
			}

			// Optionally encrypt the archive payload for the community using
			// a hash ratchet message.
			if encrypt {
				messageSpec, err := m.encryptor.BuildHashRatchetMessage(communityID, encodedArchive)
				if err != nil {
					return archiveIDs, err
				}

				encodedArchive, err = proto.Marshal(messageSpec.Message)
				if err != nil {
					return archiveIDs, err
				}
			}

			// Pad each archive up to a multiple of the torrent piece length
			// so archive boundaries align with torrent pieces.
			rawSize := len(encodedArchive)
			padding := 0
			size := 0

			if rawSize > pieceLength {
				size = rawSize + pieceLength - (rawSize % pieceLength)
				padding = size - rawSize
			} else {
				padding = pieceLength - rawSize
				size = rawSize + padding
			}

			wakuMessageArchiveIndexMetadata := &protobuf.WakuMessageArchiveIndexMetadata{
				Metadata: wakuMessageArchive.Metadata,
				Offset:   offset,
				Size:     uint64(size),
				Padding:  uint64(padding),
			}

			wakuMessageArchiveIndexMetadataBytes, err := proto.Marshal(wakuMessageArchiveIndexMetadata)
			if err != nil {
				return archiveIDs, err
			}

			// An archive's ID is the keccak256 hash of its marshalled index
			// metadata.
			archiveID := crypto.Keccak256Hash(wakuMessageArchiveIndexMetadataBytes).String()
			archiveIDs = append(archiveIDs, archiveID)
			wakuMessageArchiveIndex[archiveID] = wakuMessageArchiveIndexMetadata
			encodedArchives = append(encodedArchives, &EncodedArchiveData{bytes: encodedArchive, padding: padding})
			offset = offset + uint64(rawSize) + uint64(padding)
		}

		// Advance to the next partition window.
		from = to
		to = to.Add(partition)
		if to.After(endDate) {
			to = endDate
		}
	}

	if len(encodedArchives) > 0 {

		// Concatenate all padded archives into one buffer that is appended
		// to the community's data file in a single write.
		dataBytes := make([]byte, 0)

		for _, encodedArchiveData := range encodedArchives {
			dataBytes = append(dataBytes, encodedArchiveData.bytes...)
			dataBytes = append(dataBytes, make([]byte, encodedArchiveData.padding)...)
		}

		wakuMessageArchiveIndexProto.Archives = wakuMessageArchiveIndex
		indexBytes, err := proto.Marshal(wakuMessageArchiveIndexProto)
		if err != nil {
			return archiveIDs, err
		}

		// The index gets the same optional encryption treatment as the
		// archive data itself.
		if encrypt {
			messageSpec, err := m.encryptor.BuildHashRatchetMessage(communityID, indexBytes)
			if err != nil {
				return archiveIDs, err
			}
			indexBytes, err = proto.Marshal(messageSpec.Message)
			if err != nil {
				return archiveIDs, err
			}
		}

		// The index file is rewritten whole; the data file is append-only.
		err = os.WriteFile(indexPath, indexBytes, 0644) // nolint: gosec
		if err != nil {
			return archiveIDs, err
		}

		file, err := os.OpenFile(dataPath, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644)
		if err != nil {
			return archiveIDs, err
		}
		defer file.Close()

		_, err = file.Write(dataBytes)
		if err != nil {
			return archiveIDs, err
		}

		// Build torrent metadata over the whole archive directory and write
		// the community's .torrent file.
		metaInfo := metainfo.MetaInfo{
			AnnounceList: defaultAnnounceList,
		}
		metaInfo.SetDefaults()
		metaInfo.CreatedBy = common.PubkeyToHex(&m.identity.PublicKey)

		info := metainfo.Info{
			PieceLength: int64(pieceLength),
		}

		err = info.BuildFromFilePath(archiveDir)
		if err != nil {
			return archiveIDs, err
		}

		metaInfo.InfoBytes, err = bencode.Marshal(info)
		if err != nil {
			return archiveIDs, err
		}

		metaInfoBytes, err := bencode.Marshal(metaInfo)
		if err != nil {
			return archiveIDs, err
		}

		err = os.WriteFile(torrentFile(m.torrentConfig.TorrentDir, communityID.String()), metaInfoBytes, 0644) // nolint: gosec
		if err != nil {
			return archiveIDs, err
		}

		m.logger.Debug("torrent created", zap.Any("from", startDate.Unix()), zap.Any("to", endDate.Unix()))

		m.publisher.publish(&Subscription{
			HistoryArchivesCreatedSignal: &signal.HistoryArchivesCreatedSignal{
				CommunityID: communityID.String(),
				From:        int(startDate.Unix()),
				To:          int(endDate.Unix()),
			},
		})
	} else {
		m.logger.Debug("no archives created")
		m.publisher.publish(&Subscription{
			NoHistoryArchivesCreatedSignal: &signal.NoHistoryArchivesCreatedSignal{
				CommunityID: communityID.String(),
				From:        int(startDate.Unix()),
				To:          int(endDate.Unix()),
			},
		})
	}

	// Persist how far archive creation has progressed so subsequent runs can
	// resume from the last partition boundary.
	lastMessageArchiveEndDate, err := m.persistence.GetLastMessageArchiveEndDate(communityID)
	if err != nil {
		return archiveIDs, err
	}

	if lastMessageArchiveEndDate > 0 {
		err = m.persistence.UpdateLastMessageArchiveEndDate(communityID, uint64(from.Unix()))
	} else {
		err = m.persistence.SaveLastMessageArchiveEndDate(communityID, uint64(from.Unix()))
	}
	if err != nil {
		return archiveIDs, err
	}
	return archiveIDs, nil
}
   337  
   338  func (m *ArchiveFileManager) archiveIndexFile(communityID string) string {
   339  	return path.Join(m.torrentConfig.DataDir, communityID, "index")
   340  }
   341  
   342  func (m *ArchiveFileManager) createWakuMessageArchive(from time.Time, to time.Time, messages []types.Message, topics [][]byte) *protobuf.WakuMessageArchive {
   343  	var wakuMessages []*protobuf.WakuMessage
   344  
   345  	for _, msg := range messages {
   346  		topic := types.TopicTypeToByteArray(msg.Topic)
   347  		wakuMessage := &protobuf.WakuMessage{
   348  			Sig:          msg.Sig,
   349  			Timestamp:    uint64(msg.Timestamp),
   350  			Topic:        topic,
   351  			Payload:      msg.Payload,
   352  			Padding:      msg.Padding,
   353  			Hash:         msg.Hash,
   354  			ThirdPartyId: msg.ThirdPartyID,
   355  		}
   356  		wakuMessages = append(wakuMessages, wakuMessage)
   357  	}
   358  
   359  	metadata := protobuf.WakuMessageArchiveMetadata{
   360  		From:         uint64(from.Unix()),
   361  		To:           uint64(to.Unix()),
   362  		ContentTopic: topics,
   363  	}
   364  
   365  	wakuMessageArchive := &protobuf.WakuMessageArchive{
   366  		Metadata: &metadata,
   367  		Messages: wakuMessages,
   368  	}
   369  	return wakuMessageArchive
   370  }
   371  
// CreateHistoryArchiveTorrentFromMessages creates history archives (and the
// corresponding torrent file) from the given in-memory message slice,
// partitioned into [startDate, endDate) windows of the given duration.
// It returns the IDs of the newly created archives.
func (m *ArchiveFileManager) CreateHistoryArchiveTorrentFromMessages(communityID types.HexBytes, messages []*types.Message, topics []types.TopicType, startDate time.Time, endDate time.Time, partition time.Duration, encrypt bool) ([]string, error) {
	return m.createHistoryArchiveTorrent(communityID, messages, topics, startDate, endDate, partition, encrypt)
}
   375  
// CreateHistoryArchiveTorrentFromDB creates history archives (and the
// corresponding torrent file) from waku messages stored in the database.
// Passing an empty message slice makes createHistoryArchiveTorrent load
// messages from persistence per partition window.
func (m *ArchiveFileManager) CreateHistoryArchiveTorrentFromDB(communityID types.HexBytes, topics []types.TopicType, startDate time.Time, endDate time.Time, partition time.Duration, encrypt bool) ([]string, error) {
	return m.createHistoryArchiveTorrent(communityID, make([]*types.Message, 0), topics, startDate, endDate, partition, encrypt)
}
   379  
// GetMessageArchiveIDsToImport returns the IDs of downloaded archives for the
// given community that have not been imported yet (delegates to persistence).
func (m *ArchiveFileManager) GetMessageArchiveIDsToImport(communityID types.HexBytes) ([]string, error) {
	return m.persistence.GetMessageArchiveIDsToImport(communityID)
}
   383  
// SaveMessageArchiveID records a known archive ID for the given community
// (delegates to persistence).
func (m *ArchiveFileManager) SaveMessageArchiveID(communityID types.HexBytes, hash string) error {
	return m.persistence.SaveMessageArchiveID(communityID, hash)
}
   387  
// SetMessageArchiveIDImported marks an archive ID as imported (or not) for
// the given community (delegates to persistence).
func (m *ArchiveFileManager) SetMessageArchiveIDImported(communityID types.HexBytes, hash string, imported bool) error {
	return m.persistence.SetMessageArchiveIDImported(communityID, hash, imported)
}
   391  
   392  func (m *ArchiveFileManager) GetHistoryArchiveMagnetlink(communityID types.HexBytes) (string, error) {
   393  	id := communityID.String()
   394  	torrentFile := torrentFile(m.torrentConfig.TorrentDir, id)
   395  
   396  	metaInfo, err := metainfo.LoadFromFile(torrentFile)
   397  	if err != nil {
   398  		return "", err
   399  	}
   400  
   401  	info, err := metaInfo.UnmarshalInfo()
   402  	if err != nil {
   403  		return "", err
   404  	}
   405  
   406  	return metaInfo.Magnet(nil, &info).String(), nil
   407  }
   408  
   409  func (m *ArchiveFileManager) archiveDataFile(communityID string) string {
   410  	return path.Join(m.torrentConfig.DataDir, communityID, "data")
   411  }
   412  
   413  func (m *ArchiveFileManager) ExtractMessagesFromHistoryArchive(communityID types.HexBytes, archiveID string) ([]*protobuf.WakuMessage, error) {
   414  	id := communityID.String()
   415  
   416  	index, err := m.LoadHistoryArchiveIndexFromFile(m.identity, communityID)
   417  	if err != nil {
   418  		return nil, err
   419  	}
   420  
   421  	dataFile, err := os.Open(m.archiveDataFile(id))
   422  	if err != nil {
   423  		return nil, err
   424  	}
   425  	defer dataFile.Close()
   426  
   427  	m.logger.Debug("extracting messages from history archive",
   428  		zap.String("communityID", communityID.String()),
   429  		zap.String("archiveID", archiveID))
   430  	metadata := index.Archives[archiveID]
   431  
   432  	_, err = dataFile.Seek(int64(metadata.Offset), 0)
   433  	if err != nil {
   434  		m.logger.Error("failed to seek archive data file", zap.Error(err))
   435  		return nil, err
   436  	}
   437  
   438  	data := make([]byte, metadata.Size-metadata.Padding)
   439  	m.logger.Debug("loading history archive data into memory", zap.Float64("data_size_MB", float64(metadata.Size-metadata.Padding)/1024.0/1024.0))
   440  	_, err = dataFile.Read(data)
   441  	if err != nil {
   442  		m.logger.Error("failed failed to read archive data", zap.Error(err))
   443  		return nil, err
   444  	}
   445  
   446  	archive := &protobuf.WakuMessageArchive{}
   447  
   448  	err = proto.Unmarshal(data, archive)
   449  	if err != nil {
   450  		// The archive data might eb encrypted so we try to decrypt instead first
   451  		var protocolMessage encryption.ProtocolMessage
   452  		err := proto.Unmarshal(data, &protocolMessage)
   453  		if err != nil {
   454  			m.logger.Error("failed to unmarshal protocol message", zap.Error(err))
   455  			return nil, err
   456  		}
   457  
   458  		pk, err := crypto.DecompressPubkey(communityID)
   459  		if err != nil {
   460  			m.logger.Error("failed to decompress community pubkey", zap.Error(err))
   461  			return nil, err
   462  		}
   463  		decryptedBytes, err := m.encryptor.HandleMessage(m.identity, pk, &protocolMessage, make([]byte, 0))
   464  		if err != nil {
   465  			m.logger.Error("failed to decrypt message archive", zap.Error(err))
   466  			return nil, err
   467  		}
   468  		err = proto.Unmarshal(decryptedBytes.DecryptedMessage, archive)
   469  		if err != nil {
   470  			m.logger.Error("failed to unmarshal message archive", zap.Error(err))
   471  			return nil, err
   472  		}
   473  	}
   474  	return archive.Messages, nil
   475  }
   476  
   477  func (m *ArchiveFileManager) LoadHistoryArchiveIndexFromFile(myKey *ecdsa.PrivateKey, communityID types.HexBytes) (*protobuf.WakuMessageArchiveIndex, error) {
   478  	wakuMessageArchiveIndexProto := &protobuf.WakuMessageArchiveIndex{}
   479  
   480  	indexPath := m.archiveIndexFile(communityID.String())
   481  	indexData, err := os.ReadFile(indexPath)
   482  	if err != nil {
   483  		return nil, err
   484  	}
   485  
   486  	err = proto.Unmarshal(indexData, wakuMessageArchiveIndexProto)
   487  	if err != nil {
   488  		return nil, err
   489  	}
   490  
   491  	if len(wakuMessageArchiveIndexProto.Archives) == 0 && len(indexData) > 0 {
   492  		// This means we're dealing with an encrypted index file, so we have to decrypt it first
   493  		var protocolMessage encryption.ProtocolMessage
   494  		err := proto.Unmarshal(indexData, &protocolMessage)
   495  		if err != nil {
   496  			return nil, err
   497  		}
   498  		pk, err := crypto.DecompressPubkey(communityID)
   499  		if err != nil {
   500  			return nil, err
   501  		}
   502  		decryptedBytes, err := m.encryptor.HandleMessage(myKey, pk, &protocolMessage, make([]byte, 0))
   503  		if err != nil {
   504  			return nil, err
   505  		}
   506  		err = proto.Unmarshal(decryptedBytes.DecryptedMessage, wakuMessageArchiveIndexProto)
   507  		if err != nil {
   508  			return nil, err
   509  		}
   510  	}
   511  
   512  	return wakuMessageArchiveIndexProto, nil
   513  }