github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/store/compound.go (about)

     1  package store
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"github.com/balzaczyy/golucene/core/codec"
     7  	"github.com/balzaczyy/golucene/core/util"
     8  	"reflect"
     9  	"sync"
    10  )
    11  
    12  // store/CompoundFileDirectory.java
    13  
    14  type FileSlice struct {
    15  	offset, length int64
    16  }
    17  
    18  const (
    19  	CFD_DATA_CODEC       = "CompoundFileWriterData"
    20  	CFD_VERSION_START    = 0
    21  	CFD_VERSION_CHECKSUM = 1
    22  	CFD_VERSION_CURRENT  = CFD_VERSION_CHECKSUM
    23  
    24  	CFD_ENTRY_CODEC = "CompoundFileWriterEntries"
    25  
    26  	COMPOUND_FILE_EXTENSION         = "cfs"
    27  	COMPOUND_FILE_ENTRIES_EXTENSION = "cfe"
    28  )
    29  
    30  var SENTINEL = make(map[string]FileSlice)
    31  
// CompoundFileDirectory exposes the sub-files stored inside a single
// compound file as a Directory. An instance is either opened for
// reading (handle and entries are populated) or for writing (writer is
// populated and entries is SENTINEL).
type CompoundFileDirectory struct {
	*DirectoryImpl
	*BaseDirectory
	// Guards Close and OpenInput.
	sync.Locker

	// The directory that holds the compound file itself.
	directory      Directory
	// Name of the compound file inside directory.
	fileName       string
	// Derived from the IOContext passed to the constructor.
	readBufferSize int
	// Sub-file table, keyed by the file name with its segment name stripped.
	entries        map[string]FileSlice
	// True when the directory was created in write mode.
	openForWrite   bool
	// Set only in write mode; nil otherwise.
	writer         *CompoundFileWriter
	// Input over the compound data file; set only in read mode.
	handle         IndexInput
	// Data file format version, filled in by readEntries.
	version        int
}
    46  
    47  func NewCompoundFileDirectory(directory Directory, fileName string, context IOContext, openForWrite bool) (d *CompoundFileDirectory, err error) {
    48  	self := &CompoundFileDirectory{
    49  		Locker:         &sync.Mutex{},
    50  		directory:      directory,
    51  		fileName:       fileName,
    52  		readBufferSize: bufferSize(context),
    53  		openForWrite:   openForWrite}
    54  	self.DirectoryImpl = NewDirectoryImpl(self)
    55  	self.BaseDirectory = NewBaseDirectory(self)
    56  
    57  	if !openForWrite {
    58  		// log.Printf("Open for read.")
    59  		success := false
    60  		defer func() {
    61  			if !success {
    62  				util.CloseWhileSuppressingError(self.handle)
    63  			}
    64  		}()
    65  		self.handle, err = directory.OpenInput(fileName, context)
    66  		if err != nil {
    67  			return nil, err
    68  		}
    69  		self.entries, err = self.readEntries(self.handle, directory, fileName)
    70  		if err != nil {
    71  			return nil, err
    72  		}
    73  		if self.version >= CFD_VERSION_CHECKSUM {
    74  			if _, err = codec.CheckHeader(self.handle, CFD_DATA_CODEC,
    75  				int32(self.version), int32(self.version)); err != nil {
    76  				return nil, err
    77  			}
    78  			// NOTE: data file is too costly to verify checksum against all the
    79  			// bytes on open, but for now we at least verify proper structure
    80  			// of the checksum footer: which looks for FOOTER_MAGIC +
    81  			// algorithmID. This is cheap and can detect some forms of
    82  			// corruption such as file trucation.
    83  			if _, err = codec.RetrieveChecksum(self.handle); err != nil {
    84  				return nil, err
    85  			}
    86  		}
    87  		success = true
    88  		self.BaseDirectory.IsOpen = true
    89  		return self, nil
    90  	} else {
    91  		assert2(reflect.TypeOf(directory).Name() != "CompoundFileDirectory",
    92  			"compound file inside of compound file: %v", fileName)
    93  		self.entries = SENTINEL
    94  		self.IsOpen = true
    95  		self.writer = newCompoundFileWriter(directory, fileName)
    96  		self.handle = nil
    97  		return self, nil
    98  	}
    99  }
   100  
   101  func (d *CompoundFileDirectory) Close() error {
   102  	d.Lock() // syncronized
   103  	defer d.Unlock()
   104  
   105  	// fmt.Printf("Closing %v...\n", d)
   106  	if !d.IsOpen {
   107  		fmt.Println("CompoundFileDirectory is already closed.")
   108  		// allow double close - usually to be consistent with other closeables
   109  		return nil // already closed
   110  	}
   111  	d.IsOpen = false
   112  	if d.writer != nil {
   113  		assert(d.openForWrite)
   114  		return d.writer.Close()
   115  	} else {
   116  		return util.Close(d.handle)
   117  	}
   118  }
   119  
   120  func (d *CompoundFileDirectory) OpenInput(name string, context IOContext) (in IndexInput, err error) {
   121  	d.Lock() // synchronized
   122  	defer d.Unlock()
   123  
   124  	d.EnsureOpen()
   125  	assert(!d.openForWrite)
   126  	id := util.StripSegmentName(name)
   127  	if entry, ok := d.entries[id]; ok {
   128  		return d.handle.Slice(name, entry.offset, entry.length)
   129  	}
   130  	keys := make([]string, 0)
   131  	for k := range d.entries {
   132  		keys = append(keys, k)
   133  	}
   134  	panic(fmt.Sprintf("No sub-file with id %v found (fileName=%v files: %v)", id, name, keys))
   135  }
   136  
   137  func (d *CompoundFileDirectory) ListAll() (paths []string, err error) {
   138  	d.EnsureOpen()
   139  	// if self.writer != nil {
   140  	// 	return self.writer.ListAll()
   141  	// }
   142  	// Add the segment name
   143  	seg := util.ParseSegmentName(d.fileName)
   144  	keys := make([]string, 0, len(d.entries))
   145  	for k := range d.entries {
   146  		keys = append(keys, seg+k)
   147  	}
   148  	return keys, nil
   149  }
   150  
   151  func (d *CompoundFileDirectory) FileExists(name string) bool {
   152  	d.EnsureOpen()
   153  	// if d.writer != nil {
   154  	// 	return d.writer.FileExists(name)
   155  	// }
   156  	_, ok := d.entries[util.StripSegmentName(name)]
   157  	return ok
   158  }
   159  
   160  func (d *CompoundFileDirectory) DeleteFile(name string) error {
   161  	panic("not supported")
   162  }
   163  
   164  // Returns the length of a file in the directory.
   165  func (d *CompoundFileDirectory) FileLength(name string) (n int64, err error) {
   166  	panic("not implemented yet")
   167  }
   168  
   169  func (d *CompoundFileDirectory) CreateOutput(name string, context IOContext) (out IndexOutput, err error) {
   170  	d.EnsureOpen()
   171  	return d.writer.createOutput(name, context)
   172  }
   173  
   174  func (d *CompoundFileDirectory) Sync(names []string) error {
   175  	panic("not supported")
   176  }
   177  
   178  func (d *CompoundFileDirectory) MakeLock(name string) Lock {
   179  	panic("not supported by CFS")
   180  }
   181  
   182  func (d *CompoundFileDirectory) String() string {
   183  	return fmt.Sprintf("CompoundFileDirectory(file='%v' in dir=%v)", d.fileName, d.directory)
   184  }
   185  
   186  const (
   187  	CODEC_MAGIC_BYTE1 = byte(uint32(codec.CODEC_MAGIC) >> 24 & 0xFF)
   188  	CODEC_MAGIC_BYTE2 = byte(uint32(codec.CODEC_MAGIC) >> 16 & 0xFF)
   189  	CODEC_MAGIC_BYTE3 = byte(uint32(codec.CODEC_MAGIC) >> 8 & 0xFF)
   190  	CODEC_MAGIC_BYTE4 = byte(codec.CODEC_MAGIC & 0xFF)
   191  )
   192  
   193  func (d *CompoundFileDirectory) readEntries(handle IndexInput, dir Directory, name string) (mapping map[string]FileSlice, err error) {
   194  	var stream IndexInput = nil
   195  	var entriesStream ChecksumIndexInput = nil
   196  	// read the first VInt. If it is negative, it's the version number
   197  	// otherwise it's the count (pre-3.1 indexes)
   198  	var success = false
   199  	defer func() {
   200  		if success {
   201  			err = util.Close(stream, entriesStream)
   202  		} else {
   203  			util.CloseWhileSuppressingError(stream, entriesStream)
   204  		}
   205  	}()
   206  
   207  	stream = handle.Clone()
   208  	// fmt.Printf("Reading from stream: %v\n", stream)
   209  	firstInt, err := stream.ReadVInt()
   210  	if err != nil {
   211  		return nil, err
   212  	}
   213  	// impossible for 3.0 to have 63 files in a .cfs, CFS writer was not visible
   214  	// and separate norms/etc are outside of cfs.
   215  	if firstInt == int32(CODEC_MAGIC_BYTE1) {
   216  		if secondByte, err := stream.ReadByte(); err == nil {
   217  			if thirdByte, err := stream.ReadByte(); err == nil {
   218  				if fourthByte, err := stream.ReadByte(); err == nil {
   219  					if secondByte != CODEC_MAGIC_BYTE2 ||
   220  						thirdByte != CODEC_MAGIC_BYTE3 ||
   221  						fourthByte != CODEC_MAGIC_BYTE4 {
   222  						return nil, errors.New(fmt.Sprintf(
   223  							"Illegal/impossible header for CFS file: %v,%v,%v",
   224  							secondByte, thirdByte, fourthByte))
   225  					}
   226  				}
   227  			}
   228  		}
   229  		if err != nil {
   230  			return nil, err
   231  		}
   232  
   233  		d.version, err = int32ToInt(codec.CheckHeaderNoMagic(stream, CFD_DATA_CODEC, CFD_VERSION_START, CFD_VERSION_CURRENT))
   234  		if err != nil {
   235  			return nil, err
   236  		}
   237  		entriesFileName := util.SegmentFileName(util.StripExtension(name), "", COMPOUND_FILE_ENTRIES_EXTENSION)
   238  		entriesStream, err = dir.OpenChecksumInput(entriesFileName, IO_CONTEXT_READONCE)
   239  		if err != nil {
   240  			return nil, err
   241  		}
   242  		_, err = codec.CheckHeader(entriesStream, CFD_ENTRY_CODEC, CFD_VERSION_START, CFD_VERSION_CURRENT)
   243  		if err != nil {
   244  			return nil, err
   245  		}
   246  		numEntries, err := entriesStream.ReadVInt()
   247  		if err != nil {
   248  			return nil, err
   249  		}
   250  
   251  		mapping = make(map[string]FileSlice)
   252  		// fmt.Printf("Entries number: %v\n", numEntries)
   253  		for i := int32(0); i < numEntries; i++ {
   254  			id, err := entriesStream.ReadString()
   255  			if err != nil {
   256  				return nil, err
   257  			}
   258  			if _, ok := mapping[id]; ok {
   259  				return nil, errors.New(fmt.Sprintf(
   260  					"Duplicate cfs entry id=%v in CFS: %v", id, entriesStream))
   261  			}
   262  			// log.Printf("Found entry: %v", id)
   263  			offset, err := entriesStream.ReadLong()
   264  			if err != nil {
   265  				return nil, err
   266  			}
   267  			length, err := entriesStream.ReadLong()
   268  			if err != nil {
   269  				return nil, err
   270  			}
   271  			mapping[id] = FileSlice{offset, length}
   272  		}
   273  		if d.version >= CFD_VERSION_CHECKSUM {
   274  			_, err = codec.CheckFooter(entriesStream)
   275  		} else {
   276  			err = codec.CheckEOF(entriesStream)
   277  		}
   278  		if err != nil {
   279  			return nil, err
   280  		}
   281  	} else {
   282  		// TODO remove once 3.x is not supported anymore
   283  		panic("not supported yet; will also be obsolete soon")
   284  	}
   285  	success = true
   286  	return mapping, nil
   287  }
   288  
   289  func int32ToInt(n int32, err error) (int, error) {
   290  	return int(n), err
   291  }