github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/store/compound.go (about) 1 package store 2 3 import ( 4 "errors" 5 "fmt" 6 "github.com/balzaczyy/golucene/core/codec" 7 "github.com/balzaczyy/golucene/core/util" 8 "reflect" 9 "sync" 10 ) 11 12 // store/CompoundFileDirectory.java 13 14 type FileSlice struct { 15 offset, length int64 16 } 17 18 const ( 19 CFD_DATA_CODEC = "CompoundFileWriterData" 20 CFD_VERSION_START = 0 21 CFD_VERSION_CHECKSUM = 1 22 CFD_VERSION_CURRENT = CFD_VERSION_CHECKSUM 23 24 CFD_ENTRY_CODEC = "CompoundFileWriterEntries" 25 26 COMPOUND_FILE_EXTENSION = "cfs" 27 COMPOUND_FILE_ENTRIES_EXTENSION = "cfe" 28 ) 29 30 var SENTINEL = make(map[string]FileSlice) 31 32 type CompoundFileDirectory struct { 33 *DirectoryImpl 34 *BaseDirectory 35 sync.Locker 36 37 directory Directory 38 fileName string 39 readBufferSize int 40 entries map[string]FileSlice 41 openForWrite bool 42 writer *CompoundFileWriter 43 handle IndexInput 44 version int 45 } 46 47 func NewCompoundFileDirectory(directory Directory, fileName string, context IOContext, openForWrite bool) (d *CompoundFileDirectory, err error) { 48 self := &CompoundFileDirectory{ 49 Locker: &sync.Mutex{}, 50 directory: directory, 51 fileName: fileName, 52 readBufferSize: bufferSize(context), 53 openForWrite: openForWrite} 54 self.DirectoryImpl = NewDirectoryImpl(self) 55 self.BaseDirectory = NewBaseDirectory(self) 56 57 if !openForWrite { 58 // log.Printf("Open for read.") 59 success := false 60 defer func() { 61 if !success { 62 util.CloseWhileSuppressingError(self.handle) 63 } 64 }() 65 self.handle, err = directory.OpenInput(fileName, context) 66 if err != nil { 67 return nil, err 68 } 69 self.entries, err = self.readEntries(self.handle, directory, fileName) 70 if err != nil { 71 return nil, err 72 } 73 if self.version >= CFD_VERSION_CHECKSUM { 74 if _, err = codec.CheckHeader(self.handle, CFD_DATA_CODEC, 75 int32(self.version), int32(self.version)); err != nil { 76 return nil, err 77 } 78 // NOTE: data file is too costly to verify checksum against all the 79 // bytes on open, but for now we at least verify proper structure 80 // of the checksum footer: which looks for FOOTER_MAGIC + 81 // algorithmID. This is cheap and can detect some forms of 82 // corruption such as file trucation. 83 if _, err = codec.RetrieveChecksum(self.handle); err != nil { 84 return nil, err 85 } 86 } 87 success = true 88 self.BaseDirectory.IsOpen = true 89 return self, nil 90 } else { 91 assert2(reflect.TypeOf(directory).Name() != "CompoundFileDirectory", 92 "compound file inside of compound file: %v", fileName) 93 self.entries = SENTINEL 94 self.IsOpen = true 95 self.writer = newCompoundFileWriter(directory, fileName) 96 self.handle = nil 97 return self, nil 98 } 99 } 100 101 func (d *CompoundFileDirectory) Close() error { 102 d.Lock() // syncronized 103 defer d.Unlock() 104 105 // fmt.Printf("Closing %v...\n", d) 106 if !d.IsOpen { 107 fmt.Println("CompoundFileDirectory is already closed.") 108 // allow double close - usually to be consistent with other closeables 109 return nil // already closed 110 } 111 d.IsOpen = false 112 if d.writer != nil { 113 assert(d.openForWrite) 114 return d.writer.Close() 115 } else { 116 return util.Close(d.handle) 117 } 118 } 119 120 func (d *CompoundFileDirectory) OpenInput(name string, context IOContext) (in IndexInput, err error) { 121 d.Lock() // synchronized 122 defer d.Unlock() 123 124 d.EnsureOpen() 125 assert(!d.openForWrite) 126 id := util.StripSegmentName(name) 127 if entry, ok := d.entries[id]; ok { 128 return d.handle.Slice(name, entry.offset, entry.length) 129 } 130 keys := make([]string, 0) 131 for k := range d.entries { 132 keys = append(keys, k) 133 } 134 panic(fmt.Sprintf("No sub-file with id %v found (fileName=%v files: %v)", id, name, keys)) 135 } 136 137 func (d *CompoundFileDirectory) ListAll() (paths []string, err error) { 138 d.EnsureOpen() 139 // if self.writer != nil { 140 // return self.writer.ListAll() 141 // } 142 // Add the segment name 143 seg := util.ParseSegmentName(d.fileName) 144 keys := make([]string, 0, len(d.entries)) 145 for k := range d.entries { 146 keys = append(keys, seg+k) 147 } 148 return keys, nil 149 } 150 151 func (d *CompoundFileDirectory) FileExists(name string) bool { 152 d.EnsureOpen() 153 // if d.writer != nil { 154 // return d.writer.FileExists(name) 155 // } 156 _, ok := d.entries[util.StripSegmentName(name)] 157 return ok 158 } 159 160 func (d *CompoundFileDirectory) DeleteFile(name string) error { 161 panic("not supported") 162 } 163 164 // Returns the length of a file in the directory. 165 func (d *CompoundFileDirectory) FileLength(name string) (n int64, err error) { 166 panic("not implemented yet") 167 } 168 169 func (d *CompoundFileDirectory) CreateOutput(name string, context IOContext) (out IndexOutput, err error) { 170 d.EnsureOpen() 171 return d.writer.createOutput(name, context) 172 } 173 174 func (d *CompoundFileDirectory) Sync(names []string) error { 175 panic("not supported") 176 } 177 178 func (d *CompoundFileDirectory) MakeLock(name string) Lock { 179 panic("not supported by CFS") 180 } 181 182 func (d *CompoundFileDirectory) String() string { 183 return fmt.Sprintf("CompoundFileDirectory(file='%v' in dir=%v)", d.fileName, d.directory) 184 } 185 186 const ( 187 CODEC_MAGIC_BYTE1 = byte(uint32(codec.CODEC_MAGIC) >> 24 & 0xFF) 188 CODEC_MAGIC_BYTE2 = byte(uint32(codec.CODEC_MAGIC) >> 16 & 0xFF) 189 CODEC_MAGIC_BYTE3 = byte(uint32(codec.CODEC_MAGIC) >> 8 & 0xFF) 190 CODEC_MAGIC_BYTE4 = byte(codec.CODEC_MAGIC & 0xFF) 191 ) 192 193 func (d *CompoundFileDirectory) readEntries(handle IndexInput, dir Directory, name string) (mapping map[string]FileSlice, err error) { 194 var stream IndexInput = nil 195 var entriesStream ChecksumIndexInput = nil 196 // read the first VInt. If it is negative, it's the version number 197 // otherwise it's the count (pre-3.1 indexes) 198 var success = false 199 defer func() { 200 if success { 201 err = util.Close(stream, entriesStream) 202 } else { 203 util.CloseWhileSuppressingError(stream, entriesStream) 204 } 205 }() 206 207 stream = handle.Clone() 208 // fmt.Printf("Reading from stream: %v\n", stream) 209 firstInt, err := stream.ReadVInt() 210 if err != nil { 211 return nil, err 212 } 213 // impossible for 3.0 to have 63 files in a .cfs, CFS writer was not visible 214 // and separate norms/etc are outside of cfs. 215 if firstInt == int32(CODEC_MAGIC_BYTE1) { 216 if secondByte, err := stream.ReadByte(); err == nil { 217 if thirdByte, err := stream.ReadByte(); err == nil { 218 if fourthByte, err := stream.ReadByte(); err == nil { 219 if secondByte != CODEC_MAGIC_BYTE2 || 220 thirdByte != CODEC_MAGIC_BYTE3 || 221 fourthByte != CODEC_MAGIC_BYTE4 { 222 return nil, errors.New(fmt.Sprintf( 223 "Illegal/impossible header for CFS file: %v,%v,%v", 224 secondByte, thirdByte, fourthByte)) 225 } 226 } 227 } 228 } 229 if err != nil { 230 return nil, err 231 } 232 233 d.version, err = int32ToInt(codec.CheckHeaderNoMagic(stream, CFD_DATA_CODEC, CFD_VERSION_START, CFD_VERSION_CURRENT)) 234 if err != nil { 235 return nil, err 236 } 237 entriesFileName := util.SegmentFileName(util.StripExtension(name), "", COMPOUND_FILE_ENTRIES_EXTENSION) 238 entriesStream, err = dir.OpenChecksumInput(entriesFileName, IO_CONTEXT_READONCE) 239 if err != nil { 240 return nil, err 241 } 242 _, err = codec.CheckHeader(entriesStream, CFD_ENTRY_CODEC, CFD_VERSION_START, CFD_VERSION_CURRENT) 243 if err != nil { 244 return nil, err 245 } 246 numEntries, err := entriesStream.ReadVInt() 247 if err != nil { 248 return nil, err 249 } 250 251 mapping = make(map[string]FileSlice) 252 // fmt.Printf("Entries number: %v\n", numEntries) 253 for i := int32(0); i < numEntries; i++ { 254 id, err := entriesStream.ReadString() 255 if err != nil { 256 return nil, err 257 } 258 if _, ok := mapping[id]; ok { 259 return nil, errors.New(fmt.Sprintf( 260 "Duplicate cfs entry id=%v in CFS: %v", id, entriesStream)) 261 } 262 // log.Printf("Found entry: %v", id) 263 offset, err := entriesStream.ReadLong() 264 if err != nil { 265 return nil, err 266 } 267 length, err := entriesStream.ReadLong() 268 if err != nil { 269 return nil, err 270 } 271 mapping[id] = FileSlice{offset, length} 272 } 273 if d.version >= CFD_VERSION_CHECKSUM { 274 _, err = codec.CheckFooter(entriesStream) 275 } else { 276 err = codec.CheckEOF(entriesStream) 277 } 278 if err != nil { 279 return nil, err 280 } 281 } else { 282 // TODO remove once 3.x is not supported anymore 283 panic("not supported yet; will also be obsolete soon") 284 } 285 success = true 286 return mapping, nil 287 } 288 289 func int32ToInt(n int32, err error) (int, error) { 290 return int(n), err 291 }