github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/codec/util.go (about) 1 package codec 2 3 import ( 4 "errors" 5 "fmt" 6 "github.com/balzaczyy/golucene/core/util" 7 ) 8 9 // codecs/CodecUtil.java 10 11 /* Constant to identify the start of a codec header. */ 12 const CODEC_MAGIC = 0x3fd76c17 13 14 /* Constant to identify the start of a codec footer. */ 15 const FOOTER_MAGIC = ^CODEC_MAGIC 16 17 const FOOTER_LENGTH = 16 18 19 type DataOutput interface { 20 WriteInt(n int32) error 21 WriteString(s string) error 22 } 23 24 /* 25 Writes a codc header, which records both a string to identify the 26 file and a version number. This header can be parsed and validated 27 with CheckHeader(). 28 29 CodecHeader --> Magic,CodecName,Version 30 Magic --> uint32. This identifies the start of the header. It is 31 always CODEC_MAGIC. 32 CodecName --> string. This is a string to identify this file. 33 Version --> uint32. Records the version of the file. 34 35 Note that the length of a codec header depends only upon the name of 36 the codec, so this length can be computed at any time with 37 HeaderLength(). 38 */ 39 func WriteHeader(out DataOutput, codec string, version int) error { 40 assert(out != nil) 41 bytes := []byte(codec) 42 assert2(len(bytes) == len(codec) && len(bytes) < 128, 43 "codec must be simple ASCII, less than 128 characters in length [got %v]", codec) 44 err := out.WriteInt(CODEC_MAGIC) 45 if err == nil { 46 err = out.WriteString(codec) 47 if err == nil { 48 err = out.WriteInt(int32(version)) 49 } 50 } 51 return err 52 } 53 54 func assert(ok bool) { 55 assert2(ok, "assert fail") 56 } 57 58 func assert2(ok bool, msg string, args ...interface{}) { 59 if !ok { 60 panic(fmt.Sprintf(msg, args...)) 61 } 62 } 63 64 /* Computes the length of a codec header */ 65 func HeaderLength(codec string) int { 66 return 9 + len(codec) 67 } 68 69 type DataInput interface { 70 ReadInt() (int32, error) 71 ReadString() (string, error) 72 } 73 74 func CheckHeader(in DataInput, codec string, minVersion, maxVersion int32) (v int32, err error) { 75 // Safety to guard against reading a bogus string: 76 actualHeader, err := in.ReadInt() 77 if err != nil { 78 return 0, err 79 } 80 if actualHeader != CODEC_MAGIC { 81 return 0, errors.New(fmt.Sprintf( 82 "codec header mismatch: actual header=%v vs expected header=%v (resource: %v)", 83 actualHeader, CODEC_MAGIC, in)) 84 } 85 return CheckHeaderNoMagic(in, codec, minVersion, maxVersion) 86 } 87 88 func CheckHeaderNoMagic(in DataInput, codec string, minVersion, maxVersion int32) (v int32, err error) { 89 actualCodec, err := in.ReadString() 90 if err != nil { 91 return 0, err 92 } 93 if actualCodec != codec { 94 return 0, errors.New(fmt.Sprintf( 95 "codec mismatch: actual codec=%v vs expected codec=%v (resource: %v)", actualCodec, codec, in)) 96 } 97 98 actualVersion, err := in.ReadInt() 99 if err != nil { 100 return 0, err 101 } 102 if actualVersion < minVersion { 103 return 0, NewIndexFormatTooOldError(in, actualVersion, minVersion, maxVersion) 104 } 105 if actualVersion > maxVersion { 106 return 0, NewIndexFormatTooNewError(in, actualVersion, minVersion, maxVersion) 107 } 108 109 return actualVersion, nil 110 } 111 112 func NewIndexFormatTooNewError(in DataInput, version, minVersion, maxVersion int32) error { 113 return errors.New(fmt.Sprintf( 114 "Format version is not supported (resource: %v): %v (needs to be between %v and %v)", 115 in, version, minVersion, maxVersion)) 116 } 117 118 func NewIndexFormatTooOldError(in DataInput, version, minVersion, maxVersion int32) error { 119 return errors.New(fmt.Sprintf( 120 "Format version is not supported (resource: %v): %v (needs to be between %v and %v). This version of Lucene only supports indexes created with release 3.0 and later.", 121 in, version, minVersion, maxVersion)) 122 } 123 124 type IndexOutput interface { 125 WriteInt(n int32) error 126 WriteLong(n int64) error 127 Checksum() int64 128 } 129 130 /* 131 Writes a codec footer, which records both a checksum algorithm ID and 132 a checksum. This footer can be parsed and validated with CheckFooter(). 133 134 CodecFooter --> Magic,AlgorithmID,Checksum 135 - Magic --> uint32. This identifies the start of the footer. It is 136 always FOOTER_MAGIC. 137 - AlgorithmID --> uing32. This indicates the checksum algorithm 138 used. Currently this is always 0, for zlib-crc32. 139 - Checksum --> uint64. The actual checksum value for all previous 140 bytes in the stream, including the bytes from Magic and AlgorithmID. 141 */ 142 func WriteFooter(out IndexOutput) (err error) { 143 if err = out.WriteInt(FOOTER_MAGIC); err == nil { 144 if err = out.WriteInt(0); err == nil { 145 err = out.WriteLong(out.Checksum()) 146 } 147 } 148 return 149 } 150 151 type ChecksumIndexInput interface { 152 IndexInput 153 Checksum() int64 154 } 155 156 /* Validates the codec footer previously written by WriteFooter(). */ 157 func CheckFooter(in ChecksumIndexInput) (cs int64, err error) { 158 if err = validateFooter(in); err == nil { 159 cs = in.Checksum() 160 var cs2 int64 161 if cs2, err = in.ReadLong(); err == nil { 162 if cs != cs2 { 163 return 0, errors.New(fmt.Sprintf( 164 "checksum failed (hardware problem?): expected=%v actual=%v (resource=%v)", 165 util.ItoHex(cs2), util.ItoHex(cs), in)) 166 } 167 if in.FilePointer() != in.Length() { 168 return 0, errors.New(fmt.Sprintf( 169 "did not read all bytes from file: read %v vs size %v (resource: %v)", 170 in.FilePointer(), in.Length(), in)) 171 } 172 } 173 } 174 return 175 } 176 177 /* Returns (but does not validate) the checksum previously written by CheckFooter. */ 178 func RetrieveChecksum(in IndexInput) (int64, error) { 179 var err error 180 if err = in.Seek(in.Length() - FOOTER_LENGTH); err != nil { 181 return 0, err 182 } 183 if err = validateFooter(in); err != nil { 184 return 0, err 185 } 186 return in.ReadLong() 187 } 188 189 func validateFooter(in IndexInput) error { 190 magic, err := in.ReadInt() 191 if err != nil { 192 return err 193 } 194 if magic != FOOTER_MAGIC { 195 return errors.New(fmt.Sprintf( 196 "codec footer mismatch: actual footer=%v vs expected footer=%v (resource: %v)", 197 magic, FOOTER_MAGIC, in)) 198 } 199 200 algorithmId, err := in.ReadInt() 201 if err != nil { 202 return err 203 } 204 if algorithmId != 0 { 205 return errors.New(fmt.Sprintf( 206 "codec footer mismatch: unknown algorithmID: %v", 207 algorithmId)) 208 } 209 return nil 210 } 211 212 type IndexInput interface { 213 FilePointer() int64 214 Seek(int64) error 215 Length() int64 216 ReadInt() (int32, error) 217 ReadLong() (int64, error) 218 } 219 220 /* Checks that the stream is positioned at the end, and returns error if it is not. */ 221 func CheckEOF(in IndexInput) error { 222 if in.FilePointer() != in.Length() { 223 return errors.New(fmt.Sprintf( 224 "did not read all bytes from file: read %v vs size %v (resources: %v)", 225 in.FilePointer(), in.Length(), in)) 226 } 227 return nil 228 }